mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-02 10:39:10 -05:00
Convert all Unicode × (U+00D7) to LaTeX $\times$ in prose, tables, and math contexts across both volumes. Unicode × is preserved only inside fig-alt text for accessibility screen readers. One instance inside a plain markdown backtick code span (frameworks.qmd) was reverted to Unicode × since LaTeX doesn't render in code spans. Updates validate.py with a new lowercase-x-as-multiplication check and refines the latex_adjacent warning to distinguish _str variables (safe) from raw inline Python. Updates validate_inline_refs.py comments to reflect the new convention. Includes the conversion script in _archive.
112 lines
3.5 KiB
Python
112 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
One-shot script: Convert Unicode × to $\\times$ in QMD prose.
|
||
|
||
Skips:
|
||
- Python code blocks (```{python} ... ```)
|
||
- TikZ/LaTeX blocks (```{=tex} ... ``` and ```latex ... ```)
|
||
- Raw blocks (```{=html} etc.)
|
||
- fig-alt attributes (plain text, LaTeX doesn't render)
|
||
- Lines that are purely comments (% prefix in TikZ context)
|
||
|
||
Converts:
|
||
- Unicode × (U+00D7) in prose → $\\times$
|
||
|
||
Usage:
|
||
python3 book/tools/scripts/_archive/convert_unicode_times.py [--dry-run]
|
||
"""
|
||
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
# This script lives at book/tools/scripts/_archive/, so three levels above its
# own directory is the book/ root.
BOOK_ROOT = Path(__file__).resolve().parents[3]
# Directory that is searched recursively for .qmd files.
CONTENTS = BOOK_ROOT.joinpath("quarto", "contents")
|
||
|
||
# Patterns
# Fence opener: three or more backticks at the start of the stripped line.
CODE_BLOCK_START = re.compile(r'^```')
# Not consulted by convert_file(): every fenced block is skipped wholesale,
# whatever its language. Kept so the names stay available at module level.
PYTHON_BLOCK = re.compile(r'^```\{python\}|^```python')
TIKZ_BLOCK = re.compile(r'^```\{=tex\}|^```latex|^```\{=html\}|^```\{=typst\}')
# Bare closing fence of any length; convert_file() additionally requires it to
# be at least as long as the fence that opened the block.
CODE_BLOCK_END = re.compile(r'^`{3,}\s*$')
FIG_ALT = re.compile(r'fig-alt\s*=\s*"')

# Unicode × character
UNICODE_TIMES = '×'
# Replacement used in prose; inside existing math only the bare macro is used.
_PROSE_TIMES = '$\\times$'

# A complete fig-alt="..." attribute on a single line.
_FIG_ALT_SPAN = re.compile(r'fig-alt\s*=\s*"[^"]*"')
# Spans of a prose line that must not receive `$\times$`:
#   alt  - fig-alt text (plain text for screen readers, kept verbatim)
#   code - inline code span delimited by equal-length backtick runs (verbatim)
#   math - single-line $$...$$ or inline $...$; the inline form follows the
#          Pandoc rule (no space after the opener, none before the closer,
#          no digit after the closer) so currency amounts are not mistaken
#          for math
_PROTECTED_SPAN = re.compile(
    r'(?P<alt>' + _FIG_ALT_SPAN.pattern + r')'
    r'|(?P<code>(?<!`)(?P<ticks>`+)(?!`).+?(?<!`)(?P=ticks)(?!`))'
    r'|(?P<math>\$\$.+?\$\$'
    r'|(?<![\\$])\$(?![\s$])(?:\\.|[^$\\])+?(?<!\s)\$(?!\d))'
)
# × directly followed by a letter: the macro needs a separating space there,
# otherwise "\times" + "b" would read as the undefined macro "\timesb".
_TIMES_BEFORE_LETTER = re.compile(UNICODE_TIMES + r'(?=[A-Za-z])')


def _times_in_math(tex: str) -> str:
    """Replace × with a bare ``\\times`` in text that is already math."""
    # A callable replacement avoids re.sub interpreting "\t" as a tab.
    tex = _TIMES_BEFORE_LETTER.sub(lambda _match: '\\times ', tex)
    return tex.replace(UNICODE_TIMES, '\\times')


def _convert_prose_line(line: str) -> str:
    """Convert × in one prose line, leaving code and fig-alt spans untouched."""
    pieces = []
    cursor = 0
    for span in _PROTECTED_SPAN.finditer(line):
        # Plain prose between protected spans gets the full $\times$ form.
        pieces.append(line[cursor:span.start()].replace(UNICODE_TIMES, _PROSE_TIMES))
        if span.group('math') is not None:
            pieces.append(_times_in_math(span.group()))
        else:
            pieces.append(span.group())
        cursor = span.end()
    pieces.append(line[cursor:].replace(UNICODE_TIMES, _PROSE_TIMES))
    return ''.join(pieces)


def convert_file(qmd_path: Path, dry_run: bool = False) -> list:
    """Convert Unicode × to $\\times$ in prose lines of a QMD file.

    Left untouched: everything inside backtick-fenced blocks, inline code
    spans, and ``fig-alt="..."`` attributes. Inside existing math (inline
    ``$...$``, single-line ``$$...$$``, or a display block delimited by
    ``$$`` lines) × becomes a bare ``\\times`` so the math is not broken by
    nested dollar signs.

    Args:
        qmd_path: File to process; read and written as UTF-8 with its
            original line endings preserved.
        dry_run: When true, report the changes without writing the file.

    Returns:
        A list of ``(line_number, old_line, new_line)`` tuples, one per
        changed line, with 1-based line numbers and trailing whitespace
        stripped from both texts.
    """
    # newline='' turns off newline translation so CRLF files round-trip.
    with qmd_path.open(encoding='utf-8', newline='') as handle:
        text = handle.read()

    lines = text.split('\n')
    changes = []
    fence_len = 0  # backtick count of the open fence; 0 means outside a block
    in_display_math = False

    for i, line in enumerate(lines):
        stripped = line.strip()

        # Track code blocks
        if fence_len:
            # Only a bare fence at least as long as the opener closes the
            # block, so ``` lines nested inside a ```` block stay inside it.
            if CODE_BLOCK_END.match(stripped) and len(stripped) >= fence_len:
                fence_len = 0
            continue
        if CODE_BLOCK_START.match(stripped):
            run = len(stripped) - len(stripped.lstrip('`'))
            # A backtick later on the line means this is inline code, not a
            # fence: a fence's info string cannot contain backticks.
            if '`' not in stripped[run:]:
                fence_len = run
                continue

        # Track display math delimited by `$$` lines; the closing line may
        # carry a trailing label such as `$$ {#eq-name}`.
        if stripped == '$$' or (in_display_math and stripped.startswith('$$')):
            in_display_math = not in_display_math
            continue

        # Skip if no × on this line
        if UNICODE_TIMES not in line:
            continue

        if in_display_math:
            new_line = _times_in_math(line)
        elif FIG_ALT.search(line) and not _FIG_ALT_SPAN.search(line):
            # fig-alt present but couldn't parse span — skip to be safe
            continue
        else:
            new_line = _convert_prose_line(line)

        if new_line != line:
            changes.append((i + 1, line.rstrip(), new_line.rstrip()))
            lines[i] = new_line

    if changes and not dry_run:
        with qmd_path.open('w', encoding='utf-8', newline='') as handle:
            handle.write('\n'.join(lines))

    return changes
|
||
|
||
|
||
def main():
    """Run the conversion over every .qmd file under CONTENTS and print a report.

    Passing ``--dry-run`` anywhere on the command line reports what would
    change without writing any file.
    """
    dry_run = '--dry-run' in sys.argv
    banner = '[DRY RUN] ' if dry_run else ''

    targets = sorted(CONTENTS.rglob('*.qmd'))
    total_changes = 0

    for target in targets:
        edits = convert_file(target, dry_run=dry_run)
        if not edits:
            continue

        shown_path = target.relative_to(BOOK_ROOT.parent)
        print(f"\n{banner}{shown_path} ({len(edits)} lines changed):")
        # Only the line number of each change is listed; the old and new
        # text carried in each tuple is not printed.
        for line_no, _old, _new in edits:
            print(f" L{line_no}")
        total_changes += len(edits)

    if dry_run:
        mode = "would change"
    else:
        mode = "changed"
    print(f"\nTotal: {mode} {total_changes} lines across {len(targets)} files scanned.")
|
||
|
||
|
||
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|