Files
cs249r_book/book/tools/scripts/_archive/convert_unicode_times.py
Vijay Janapa Reddi 5d68f0a2e0 style: standardize multiplication notation to $\times$ across all chapters
Convert all Unicode × (U+00D7) to LaTeX $\times$ in prose, tables, and
math contexts across both volumes. Unicode × is preserved only inside
fig-alt text for accessibility screen readers. One instance inside a
plain markdown backtick code span (frameworks.qmd) was reverted to
Unicode × since LaTeX doesn't render in code spans.

Updates validate.py with a new lowercase-x-as-multiplication check and
refines the latex_adjacent warning to distinguish _str variables (safe)
from raw inline Python. Updates validate_inline_refs.py comments to
reflect the new convention. Includes the conversion script in _archive.
2026-02-15 11:43:45 -05:00

112 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
One-shot script: Convert Unicode × to $\\times$ in QMD prose.
Skips:
- Python code blocks (```{python} ... ```)
- TikZ/LaTeX blocks (```{=tex} ... ``` and ```latex ... ```)
- Raw blocks (```{=html} etc.)
- fig-alt attributes (plain text, LaTeX doesn't render)
- Lines that are purely comments (% prefix in TikZ context)
Converts:
- Unicode × (U+00D7) in prose → $\\times$
Usage:
python3 book/tools/scripts/_archive/convert_unicode_times.py [--dry-run]
"""
import re
import sys
from pathlib import Path
# Repository anchors derived from this script's own location
# (book/tools/scripts/_archive/<this file>): parents[0] is _archive/, so
# parents[3] is the book/ directory.
BOOK_ROOT = Path(__file__).resolve().parents[3] # book/
# Directory that main() searches recursively for *.qmd files.
CONTENTS = BOOK_ROOT / "quarto" / "contents"
# Patterns
# Fence opener: three or more backticks or tildes at the start of the
# (stripped) line.  A backtick opener must not be followed by another backtick
# on the same line -- that shape is an inline code span such as ```x```.
CODE_BLOCK_START = re.compile(r'^(`{3,}(?!.*`)|~{3,})')
# Retained for reference: every fenced block is skipped regardless of its
# language, so these two patterns are not consulted by convert_file().
PYTHON_BLOCK = re.compile(r'^```\{python\}|^```python')
TIKZ_BLOCK = re.compile(r'^```\{=tex\}|^```latex|^```\{=html\}|^```\{=typst\}')
# Fence closer: a line made up only of a run of backticks or tildes.
CODE_BLOCK_END = re.compile(r'^(`{3,}|~{3,})\s*$')
# Start of a fig-alt attribute (the closing quote may be missing on this line).
FIG_ALT = re.compile(r'fig-alt\s*=\s*"')
# Unicode × character
UNICODE_TIMES = '×'
_LATEX_TIMES = '$\\times$'
# Spans whose text must be left untouched on an otherwise convertible line:
#   * a complete fig-alt="..." attribute (plain text read by screen readers)
#   * an inline code span delimited by matching runs of backticks
_PROTECTED_SPAN = re.compile(
    r'fig-alt\s*=\s*"[^"]*"'
    r'|(?<!`)(?P<ticks>`+)(?!`).+?(?<!`)(?P=ticks)(?!`)'
)


def _convert_line(line: str) -> str:
    """Return *line* with × replaced everywhere outside protected spans.

    The line is returned unchanged when it contains a ``fig-alt="`` opener
    with no closing quote on the same line, because the extent of the alt
    text cannot be determined.
    """
    # Alternate unprotected / protected pieces: even indices are free text,
    # odd indices are protected spans copied verbatim.
    pieces = []
    cursor = 0
    for span in _PROTECTED_SPAN.finditer(line):
        pieces.append(line[cursor:span.start()])
        pieces.append(span.group(0))
        cursor = span.end()
    pieces.append(line[cursor:])

    # An opener left in free text means the attribute was not fully matched.
    if any(FIG_ALT.search(free_text) for free_text in pieces[0::2]):
        return line

    return ''.join(
        piece.replace(UNICODE_TIMES, _LATEX_TIMES) if index % 2 == 0 else piece
        for index, piece in enumerate(pieces)
    )


def convert_file(qmd_path: Path, dry_run: bool = False) -> list:
    """Convert Unicode × to $\\times$ in prose lines of a QMD file.

    Lines inside fenced code blocks are never touched.  On prose lines, text
    inside ``fig-alt="..."`` attributes and inline backtick code spans is
    preserved; every other × becomes ``$\\times$``.

    NOTE: × that already sits inside a ``$...$`` math span is not
    special-cased and is wrapped in its own pair of dollar signs.

    Args:
        qmd_path: File to process; read and written as UTF-8.
        dry_run: When true, report the changes without writing the file.

    Returns:
        A list of ``(line_number, old_line, new_line)`` tuples, one per
        modified line.  Line numbers are 1-based and both strings have
        trailing whitespace stripped.
    """
    text = qmd_path.read_text(encoding='utf-8')
    lines = text.split('\n')
    changes = []
    open_fence = None  # the opening fence run while inside a fenced block

    for i, line in enumerate(lines):
        stripped = line.strip()

        # Track fenced blocks.  A fence is closed only by a run of the same
        # character that is at least as long as the opener, so a ``` line
        # inside a ```` block does not end it.
        if open_fence is None:
            opener = CODE_BLOCK_START.match(stripped)
            if opener:
                open_fence = opener.group(1)
                continue
        else:
            closer = CODE_BLOCK_END.match(stripped)
            if (
                closer
                and closer.group(1)[0] == open_fence[0]
                and len(closer.group(1)) >= len(open_fence)
            ):
                open_fence = None
            continue

        # Skip if no × on this line
        if UNICODE_TIMES not in line:
            continue

        new_line = _convert_line(line)
        if new_line != line:
            changes.append((i + 1, line.rstrip(), new_line.rstrip()))
            lines[i] = new_line

    if changes and not dry_run:
        qmd_path.write_text('\n'.join(lines), encoding='utf-8')
    return changes
def main():
    """Run the conversion over every .qmd file under CONTENTS and report it.

    Passing ``--dry-run`` anywhere on the command line lists the lines that
    would change without rewriting any file.
    """
    dry_run = '--dry-run' in sys.argv
    banner = '[DRY RUN] ' if dry_run else ''
    scanned = sorted(CONTENTS.rglob('*.qmd'))
    changed_lines = 0

    for path in scanned:
        edits = convert_file(path, dry_run=dry_run)
        if not edits:
            continue
        shown = path.relative_to(BOOK_ROOT.parent)
        print(f"\n{banner}{shown} ({len(edits)} lines changed):")
        # Only the line number is reported; the old/new text is not printed.
        for line_no, _before, _after in edits:
            print(f" L{line_no}")
        changed_lines += len(edits)

    verb = "would change" if dry_run else "changed"
    print(f"\nTotal: {verb} {changed_lines} lines across {len(scanned)} files scanned.")


if __name__ == '__main__':
    main()