mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 17:20:21 -05:00
- Update mermaid params to use PDF output with pdfcrop - Add width and height constraints with keepaspectratio in LaTeX - Redesign Module Flow diagram to LR layout with TB subgraphs - Shorten labels to fit better on page
157 lines
5.4 KiB
Python
157 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
LaTeX post-processor for TinyTorch PDF builds.
|
||
|
||
Removes emojis from the generated .tex file for a clean, professional PDF.
|
||
"""
|
||
|
||
import re
|
||
from pathlib import Path
|
||
|
||
# Emojis to remove entirely (for clean, professional PDF)
|
||
EMOJIS_TO_REMOVE = [
|
||
'✅', '❌', '🧪', '🔬', '📈', '📊', '📖', '🚀', '🎓',
|
||
'🎉', '🌍', '👨', '👩', '🏫', '👥', '🏗', '🏛', '🏆', '🍃',
|
||
'🏅', '👁', '💡', '💻', '💼', '💾', '📍', '📦', '🔀', '🔄',
|
||
'🔍', '🔒', '🔢', '🔧', '🛠', '🖼', '🤖', '🤝', '🧠', '🧭',
|
||
'🎯', '⭐', '⏱', '⚠', '⚡', '✨', '🌐', '📝', '🎨', '🔗',
|
||
'📚', '🏠', '🎮', '🔮', '💪', '🌟', '📌', '🗂', '📁', '🗃',
|
||
'⚙', '🔩', '🔨', '⛏', '🪛', '🧰', '📐', '📏', '🧮', '💯',
|
||
'🎲', '🎰', '🎪', '🎭', '🎬', '🎤', '🎧', '🎵', '🎶', '🎸',
|
||
'🏃', '🚶', '🧑', '👶', '👴', '👵', '🧒', '👦', '👧', '🧓',
|
||
'', '️', # Zero-width joiner and variation selector
|
||
]
|
||
|
||
# Fire emoji replacement - use inline image for branding
|
||
FIRE_EMOJI = '🔥'
|
||
FIRE_IMAGE_LATEX = r'\raisebox{-0.1em}{\includegraphics[height=1em]{fire-emoji.png}}'
|
||
|
||
# Subscripts/superscripts - convert to LaTeX math
|
||
MATH_REPLACEMENTS = {
|
||
'ᴺ': r'$^N$',
|
||
'ᵐ': r'$^m$',
|
||
'ₙ': r'$_n$',
|
||
'ₘ': r'$_m$',
|
||
'₀': r'$_0$',
|
||
'₁': r'$_1$',
|
||
'₂': r'$_2$',
|
||
'₃': r'$_3$',
|
||
'₄': r'$_4$',
|
||
'₅': r'$_5$',
|
||
'₆': r'$_6$',
|
||
'₇': r'$_7$',
|
||
'₈': r'$_8$',
|
||
'₉': r'$_9$',
|
||
}
|
||
|
||
def process_latex_file(tex_file: Path) -> int:
|
||
"""Process .tex file, removing emojis and duplicate title page for clean PDF."""
|
||
if not tex_file.exists():
|
||
print(f"Error: {tex_file} not found")
|
||
return 0
|
||
|
||
with open(tex_file, 'r', encoding='utf-8') as f:
|
||
content = f.read()
|
||
|
||
original_len = len(content)
|
||
|
||
# FIRST: Remove the duplicate Sphinx-generated title page metadata
|
||
# This must happen BEFORE emoji replacement to avoid breaking the regex
|
||
# Clear \title{...}, \date{...}, \author{...} so the default title page is empty
|
||
# Use specific line replacements to be safe
|
||
content = content.replace(r'\title{Tiny🔥Torch}', r'\title{}')
|
||
content = re.sub(r'\\date\{[A-Za-z]+ \d+, \d+\}', r'\\date{}', content)
|
||
content = re.sub(r'\\author\{Prof\.\\@\{\} [^}]+\}', r'\\author{}', content)
|
||
|
||
# Replace fire emoji with inline image (for Tiny🔥Torch branding)
|
||
content = content.replace(FIRE_EMOJI, FIRE_IMAGE_LATEX)
|
||
|
||
# Remove all other emojis
|
||
for emoji in EMOJIS_TO_REMOVE:
|
||
content = content.replace(emoji, '')
|
||
|
||
# Replace math symbols
|
||
for symbol, latex in MATH_REPLACEMENTS.items():
|
||
content = content.replace(symbol, latex)
|
||
|
||
# Clean up escaped LaTeX commands that appear literally in tables
|
||
# These come from markdown files using LaTeX syntax that gets escaped
|
||
# Green checkmark: \textcolor{green!70!black}{$\checkmark$} -> ✓
|
||
content = re.sub(
|
||
r'\\textcolor\{green!70!black\}\{\$\\checkmark\$\}',
|
||
r'\\checkmark',
|
||
content
|
||
)
|
||
# Red X: \textcolor{red!70!black}{$\times$} -> ✗
|
||
content = re.sub(
|
||
r'\\textcolor\{red!70!black\}\{\$\\times\$\}',
|
||
r'$\\times$',
|
||
content
|
||
)
|
||
|
||
# Fix figure placement: change [htbp] to [H] for inline placement
|
||
content = re.sub(
|
||
r'\\begin\{figure\}\[htbp\]',
|
||
r'\\begin{figure}[H]',
|
||
content
|
||
)
|
||
|
||
# Center all includegraphics that aren't already centered
|
||
# Find \includegraphics not preceded by \centering and wrap them
|
||
content = re.sub(
|
||
r'(\\begin\{figure\}\[H\]\n)(\\includegraphics)',
|
||
r'\1\\centering\n\2',
|
||
content
|
||
)
|
||
|
||
# Scale mermaid diagrams: use adjustbox for smart max-width scaling
|
||
# This allows small diagrams to stay natural size, but caps large ones at column width
|
||
# First, ensure adjustbox is available by adding to preamble if not present
|
||
if r'\usepackage{adjustbox}' not in content:
|
||
# Add adjustbox after float package
|
||
content = content.replace(
|
||
r'\usepackage{float}',
|
||
r'\usepackage{float}' + '\n' + r'\usepackage{adjustbox}'
|
||
)
|
||
|
||
# Replace sphinxincludegraphics for mermaid with width-constrained includegraphics
|
||
# Using width=\linewidth ensures diagram fits within text margins
|
||
# height=0.6\textheight allows taller diagrams while keeping them on one page
|
||
# keepaspectratio prevents distortion - image scales to fit whichever constraint is tighter
|
||
content = re.sub(
|
||
r'\\sphinxincludegraphics\{(mermaid-[^}]+\.pdf)\}',
|
||
r'\\includegraphics[width=\\linewidth,height=0.6\\textheight,keepaspectratio]{\g<1>}',
|
||
content
|
||
)
|
||
|
||
# Write back
|
||
with open(tex_file, 'w', encoding='utf-8') as f:
|
||
f.write(content)
|
||
|
||
chars_removed = original_len - len(content)
|
||
return chars_removed
|
||
|
||
def main():
|
||
"""Main entry point."""
|
||
import sys
|
||
|
||
# Default path
|
||
site_dir = Path(__file__).parent.parent
|
||
tex_file = site_dir / '_build' / 'latex' / 'tinytorch-course.tex'
|
||
|
||
# Allow override from command line
|
||
if len(sys.argv) > 1:
|
||
tex_file = Path(sys.argv[1])
|
||
|
||
print(f"Cleaning emojis from: {tex_file.name}")
|
||
|
||
chars_removed = process_latex_file(tex_file)
|
||
|
||
if chars_removed > 0:
|
||
print(f"Removed {chars_removed} emoji characters for clean PDF")
|
||
else:
|
||
print("No emojis found")
|
||
|
||
if __name__ == '__main__':
|
||
main()
|