import os import re import sys directory = '/Users/VJ/GitHub/MLSysBook-notation-audit/book/quarto/contents/' violations = 0 def check_file(filepath): global violations with open(filepath, 'r') as f: content = f.read() # 1. Iron Law efficiency matches = re.finditer(r'R_{\\text{peak}}\s*\\cdot\s*\\eta(?![_\{])', content) for match in matches: print(f"[Violation] Bare \\eta in Iron Law compute term: {filepath}") violations += 1 # 2. Bare Subscripts (e.g., _{lat} instead of _{\text{lat}}) labels = ['lat', 'hw', 'vol', 'peak', 'wait', 'compute', 'comm', 'overlap', 'scaling', 'step', 'total', 'move', 'acquire', 'label', 'store', 'process', 'avg', 'eff', 'device', 'net', 'io', 'req'] for label in labels: if f'_{{{label}}}' in content: print(f"[Violation] Bare subscript _{{{label}}} found in {filepath}. Use _{{\\text{{{label}}}}}") violations += 1 # 3. Operators spacing if re.search(r'[^\s]\\times', content) or re.search(r'\\times[^\s]', content): # We allow \times in TikZ though, so be careful. # For now just flag it if it's in a math block $$ pass # 4. BW vs \text{BW} if re.search(r'(?