#!/usr/bin/env python3
"""TikZ style linter: flag uses of undefined custom styles in Quarto documents.

The linter scans every ``.qmd`` file below ``<root>/quarto`` for fenced
``{.tikz}`` blocks, collects the styles defined through ``\\tikzset{...}``
(in the block itself and in ``quarto/tex/header-includes.tex``), and reports
option tokens such as ``Line`` or ``Box`` that look like custom styles but
have no definition.

Only tokens that start with an uppercase letter are considered, which keeps
colors and built-in TikZ keys from producing false positives.

Exit status: 0 when nothing is found, 1 when undefined styles are reported,
2 when the root path does not exist.
"""
# Origin: patch 53b31cb8b653327a23959eeee2b07edd5609a62c
#   Author:  Vijay Janapa Reddi, Sat, 1 Nov 2025 13:26:05 -0400
#   Subject: chore(test): add TikZ style linter for undefined style detection
#   Path:    tools/scripts/testing/tikz_style_linter.py

import re
import sys
from bisect import bisect_right
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Set, Tuple


TIKZ_BLOCK_PATTERN = re.compile(r"```\{\.tikz\}[\s\S]*?```", re.MULTILINE)

# Retained for backward compatibility only: this pattern stops at the FIRST
# closing brace, so it truncates any \tikzset body that contains nested braces.
# collect_defined_styles() uses TIKZSET_OPEN_PATTERN plus brace matching.
TIKZSET_BLOCK_PATTERN = re.compile(r"\\tikzset\s*\{([\s\S]*?)\}")
TIKZSET_OPEN_PATTERN = re.compile(r"\\tikzset\s*\{")

# Group 2 is the style name in "Name/.style". The dot is escaped with a single
# backslash; a doubled backslash would demand a literal "\" in the input and
# never match a real definition.
STYLE_DEF_PATTERN = re.compile(r"(^|[,\s])([A-Za-z][\w-]*)\s*/\.style\b")

# Shape of a token that can be a custom style name.
_STYLE_TOKEN_PATTERN = re.compile(r"[A-Za-z][\w-]*")

# Commands that take option lists in square brackets (regex fragments).
OPTIONED_COMMANDS = (
    "draw",
    "node",
    "path",
    "filldraw",
    "shade",
    "clip",
    "coordinate",
    r"begin\{scope\}",  # \begin{scope}[...]
)

OPTION_CAPTURE_PATTERNS = [
    re.compile(rf"\\{cmd}\s*\[([^\]]+)\]") for cmd in OPTIONED_COMMANDS
]


# Heuristic tokens we ignore when seen as standalone options, to reduce
# false positives.
KNOWN_TOKENS: Set[str] = {
    # thickness
    "ultra thin",
    "very thin",
    "thin",
    "semithick",
    "thick",
    "very thick",
    "ultra thick",
    # dashing
    "solid",
    "dashed",
    "densely dashed",
    "loosely dashed",
    "dotted",
    "densely dotted",
    "loosely dotted",
    "dashdotted",
    # common shape keywords
    "circle",
    "rectangle",
    # positioning
    "left",
    "right",
    "above",
    "below",
    # path arrows sometimes appear as tokens, but usually expressed via -{...}
}


def read_text(path: Path) -> str:
    """Return the contents of *path* decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped instead of aborting the scan.
    I/O errors (missing file, permissions) propagate to the caller.
    """
    try:
        return path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        return path.read_text(encoding="utf-8", errors="ignore")


def find_tikz_blocks(text: str) -> List[Tuple[int, int, str]]:
    """Return ``(start, end, block_text)`` for every fenced ``{.tikz}`` block.

    ``start`` and ``end`` are character offsets into *text*.
    """
    return [(*m.span(), m.group(0)) for m in TIKZ_BLOCK_PATTERN.finditer(text)]


def _balanced_brace_body(text: str, open_idx: int) -> str:
    """Return the text between the ``{`` at *open_idx* and its matching ``}``.

    Backslash-escaped characters (``\\{``, ``\\}``) do not count as braces.
    If the braces never balance, the remainder of *text* is returned so that
    a malformed block still yields its definitions (best effort).
    """
    depth = 0
    i = open_idx
    end = len(text)
    while i < end:
        ch = text[i]
        if ch == "\\":
            i += 2  # skip the escaped character as well
            continue
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[open_idx + 1:i]
        i += 1
    return text[open_idx + 1:]


def collect_defined_styles(text: str) -> Set[str]:
    """Return the names defined via ``Name/.style`` inside ``\\tikzset{...}``.

    The whole brace-balanced body of each ``\\tikzset`` is inspected, so
    definitions that follow a nested ``{...}`` value are found too.
    """
    styles: Set[str] = set()
    for opener in TIKZSET_OPEN_PATTERN.finditer(text):
        # opener.end() - 1 is the index of the opening brace itself.
        body = _balanced_brace_body(text, opener.end() - 1)
        styles.update(m.group(2) for m in STYLE_DEF_PATTERN.finditer(body))
    return styles


def extract_option_tokens(option_text: str) -> List[str]:
    """Split a TikZ option list on top-level commas.

    Commas nested inside ``{...}`` or ``[...]`` do not split. Tokens are
    stripped and empty tokens are dropped.
    """
    tokens: List[str] = []
    buf: List[str] = []
    depth_curly = 0
    depth_brack = 0
    for ch in option_text:
        if ch == "{":
            depth_curly += 1
        elif ch == "}":
            depth_curly = max(0, depth_curly - 1)  # tolerate stray closers
        elif ch == "[":
            depth_brack += 1
        elif ch == "]":
            depth_brack = max(0, depth_brack - 1)

        if ch == "," and depth_curly == 0 and depth_brack == 0:
            tokens.append("".join(buf).strip())
            buf = []
        else:
            buf.append(ch)
    tokens.append("".join(buf).strip())
    return [token for token in tokens if token]


def looks_like_style_token(token: str) -> bool:
    """Return True if *token* plausibly names a custom style.

    A candidate starts with an uppercase letter, is a single word made of
    letters, digits, ``_`` and ``-``, is not a ``key=value`` pair, is not an
    arrow specification, and is not one of ``KNOWN_TOKENS``.
    """
    # Only uppercase-initial tokens are treated as potential custom styles.
    if not token or not token[0].isupper():
        return False
    if token in KNOWN_TOKENS:
        return False
    # key=value options (including "shorten <=...") are not style names.
    if "=" in token:
        return False
    # Arrow tips / path operators are not styles.
    if token.endswith("-") or "->" in token:
        return False
    return _STYLE_TOKEN_PATTERN.fullmatch(token) is not None


def _iter_style_uses(block_text: str) -> Iterable[Tuple[str, int]]:
    """Yield ``(token, command_offset)`` for each style-like option token."""
    for pattern in OPTION_CAPTURE_PATTERNS:
        for m in pattern.finditer(block_text):
            for token in extract_option_tokens(m.group(1)):
                if looks_like_style_token(token):
                    yield token, m.start()


def _first_undefined_uses(block_text: str, defined_styles: Set[str]) -> Dict[str, int]:
    """Map each undefined style to the offset of the first command using it."""
    first: Dict[str, int] = {}
    for token, offset in _iter_style_uses(block_text):
        if token in defined_styles:
            continue
        if token not in first or offset < first[token]:
            first[token] = offset
    return first


def find_undefined_styles_in_block(block_text: str, defined_styles: Set[str]) -> Set[str]:
    """Return the style-like tokens used in *block_text* that are not defined."""
    return set(_first_undefined_uses(block_text, defined_styles))


def build_line_index(text: str) -> List[int]:
    """Return the start offset of every line in *text* (lines split on ``\\n``)."""
    return [0] + [m.end() for m in re.finditer(r"\n", text)]


def offset_to_line(line_starts: List[int], offset: int) -> int:
    """Return the 1-based line number that contains character *offset*.

    *line_starts* must be sorted ascending, as produced by build_line_index().
    """
    # bisect_right counts the line starts <= offset, which is the line number.
    return max(1, bisect_right(line_starts, offset))


def scan_quarto_root(root: Path) -> int:
    """Scan ``<root>/quarto`` and print every undefined style reference.

    Styles from ``quarto/tex/header-includes.tex`` (if present) are treated
    as defined for every block; each block additionally sees its own
    ``\\tikzset`` definitions.

    Returns the number of undefined style names found, counted per block.
    """
    global_styles: Set[str] = set()
    header_includes = root / "quarto" / "tex" / "header-includes.tex"
    if header_includes.exists():
        global_styles |= collect_defined_styles(read_text(header_includes))

    total_issues = 0
    # Sorted so that the report order is stable across runs and platforms.
    for qmd in sorted((root / "quarto").rglob("*.qmd")):
        text = read_text(qmd)
        blocks = find_tikz_blocks(text)
        if not blocks:
            continue
        line_index = build_line_index(text)
        # Split on "\n" only, to stay consistent with build_line_index();
        # str.splitlines() also breaks on form feeds, "\r", U+2028, etc.
        lines = text.split("\n")
        for bstart, _bend, btext in blocks:
            defined = global_styles | collect_defined_styles(btext)
            undefined = _first_undefined_uses(btext, defined)
            if not undefined:
                continue
            total_issues += len(undefined)
            print(f"File: {qmd}")
            for token in sorted(undefined):
                line_no = offset_to_line(line_index, bstart + undefined[token])
                context_line = lines[line_no - 1].rstrip()
                print(f"  line {line_no}: uses undefined style '{token}'")
                print(f"    {context_line}")
            print()
    return total_issues


def main(argv: List[str]) -> int:
    """Command-line entry point; returns the process exit status.

    ``argv[1]``, when given, is the repository root. Otherwise the root is
    taken to be three directories above this script.
    """
    root = Path(argv[1]).resolve() if len(argv) > 1 else Path(__file__).resolve().parents[3]
    if not root.exists():
        print(f"Root path does not exist: {root}", file=sys.stderr)
        return 2
    issues = scan_quarto_root(root)
    if issues:
        print(f"Found {issues} undefined style references.")
        return 1
    print("No undefined TikZ style references found.")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))