Files
cs249r_book/tinytorch/tools/dev/validate_cli_docs.py
Vijay Janapa Reddi b15ff98abb Removes unused commands from CLI docs
Updates the CLI documentation to reflect the current set of implemented commands.

The 'dev' command no longer includes 'preflight' and 'validate' subcommands.
Removes these commands from the valid commands list.
2026-01-25 13:23:17 -05:00

243 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""
Validate that CLI commands referenced in documentation match actual tito CLI.
This script extracts all `tito X Y` commands from markdown files and validates
them against the actual CLI structure. Runs as a pre-commit hook to catch
documentation drift before it reaches the repo.
Usage:
python tools/dev/validate_cli_docs.py [--verbose | -v]
Exit codes:
0 - All commands valid
1 - Invalid commands found
"""
import re
import subprocess
import sys
from pathlib import Path
from typing import Set, Dict, List, Tuple
# Top-level directories (relative to the tinytorch root) that are scanned
# recursively for markdown files.
DOCS_DIRS = [
    "site",
    "modules",
    "tests",
    "milestones",
]

# Path fragments that mark generated or vendored content; markdown files
# whose path contains one of these are skipped.
SKIP_PATTERNS = [
    ".venv",
    "node_modules",
    "_build",
    ".git",
]

# Known valid commands, mirroring `tito --help`.
# Maps each command group to the subcommands it accepts. An empty list means
# the group defines no subcommands (it stands alone or takes free-form
# arguments such as module names).
VALID_COMMANDS: Dict[str, List[str]] = {
    # Stand-alone commands.
    "setup": [],  # No subcommands
    "update": [],  # No subcommands
    "export": [],  # Takes module args, not subcommands
    "test": [],  # Takes module args, not subcommands
    "logo": [],  # No subcommands
    # Command groups with a fixed set of subcommands.
    "system": [
        "info",
        "health",
        "jupyter",
        "update",
        "logo",
    ],
    "module": [
        "start",
        "view",
        "resume",
        "complete",
        "test",
        "reset",
        "status",
        "list",
    ],
    "dev": [
        "test",
        "export",
    ],
    "src": [
        "export",
        "test",
    ],
    "package": [
        "reset",
        "nbdev",
    ],
    "nbgrader": [
        "init",
        "generate",
        "release",
        "collect",
        "autograde",
        "feedback",
        "status",
        "analytics",
        "report",
    ],
    "milestone": [
        "list",
        "run",
        "info",
        "status",
        "timeline",
        "test",
        "demo",
    ],
    "community": [
        "login",
        "logout",
        "profile",
        "status",
        "map",
    ],
    "benchmark": [
        "baseline",
        "capstone",
    ],
    "olympics": [
        "logo",
        "status",
    ],
    "grade": [
        "release",
        "generate",
        "collect",
        "autograde",
        "manual",
        "feedback",
        "export",
        "setup",
    ],
}
# Commands that are known to be wrong and must be flagged.
# Maps the offending command prefix to the hint shown to the doc author.
KNOWN_INVALID: Dict[str, str] = {
    # Top-level commands that were renamed, moved, or never existed.
    "tito checkpoint": "Use 'tito module status' instead",
    "tito milestones": "Use 'tito milestone' (singular) instead",
    # Subcommands that do not exist under `tito system`.
    "tito system check": "Use 'tito system health' instead",
    "tito system reset": (
        "Command doesn't exist. Use 'tito module reset' for modules"
    ),
    # Subcommands that do not exist under `tito community`.
    "tito community join": "Use 'tito community login' instead",
    "tito community update": "Use 'tito community profile' instead",
    # More top-level commands that do not exist.
    "tito jupyter": "Use 'tito system jupyter' instead",
    "tito notebooks": "Command doesn't exist",
}
def get_valid_command_set() -> Set[str]:
    """Flatten ``VALID_COMMANDS`` into a set of full command strings.

    Returns:
        A set holding ``"tito <group>"`` for every command group plus
        ``"tito <group> <subcommand>"`` for every subcommand of that group.
    """
    group_level = {f"tito {group}" for group in VALID_COMMANDS}
    subcommand_level = {
        f"tito {group} {sub}"
        for group, subcommands in VALID_COMMANDS.items()
        for sub in subcommands
    }
    return group_level | subcommand_level
# A backtick immediately before ``tito`` (inline code): the most reliable
# signal that the text is a real CLI invocation. Only lowercase words are
# captured, which excludes title-case prose such as "TITO CLI Reference".
_CODE_SPAN_RE = re.compile(r'`tito\s+([a-z][a-z0-9_-]*(?:\s+[a-z][a-z0-9_-]*)?)')

# A bare invocation at the very start of a stripped line, optionally behind a
# ``#`` comment marker, as found inside fenced bash blocks.
_LINE_START_RE = re.compile(r'^(?:#\s*)?tito\s+([a-z][a-z0-9_-]*(?:\s+[a-z][a-z0-9_-]*)?)')

# Words that indicate prose rather than a command when they follow ``tito``.
_PROSE_INDICATORS = frozenset(
    {'cli', 'command', 'commands', 'reference', 'overview', 'guide', 'tool', 'tools'}
)


def _command_from_match(match: "re.Match[str]") -> str:
    """Return the normalised ``tito ...`` command for a match, or ``""`` for prose."""
    words = match.group(1).split()
    if words[0] in _PROSE_INDICATORS:
        return ""
    return f"tito {' '.join(words)}"


def extract_tito_commands(filepath: Path) -> List[Tuple[int, str]]:
    """Extract all tito commands from a markdown file.

    Only text that looks like an actual CLI invocation is extracted: either
    ``tito`` directly after a backtick, or ``tito`` at the start of a line
    (optionally after ``#``). At most the first two words after ``tito`` are
    kept, and whitespace between them is collapsed to single spaces.

    Args:
        filepath: Markdown file to scan; read as UTF-8.

    Returns:
        List of ``(line_number, command)`` tuples in file order, with 1-based
        line numbers. An empty list is returned when the file cannot be read
        or is not valid UTF-8.
    """
    commands: List[Tuple[int, str]] = []
    try:
        content = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Best effort: an unreadable or non-UTF-8 file contributes nothing.
        # Other exception types indicate a bug and are allowed to surface.
        return commands

    for line_number, line in enumerate(content.split('\n'), 1):
        lowered = line.lower()

        # Skip lines that are clearly URLs or links.
        if 'http' in lowered or 'github.com' in lowered:
            continue

        stripped = line.strip()
        # Skip header lines that merely talk about tito (prose).
        if (
            stripped.startswith('#')
            and 'tito' in lowered
            and any(word in lowered for word in _PROSE_INDICATORS)
        ):
            continue

        found_on_line = set()

        # Backtick-delimited references first (most reliable). Repeated
        # references on the same line are all reported.
        for match in _CODE_SPAN_RE.finditer(line):
            cmd = _command_from_match(match)
            if cmd:
                commands.append((line_number, cmd))
                found_on_line.add(cmd)

        # Then a bare invocation at the start of the line. The pattern is
        # anchored, so there is at most one hit; it is skipped when the
        # backtick pattern already reported the same command for this line.
        match = _LINE_START_RE.match(stripped)
        if match:
            cmd = _command_from_match(match)
            if cmd and cmd not in found_on_line:
                commands.append((line_number, cmd))

    return commands
def find_markdown_files(base_dir: Path) -> List[Path]:
    """Find the markdown files that should be checked for CLI references.

    Every directory listed in ``DOCS_DIRS`` that exists under ``base_dir`` is
    searched recursively for ``*.md`` files; files whose path contains one of
    ``SKIP_PATTERNS`` are ignored. Markdown files directly inside ``base_dir``
    are always included.

    Args:
        base_dir: Project root to scan.

    Returns:
        Matching paths, grouped by ``DOCS_DIRS`` order with root-level files
        last; each group is sorted so results are stable across runs.
    """
    files: List[Path] = []
    for docs_dir in DOCS_DIRS:
        search_path = base_dir / docs_dir
        if not search_path.exists():
            continue
        for md_file in sorted(search_path.rglob("*.md")):
            # Match skip patterns against the path *below* base_dir only.
            # Testing the full path would let the checkout location (e.g. a
            # parent directory named "me.github.io") exclude every file.
            relative = md_file.relative_to(base_dir).as_posix()
            if any(skip in relative for skip in SKIP_PATTERNS):
                continue
            files.append(md_file)

    # Also check root-level markdown files.
    files.extend(sorted(base_dir.glob("*.md")))
    return files
def validate_command(cmd: str, valid_commands: Set[str]) -> Tuple[bool, str]:
    """Check if a command is valid.

    Args:
        cmd: Command to check, e.g. ``"tito module start"``.
        valid_commands: Flat set of valid command strings. Kept for interface
            compatibility; the checks below read ``KNOWN_INVALID`` and
            ``VALID_COMMANDS`` directly.

    Returns:
        ``(is_valid, error_message)``. The message is empty when the command
        is valid; otherwise it explains the problem or suggests a replacement.
    """
    parts = cmd.split()

    # Known-bad commands come first so the author gets the targeted hint.
    # Compare whole words rather than raw string prefixes, so that e.g.
    # "tito system checkup" is not given the hint for "tito system check".
    for invalid, suggestion in KNOWN_INVALID.items():
        invalid_parts = invalid.split()
        if parts[:len(invalid_parts)] == invalid_parts:
            return False, suggestion

    if len(parts) < 2:
        return False, "Invalid command format"

    group = parts[1]
    if group not in VALID_COMMANDS:
        return False, f"Unknown command group: {group}"

    # Groups with an empty subcommand list take free-form arguments (or
    # none), so a third word is only validated when subcommands are defined.
    subcommands = VALID_COMMANDS[group]
    if len(parts) >= 3 and subcommands and parts[2] not in subcommands:
        return False, f"Unknown subcommand: {parts[2]}. Valid: {', '.join(subcommands)}"

    return True, ""
def main():
    """Validate every ``tito`` reference found in the project's markdown files.

    Reads ``--verbose`` / ``-v`` from ``sys.argv``, locates the tinytorch
    root, scans its markdown files, and prints a report of any invalid
    command references.

    Returns:
        ``1`` when at least one invalid reference was found, otherwise ``0``.
    """
    verbose = any(flag in sys.argv for flag in ("--verbose", "-v"))

    # The script lives in tinytorch/tools/dev/, so the root is three levels
    # above this file.
    script_dir = Path(__file__).resolve().parent
    tinytorch_root = script_dir.parent.parent

    # If that directory does not hold bin/tito, fall back to the current
    # working directory: first its "tinytorch" child, then the cwd itself.
    if not (tinytorch_root / "bin" / "tito").exists():
        cwd = Path.cwd()
        for candidate in (cwd / "tinytorch", cwd):
            if (candidate / "bin" / "tito").exists():
                tinytorch_root = candidate
                break

    if verbose:
        print(f"Scanning for CLI references in: {tinytorch_root}")

    valid_commands = get_valid_command_set()
    md_files = find_markdown_files(tinytorch_root)
    if verbose:
        print(f"Found {len(md_files)} markdown files to check")

    # Each entry: (path relative to the root, line number, command, message).
    errors: List[Tuple[Path, int, str, str]] = []
    for md_file in md_files:
        for line_num, cmd in extract_tito_commands(md_file):
            is_valid, error_msg = validate_command(cmd, valid_commands)
            if is_valid:
                continue
            errors.append(
                (md_file.relative_to(tinytorch_root), line_num, cmd, error_msg)
            )

    # Success path: nothing to report unless verbose output was requested.
    if not errors:
        if verbose:
            print(f"\n All {len(md_files)} markdown files have valid CLI references!")
        return 0

    print(f"\n{'='*60}")
    print(f"CLI Documentation Validation FAILED")
    print(f"{'='*60}\n")
    print(f"Found {len(errors)} invalid CLI command reference(s):\n")
    for filepath, line, cmd, msg in errors:
        print(f" {filepath}:{line}")
        print(f" Command: {cmd}")
        print(f" Issue: {msg}")
        print()
    print("Fix these issues before committing.")
    print("Run 'tito --help' to see valid commands.\n")
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())