mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-04 00:29:10 -05:00
fix: correct invalid tito CLI commands in documentation
- Fix 56+ invalid CLI references across markdown files - Replace nonexistent commands with valid alternatives: - tito checkpoint → tito module status - tito milestones → tito milestone - tito system check/doctor → tito system health - tito community leave → tito community logout - tito reset all → tito module reset XX - tito status → tito module status / tito milestone status - Add pre-commit hook to prevent future CLI documentation drift - Organize pre-commit config for monorepo (book + tinytorch sections)
This commit is contained in:
242
tinytorch/tools/dev/validate_cli_docs.py
Normal file
242
tinytorch/tools/dev/validate_cli_docs.py
Normal file
@@ -0,0 +1,242 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validate that CLI commands referenced in documentation match actual tito CLI.
|
||||
|
||||
This script extracts all `tito X Y` commands from markdown files and validates
|
||||
them against the actual CLI structure. Runs as a pre-commit hook to catch
|
||||
documentation drift before it reaches the repo.
|
||||
|
||||
Usage:
|
||||
python tools/dev/validate_cli_docs.py [--fix] [--verbose]
|
||||
|
||||
Exit codes:
|
||||
0 - All commands valid
|
||||
1 - Invalid commands found
|
||||
"""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Set, Dict, List, Tuple
|
||||
|
||||
# Directories to scan for markdown files
|
||||
DOCS_DIRS = ["site", "modules", "tests", "milestones"]
|
||||
|
||||
# Files to skip (generated, vendored, etc.)
|
||||
SKIP_PATTERNS = [".venv", "node_modules", "_build", ".git"]
|
||||
|
||||
# Known valid commands from tito --help
|
||||
# Format: {command_group: [subcommands]}
|
||||
VALID_COMMANDS: Dict[str, List[str]] = {
|
||||
"setup": [], # No subcommands
|
||||
"update": [], # No subcommands
|
||||
"export": [], # Takes module args, not subcommands
|
||||
"test": [], # Takes module args, not subcommands
|
||||
"logo": [], # No subcommands
|
||||
"system": ["info", "health", "jupyter"],
|
||||
"module": ["start", "view", "resume", "complete", "test", "reset", "status", "list"],
|
||||
"dev": ["preflight"],
|
||||
"src": ["export", "test"],
|
||||
"package": ["reset", "nbdev"],
|
||||
"nbgrader": ["init", "generate", "release", "collect", "autograde", "feedback", "status", "analytics", "report"],
|
||||
"milestone": ["list", "run", "info", "status", "timeline", "test", "demo"],
|
||||
"community": ["login", "logout", "profile", "status", "map"],
|
||||
"benchmark": ["baseline", "capstone"],
|
||||
"olympics": ["logo", "status"],
|
||||
"grade": ["release", "generate", "collect", "autograde", "manual", "feedback", "export", "setup"],
|
||||
}
|
||||
|
||||
# Known INVALID commands that should be flagged
|
||||
KNOWN_INVALID = {
|
||||
"tito checkpoint": "Use 'tito module status' instead",
|
||||
"tito milestones": "Use 'tito milestone' (singular) instead",
|
||||
"tito system check": "Use 'tito system health' instead",
|
||||
"tito system reset": "Command doesn't exist. Use 'tito module reset' for modules",
|
||||
"tito community join": "Use 'tito community login' instead",
|
||||
"tito community update": "Use 'tito community profile' instead",
|
||||
"tito jupyter": "Use 'tito system jupyter' instead",
|
||||
"tito notebooks": "Command doesn't exist",
|
||||
}
|
||||
|
||||
|
||||
def get_valid_command_set() -> Set[str]:
|
||||
"""Build set of all valid tito commands."""
|
||||
valid = set()
|
||||
|
||||
for group, subcommands in VALID_COMMANDS.items():
|
||||
valid.add(f"tito {group}")
|
||||
for sub in subcommands:
|
||||
valid.add(f"tito {group} {sub}")
|
||||
|
||||
return valid
|
||||
|
||||
|
||||
def extract_tito_commands(filepath: Path) -> List[Tuple[int, str]]:
|
||||
"""Extract all tito commands from a markdown file.
|
||||
|
||||
Returns list of (line_number, command) tuples.
|
||||
Only extracts commands that look like actual CLI invocations.
|
||||
"""
|
||||
commands = []
|
||||
|
||||
try:
|
||||
content = filepath.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return commands
|
||||
|
||||
# Pattern matches tito commands in code blocks or inline code
|
||||
# Must start with ` or be at line start (after optional whitespace/comment chars)
|
||||
# Excludes title-case words that are clearly prose (e.g., "TITO CLI Reference")
|
||||
code_block_pattern = r'`tito\s+([a-z][a-z0-9_-]*(?:\s+[a-z][a-z0-9_-]*)?)'
|
||||
line_start_pattern = r'^(?:#\s*)?tito\s+([a-z][a-z0-9_-]*(?:\s+[a-z][a-z0-9_-]*)?)'
|
||||
|
||||
# Words that indicate prose, not commands (case-insensitive check on following word)
|
||||
PROSE_INDICATORS = {'cli', 'command', 'commands', 'reference', 'overview', 'guide', 'tool', 'tools'}
|
||||
|
||||
for i, line in enumerate(content.split('\n'), 1):
|
||||
# Skip lines that are clearly URLs or links
|
||||
if 'http' in line.lower() or 'github.com' in line.lower():
|
||||
continue
|
||||
|
||||
# Skip header lines (prose)
|
||||
if line.strip().startswith('#') and 'tito' in line.lower() and any(p in line.lower() for p in PROSE_INDICATORS):
|
||||
continue
|
||||
|
||||
# Try code block pattern first (most reliable)
|
||||
for match in re.finditer(code_block_pattern, line):
|
||||
cmd_parts = match.group(1).lower().strip().split()
|
||||
# Skip if first word after tito is a prose indicator
|
||||
if cmd_parts and cmd_parts[0] in PROSE_INDICATORS:
|
||||
continue
|
||||
cmd = f"tito {' '.join(cmd_parts)}"
|
||||
commands.append((i, cmd))
|
||||
|
||||
# Try line-start pattern for bash code blocks
|
||||
for match in re.finditer(line_start_pattern, line.strip()):
|
||||
cmd_parts = match.group(1).lower().strip().split()
|
||||
# Skip if first word after tito is a prose indicator
|
||||
if cmd_parts and cmd_parts[0] in PROSE_INDICATORS:
|
||||
continue
|
||||
cmd = f"tito {' '.join(cmd_parts)}"
|
||||
# Avoid duplicates from the code block pattern
|
||||
if (i, cmd) not in commands:
|
||||
commands.append((i, cmd))
|
||||
|
||||
return commands
|
||||
|
||||
|
||||
def find_markdown_files(base_dir: Path) -> List[Path]:
|
||||
"""Find all markdown files in specified directories."""
|
||||
files = []
|
||||
|
||||
for docs_dir in DOCS_DIRS:
|
||||
search_path = base_dir / docs_dir
|
||||
if search_path.exists():
|
||||
for md_file in search_path.rglob("*.md"):
|
||||
# Skip files in ignored directories
|
||||
if any(skip in str(md_file) for skip in SKIP_PATTERNS):
|
||||
continue
|
||||
files.append(md_file)
|
||||
|
||||
# Also check root-level markdown files
|
||||
for md_file in base_dir.glob("*.md"):
|
||||
files.append(md_file)
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def validate_command(cmd: str, valid_commands: Set[str]) -> Tuple[bool, str]:
|
||||
"""Check if a command is valid.
|
||||
|
||||
Returns (is_valid, error_message).
|
||||
"""
|
||||
# Check against known invalid patterns first
|
||||
for invalid, suggestion in KNOWN_INVALID.items():
|
||||
if cmd.startswith(invalid):
|
||||
return False, suggestion
|
||||
|
||||
# Check if it's a valid base command
|
||||
parts = cmd.split()
|
||||
if len(parts) < 2:
|
||||
return False, "Invalid command format"
|
||||
|
||||
base_cmd = f"{parts[0]} {parts[1]}"
|
||||
|
||||
# Check if group exists
|
||||
if parts[1] not in VALID_COMMANDS:
|
||||
return False, f"Unknown command group: {parts[1]}"
|
||||
|
||||
# If command has subcommand, validate it
|
||||
if len(parts) >= 3:
|
||||
full_cmd = f"{parts[0]} {parts[1]} {parts[2]}"
|
||||
subcommands = VALID_COMMANDS.get(parts[1], [])
|
||||
|
||||
# If this group has defined subcommands, check them
|
||||
if subcommands and parts[2] not in subcommands:
|
||||
return False, f"Unknown subcommand: {parts[2]}. Valid: {', '.join(subcommands)}"
|
||||
|
||||
return True, ""
|
||||
|
||||
|
||||
def main():
|
||||
verbose = "--verbose" in sys.argv or "-v" in sys.argv
|
||||
|
||||
# Find tinytorch root (script is in tinytorch/tools/dev/)
|
||||
script_path = Path(__file__).resolve()
|
||||
tinytorch_root = script_path.parent.parent.parent
|
||||
|
||||
# If tinytorch_root is not actually tinytorch (e.g., we're in a different structure),
|
||||
# try to find it from current working directory
|
||||
if not (tinytorch_root / "bin" / "tito").exists():
|
||||
cwd = Path.cwd()
|
||||
if (cwd / "tinytorch" / "bin" / "tito").exists():
|
||||
tinytorch_root = cwd / "tinytorch"
|
||||
elif (cwd / "bin" / "tito").exists():
|
||||
tinytorch_root = cwd
|
||||
|
||||
if verbose:
|
||||
print(f"Scanning for CLI references in: {tinytorch_root}")
|
||||
|
||||
valid_commands = get_valid_command_set()
|
||||
md_files = find_markdown_files(tinytorch_root)
|
||||
|
||||
if verbose:
|
||||
print(f"Found {len(md_files)} markdown files to check")
|
||||
|
||||
errors: List[Tuple[Path, int, str, str]] = []
|
||||
|
||||
for md_file in md_files:
|
||||
commands = extract_tito_commands(md_file)
|
||||
|
||||
for line_num, cmd in commands:
|
||||
is_valid, error_msg = validate_command(cmd, valid_commands)
|
||||
|
||||
if not is_valid:
|
||||
rel_path = md_file.relative_to(tinytorch_root)
|
||||
errors.append((rel_path, line_num, cmd, error_msg))
|
||||
|
||||
if errors:
|
||||
print(f"\n{'='*60}")
|
||||
print(f"CLI Documentation Validation FAILED")
|
||||
print(f"{'='*60}\n")
|
||||
print(f"Found {len(errors)} invalid CLI command reference(s):\n")
|
||||
|
||||
for filepath, line, cmd, msg in errors:
|
||||
print(f" {filepath}:{line}")
|
||||
print(f" Command: {cmd}")
|
||||
print(f" Issue: {msg}")
|
||||
print()
|
||||
|
||||
print("Fix these issues before committing.")
|
||||
print("Run 'tito --help' to see valid commands.\n")
|
||||
return 1
|
||||
|
||||
if verbose:
|
||||
print(f"\n All {len(md_files)} markdown files have valid CLI references!")
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user