""" Module Test Command for TinyTorch CLI. Provides comprehensive module testing functionality: - Run individual module tests with educational output - Three-phase testing: Inline → Module → Integration - Display detailed test results with WHAT/WHY context - Track test failures and successes This enables students to verify their implementations and understand what each test is checking and why it matters. TESTING PHILOSOPHY: ================== When a student runs `tito module test 05`, we want them to understand: 1. Does my implementation work? (Inline tests) 2. Does it handle edge cases? (Module tests with --tinytorch) 3. Does it integrate correctly with previous modules? (Integration tests) Each phase builds confidence and understanding. """ import subprocess import sys from argparse import ArgumentParser, Namespace from pathlib import Path from typing import Dict, List, Tuple, Optional from rich.panel import Panel from rich.table import Table from rich.text import Text from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn from rich.console import Console, Group from rich.rule import Rule from ..base import BaseCommand from ...core.modules import get_module_mapping, normalize_module_number class ModuleTestCommand(BaseCommand): """Command to test module implementations with educational output.""" @property def name(self) -> str: return "test" @property def description(self) -> str: return "Run module tests to verify implementation" def add_arguments(self, parser: ArgumentParser) -> None: """Add test command arguments.""" parser.add_argument( "module_number", nargs="?", default=None, help="Module number to test (01, 02, etc.)", ) parser.add_argument( "--all", action="store_true", help="Test all modules sequentially" ) parser.add_argument( "--verbose", "-v", action="store_true", help="Show detailed test output", ) parser.add_argument( "--stop-on-fail", action="store_true", help="Stop testing if a module fails (only with --all)", ) parser.add_argument( "--summary", action="store_true", help="Show only summary without running tests", ) parser.add_argument( "--unit-only", action="store_true", help="Run only inline unit tests (skip pytest and integration)", ) parser.add_argument( "--no-integration", action="store_true", help="Skip integration tests", ) # Module mapping and normalization now imported from core.modules def run_inline_tests( self, module_name: str, module_number: str, verbose: bool = False ) -> Tuple[bool, str]: """ Phase 1: Run inline unit tests from the module source file. These are the quick sanity checks embedded in the module itself, triggered by the if __name__ == "__main__" block. """ console = self.console src_dir = self.config.project_root / "src" module_file = src_dir / module_name / f"{module_name}.py" if not module_file.exists(): return False, f"Module file not found: {module_file}" try: result = subprocess.run( [sys.executable, str(module_file)], capture_output=True, text=True, cwd=self.config.project_root, timeout=300, ) if verbose: if result.stdout: console.print("[dim]" + result.stdout + "[/dim]") if result.stderr: console.print("[yellow]" + result.stderr + "[/yellow]") if result.returncode == 0: return True, result.stdout else: return False, result.stderr except subprocess.TimeoutExpired: return False, "Test timeout (>5 minutes)" except Exception as e: return False, f"Test execution failed: {str(e)}" def run_module_pytest( self, module_name: str, module_number: str, verbose: bool = False ) -> Tuple[bool, str]: """ Phase 2: Run pytest on module-specific tests with educational output. These tests use the --tinytorch flag to provide WHAT/WHY context for each test, helping students understand what's being checked. """ console = self.console tests_dir = self.config.project_root / "tests" / module_name if not tests_dir.exists(): # No module-specific tests - that's OK return True, "No module-specific tests found" try: # Run pytest with --tinytorch for educational output # Use --no-cov to avoid root pyproject.toml coverage requirements cmd = [ sys.executable, "-m", "pytest", str(tests_dir), "--tinytorch", "-v" if verbose else "-q", "--tb=short", "--no-cov", ] result = subprocess.run( cmd, capture_output=True, text=True, cwd=self.config.project_root, timeout=300, ) # Always show pytest output for educational value if result.stdout: console.print(result.stdout) if result.stderr and verbose: console.print("[yellow]" + result.stderr + "[/yellow]") if result.returncode == 0: return True, result.stdout else: return False, result.stderr or result.stdout except subprocess.TimeoutExpired: return False, "Pytest timeout (>5 minutes)" except Exception as e: return False, f"Pytest execution failed: {str(e)}" def run_integration_tests( self, module_number: str, verbose: bool = False ) -> Tuple[bool, str]: """ Phase 3: Run integration tests for modules 01 through N. This verifies that the student's implementation works correctly with all the previous modules they've built. """ console = self.console integration_dir = self.config.project_root / "tests" / "integration" if not integration_dir.exists(): return True, "No integration tests directory" # Find integration tests relevant to this module and earlier module_num = int(module_number) # Key integration test files that should run progressively relevant_tests = [] # Map module numbers to relevant integration tests # Each module inherits tests from earlier modules (progressive testing) # IMPORTANT: Only map tests that use features available UP TO that module! integration_test_map = { # Foundation modules (01-08) 1: ["test_basic_integration.py"], 2: ["test_basic_integration.py"], 3: ["test_layers_integration.py"], 4: [], # Loss tests that need training moved to module 7+ 5: ["test_dataloader_integration.py"], # DataLoader 6: [], # Autograd-only tests (gradient_flow requires optimizers) 7: ["test_loss_gradients.py", "test_gradient_flow.py", "test_training_flow.py"], # Optimizers - can now run gradient tests 8: ["test_training_flow.py"], # Training # Architecture modules (09-13) 9: ["test_cnn_integration.py"], 10: [], # Tokenization: self-contained, no integration deps 11: [], # Embeddings: tested in NLP pipeline (module 12) 12: ["test_nlp_pipeline_flow.py"], # Attention 13: ["test_nlp_pipeline_flow.py"], # Transformers # Performance modules (14-19) - build on all previous # These use the same integration tests to ensure optimizations # don't break existing functionality 14: [], # Profiling: observational, no integration changes 15: [], # Quantization: tested in module-specific tests 16: [], # Compression: tested in module-specific tests 17: [], # Acceleration: tested in module-specific tests 18: [], # Memoization: tested in module-specific tests 19: [], # Benchmarking: tested in module-specific tests # Capstone (20) - runs comprehensive validation 20: ["test_training_flow.py", "test_nlp_pipeline_flow.py", "test_cnn_integration.py"], } # Collect all relevant tests up to and including this module for i in range(1, module_num + 1): if i in integration_test_map: for test_file in integration_test_map[i]: test_path = integration_dir / test_file if test_path.exists() and str(test_path) not in relevant_tests: relevant_tests.append(str(test_path)) if not relevant_tests: return True, "No relevant integration tests for this module" try: # Use --no-cov to avoid root pyproject.toml coverage requirements cmd = [ sys.executable, "-m", "pytest", *relevant_tests, "--tinytorch", "-v" if verbose else "-q", "--tb=short", "--no-cov", ] result = subprocess.run( cmd, capture_output=True, text=True, cwd=self.config.project_root, timeout=600, # 10 minute timeout for integration tests ) if result.stdout: console.print(result.stdout) if result.stderr and verbose: console.print("[yellow]" + result.stderr + "[/yellow]") if result.returncode == 0: return True, result.stdout else: return False, result.stderr or result.stdout except subprocess.TimeoutExpired: return False, "Integration tests timeout (>10 minutes)" except Exception as e: return False, f"Integration tests failed: {str(e)}" def test_module( self, module_name: str, module_number: str, verbose: bool = False, unit_only: bool = False, no_integration: bool = False ) -> Tuple[bool, str]: """ Run comprehensive tests for a single module in three phases: Phase 1 - Inline Tests: Quick sanity checks from the module itself Phase 2 - Module Tests: Detailed pytest with educational output Phase 3 - Integration Tests: Verify compatibility with earlier modules Returns: (success, output) tuple """ console = self.console all_passed = True all_output = [] # Header console.print() console.print(Panel( f"[bold cyan]Testing Module {module_number}: {module_name}[/bold cyan]\n\n" "[dim]Three-phase testing ensures your implementation is correct,[/dim]\n" "[dim]handles edge cases, and integrates with previous modules.[/dim]", border_style="cyan", )) console.print() # ───────────────────────────────────────────────────────────── # Phase 1: Inline Unit Tests # ───────────────────────────────────────────────────────────── console.print(Rule("[bold yellow]Phase 1: Inline Unit Tests[/bold yellow]", style="yellow")) console.print("[dim]Running quick sanity checks from the module source...[/dim]") console.print() success, output = self.run_inline_tests(module_name, module_number, verbose) all_output.append(output) if success: console.print("[green]✓ Phase 1 PASSED: Inline unit tests[/green]") else: console.print("[red]✗ Phase 1 FAILED: Inline unit tests[/red]") if not verbose: console.print(f"[dim]{output[:500]}...[/dim]" if len(output) > 500 else f"[dim]{output}[/dim]") all_passed = False console.print() # Stop here if unit-only mode if unit_only: return all_passed, "\n".join(all_output) # ───────────────────────────────────────────────────────────── # Phase 2: Module Pytest Tests # ───────────────────────────────────────────────────────────── console.print(Rule("[bold blue]Phase 2: Module Tests (with educational output)[/bold blue]", style="blue")) console.print("[dim]Running pytest with WHAT/WHY context for each test...[/dim]") console.print() success, output = self.run_module_pytest(module_name, module_number, verbose) all_output.append(output) if success: console.print("[green]✓ Phase 2 PASSED: Module tests[/green]") else: console.print("[red]✗ Phase 2 FAILED: Module tests[/red]") all_passed = False console.print() # ───────────────────────────────────────────────────────────── # Phase 3: Integration Tests (optional) # ───────────────────────────────────────────────────────────── if not no_integration: console.print(Rule("[bold magenta]Phase 3: Integration Tests[/bold magenta]", style="magenta")) console.print(f"[dim]Verifying Module {module_number} works with modules 01-{module_number}...[/dim]") console.print() success, output = self.run_integration_tests(module_number, verbose) all_output.append(output) if success: console.print("[green]✓ Phase 3 PASSED: Integration tests[/green]") else: console.print("[red]✗ Phase 3 FAILED: Integration tests[/red]") all_passed = False console.print() return all_passed, "\n".join(all_output) def test_all_modules( self, verbose: bool = False, stop_on_fail: bool = False ) -> int: """Test all modules sequentially.""" console = self.console module_mapping = get_module_mapping() console.print() console.print( Panel( f"[bold cyan]Running All Module Tests[/bold cyan]\n\n" f"[bold]Testing {len(module_mapping)} modules sequentially[/bold]\n" f" • Verbose: {'Yes' if verbose else 'No'}\n" f" • Stop on failure: {'Yes' if stop_on_fail else 'No'}\n\n" f"[dim]This will take several minutes...[/dim]", title="🧪 Test All Modules", border_style="cyan", ) ) console.print() passed = [] failed = [] errors = {} with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TaskProgressColumn(), console=console ) as progress: task = progress.add_task("[cyan]Testing modules...", total=len(module_mapping)) for module_num, module_name in sorted(module_mapping.items()): progress.update(task, description=f"[cyan]Testing Module {module_num}: {module_name}...") success, output = self.test_module(module_name, module_num, verbose) progress.advance(task) if success: passed.append((module_num, module_name)) else: failed.append((module_num, module_name)) errors[module_num] = output if stop_on_fail: console.print() console.print( Panel( f"[red]Testing stopped due to failure in Module {module_num}[/red]\n\n" f"[dim]Use --verbose to see full error details[/dim]", title="Stopped on Failure", border_style="red", ) ) break console.print() # Display summary console.print() console.print("[bold cyan]" + "=" * 70 + "[/bold cyan]") console.print("[bold cyan]Test Summary[/bold cyan]") console.print("[bold cyan]" + "=" * 70 + "[/bold cyan]") console.print() # Create results table table = Table(title="Module Test Results", show_header=True) table.add_column("Module", style="cyan") table.add_column("Name", style="dim") table.add_column("Status", justify="center") for module_num, module_name in sorted(module_mapping.items()): if (module_num, module_name) in passed: status = "[green]✓ PASS[/green]" elif (module_num, module_name) in failed: status = "[red]✗ FAIL[/red]" else: status = "[dim]⏭ SKIPPED[/dim]" table.add_row(f"Module {module_num}", module_name, status) console.print(table) console.print() # Summary stats total = len(module_mapping) pass_count = len(passed) fail_count = len(failed) skip_count = total - pass_count - fail_count if fail_count == 0: console.print( Panel( f"[bold green]✅ ALL TESTS PASSED![/bold green]\n\n" f"[green]Passed: {pass_count}/{total} modules[/green]\n\n" f"[bold]All TinyTorch modules are working correctly![/bold]", title="🎉 Success", border_style="green", ) ) return 0 else: console.print( Panel( f"[bold red]❌ SOME TESTS FAILED[/bold red]\n\n" f"[green]Passed: {pass_count} modules[/green]\n" f"[red]Failed: {fail_count} modules[/red]\n" + (f"[dim]Skipped: {skip_count} modules[/dim]\n" if skip_count > 0 else "") + f"\n[bold]Failed modules:[/bold]\n" + "\n".join([f" • Module {num}: {name}" for num, name in failed]), title="⚠️ Test Failures", border_style="red", ) ) # Show error details for failed modules if errors and not verbose: console.print() console.print("[yellow]Failure details (run with --verbose for full output):[/yellow]") console.print() for module_num in sorted(errors.keys()): console.print(f"[red]Module {module_num}:[/red]") console.print(f"[dim]{errors[module_num][:500]}...[/dim]") console.print() return 1 def run(self, args: Namespace) -> int: """Execute the test command.""" console = self.console # Handle --all (test all modules) if getattr(args, "all", False): return self.test_all_modules( verbose=args.verbose, stop_on_fail=args.stop_on_fail ) # Require module number for single module test if not args.module_number: console.print( Panel( "[red]Error: Module number required[/red]\n\n" "[dim]Examples:[/dim]\n" "[dim] tito module test 01 # Test module 01[/dim]\n" "[dim] tito module test 01 -v # Test with verbose output[/dim]\n" "[dim] tito module test --all # Test all modules[/dim]", title="Module Number Required", border_style="red", ) ) return 1 # Normalize and validate module number module_mapping = get_module_mapping() normalized = normalize_module_number(args.module_number) if normalized not in module_mapping: console.print(f"[red]Invalid module number: {args.module_number}[/red]") console.print("Available modules: 01-20") return 1 module_name = module_mapping[normalized] # Test single module with enhanced three-phase testing success, output = self.test_module( module_name, normalized, verbose=args.verbose, unit_only=getattr(args, "unit_only", False), no_integration=getattr(args, "no_integration", False), ) if success: console.print( Panel( f"[bold green]✅ Module {normalized} - All Tests Passed![/bold green]\n\n" f"[green]Your {module_name} implementation is working correctly[/green]\n" f"[green]and integrates well with previous modules.[/green]", title=f"✓ {module_name}", border_style="green", ) ) return 0 else: console.print( Panel( f"[bold red]❌ Module {normalized} - Some Tests Failed[/bold red]\n\n" f"[yellow]Review the test output above to understand what failed.[/yellow]\n" f"[dim]Each test includes WHAT it's checking and WHY it matters.[/dim]\n\n" f"[dim]Tips:[/dim]\n" f"[dim] • Use -v flag for more detailed output[/dim]\n" f"[dim] • Use --unit-only to test just inline tests[/dim]\n" f"[dim] • Use --no-integration to skip integration tests[/dim]", title=f"✗ {module_name}", border_style="red", ) ) return 1