Release preparation: fix package exports, tests, and documentation

Package exports:
- Fix tinytorch/__init__.py to export all required components for milestones
- Add Dense as alias for Linear for compatibility
- Add loss functions (MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss)
- Export spatial operations, data loaders, and transformer components

Test infrastructure:
- Create tests/conftest.py to handle path setup
- Create tests/test_utils.py with shared test utilities
- Rename test_progressive_integration.py files to include module number
- Fix syntax errors in test files (spaces in class names)
- Remove stale test file referencing non-existent modules

Documentation:
- Update README.md with correct milestone file names
- Fix milestone requirements to match actual module dependencies

Export system:
- Run tito export --all to regenerate package from source modules
- Ensure all 20 modules are properly exported
Author: Vijay Janapa Reddi
Date: 2025-12-02 14:19:56 -05:00
Parent: 4b22d229d4
Commit: bd7fcb2177
42 changed files with 1526 additions and 393 deletions

View File

@@ -317,26 +317,28 @@ tito module complete 01
As you complete modules, unlock historical ML milestones demonstrating YOUR implementations:
-### 🧠 01. Perceptron (1957) - After Module 03
+### 🧠 01. Perceptron (1957) - After Module 07
```bash
cd milestones/01_1957_perceptron
-python perceptron_trained.py
+python 01_rosenblatt_forward.py  # Forward pass demo (after Module 03)
+python 02_rosenblatt_trained.py  # Training demo (after Module 07)
# Rosenblatt's first trainable neural network
# YOUR Linear layer + Sigmoid recreates history!
```
-**Requirements**: Modules 01-03 (Tensor, Activations, Layers)
+**Requirements**: Modules 01-07 (Tensor through Training)
**Achievement**: Binary classification with gradient descent
---
-### ⚡ 02. XOR Crisis (1969) - After Module 05
+### ⚡ 02. XOR Crisis (1969) - After Module 07
```bash
-cd milestones/02_1969_xor_crisis
-python xor_solved.py
+cd milestones/02_1969_xor
+python 01_xor_crisis.py  # Demonstrate the problem
+python 02_xor_solved.py  # Solve with hidden layers!
# Solve Minsky's XOR challenge with hidden layers
# YOUR autograd enables multi-layer learning!
```
-**Requirements**: Modules 01-05 (+ Autograd)
+**Requirements**: Modules 01-07 (Tensor through Training)
**Achievement**: Non-linear problem solving
---

View File

@@ -14,7 +14,7 @@ from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
-class TestModule01Still Working:
+class TestModule01StillWorking:
"""Verify Module 01 (Setup) functionality is still intact."""
def test_setup_environment_stable(self):
@@ -88,7 +88,7 @@ class TestModule03ActivationsCore:
assert True, "Module 02: Sigmoid not implemented yet"
-class TestProgressive StackIntegration:
+class TestProgressiveStackIntegration:
"""Test that the full stack (01→02→03) works together."""
def test_tensor_activation_pipeline(self):

View File

@@ -1,334 +0,0 @@
"""
Integration Tests - Layers and Dense Networks
Tests cross-module interfaces and compatibility between individual Layers and Dense Network modules.
Focuses on integration, not re-testing individual module functionality.
"""
import pytest
import numpy as np
from test_utils import setup_integration_test
# Ensure proper setup before importing
setup_integration_test()
# Import ONLY from TinyTorch package
from tinytorch.core.tensor import Tensor
from tinytorch.core.layers import Dense
from tinytorch.core.dense import Sequential, create_mlp, MLP
from tinytorch.core.activations import ReLU, Sigmoid, Tanh
class TestLayersDenseNetworkInterface:
"""Test interface compatibility between individual Layers and Dense Networks."""
def test_dense_layer_to_sequential_network(self):
"""Test that Dense layers can be integrated into Sequential networks."""
# Create individual dense layers
layer1 = Dense(input_size=4, output_size=8)
layer2 = Dense(input_size=8, output_size=3)
# Test integration into Sequential
network = Sequential([layer1, ReLU(), layer2])
# Test interface compatibility
x = Tensor(np.random.randn(2, 4))
result = network(x)
# Verify integration works
assert isinstance(result, Tensor), "Sequential should work with Dense layers"
assert result.shape == (2, 3), "Sequential should process through all layers"
def test_dense_layer_compatibility_with_mlp(self):
"""Test that Dense layers are compatible with MLP construction."""
# Test that MLP uses same interface as individual Dense layers
individual_layer = Dense(input_size=6, output_size=10)
mlp_network = create_mlp(input_size=6, hidden_sizes=[10], output_size=3)
# Test same input works with both
x = Tensor(np.random.randn(1, 6))
# Individual layer output
layer_output = individual_layer(x)
# MLP output (should accept same input)
mlp_output = mlp_network(x)
# Verify interface compatibility
assert isinstance(layer_output, Tensor), "Dense layer should return Tensor"
assert isinstance(mlp_output, Tensor), "MLP should return Tensor"
assert layer_output.shape == (1, 10), "Dense layer should have expected output shape"
assert mlp_output.shape == (1, 3), "MLP should have expected output shape"
def test_layer_output_as_network_input(self):
"""Test that Dense layer output can be used as network input."""
# Create preprocessing layer
preprocessor = Dense(input_size=5, output_size=8)
# Create network that processes preprocessor output
network = Sequential([
Dense(input_size=8, output_size=12),
ReLU(),
Dense(input_size=12, output_size=4)
])
# Test pipeline: input → layer → network
x = Tensor(np.random.randn(3, 5))
preprocessed = preprocessor(x)
final_output = network(preprocessed)
# Verify pipeline interface
assert isinstance(preprocessed, Tensor), "Layer should produce Tensor for network"
assert isinstance(final_output, Tensor), "Network should accept layer output"
assert final_output.shape == (3, 4), "Pipeline should work end-to-end"
def test_network_layer_composition(self):
"""Test that networks can be composed with individual layers."""
# Create base network
base_network = create_mlp(input_size=4, hidden_sizes=[6], output_size=8)
# Add additional processing layer
final_layer = Dense(input_size=8, output_size=2)
# Test composition
x = Tensor(np.random.randn(2, 4))
# Pipeline: input → network → layer
network_output = base_network(x)
final_output = final_layer(network_output)
# Verify composition interface
assert isinstance(network_output, Tensor), "Network should produce Tensor for layer"
assert isinstance(final_output, Tensor), "Layer should accept network output"
assert network_output.shape == (2, 8), "Network output should have expected shape"
assert final_output.shape == (2, 2), "Layer should process network output correctly"
class TestLayerNetworkDataFlow:
"""Test data flow compatibility between layers and networks."""
def test_shape_preservation_across_layer_network_boundary(self):
"""Test shape preservation when crossing layer-network boundaries."""
shape_configs = [
(1, 4, 8, 2), # Single sample
(5, 6, 10, 3), # Small batch
(10, 8, 16, 4), # Larger batch
]
for batch_size, input_size, hidden_size, output_size in shape_configs:
# Create layer and network
layer = Dense(input_size=input_size, output_size=hidden_size)
network = Sequential([
Dense(input_size=hidden_size, output_size=hidden_size),
ReLU(),
Dense(input_size=hidden_size, output_size=output_size)
])
# Test data flow
x = Tensor(np.random.randn(batch_size, input_size))
layer_out = layer(x)
network_out = network(layer_out)
# Verify shape flow
            assert layer_out.shape == (batch_size, hidden_size), f"Layer should output correct shape for config {(batch_size, input_size, hidden_size, output_size)}"
            assert network_out.shape == (batch_size, output_size), f"Network should output correct shape for config {(batch_size, input_size, hidden_size, output_size)}"
def test_dtype_preservation_across_layer_network_boundary(self):
"""Test data type preservation across layer-network boundaries."""
# Test float32 flow
layer_f32 = Dense(input_size=4, output_size=6)
network_f32 = create_mlp(input_size=6, hidden_sizes=[8], output_size=2)
x_f32 = Tensor(np.random.randn(2, 4).astype(np.float32))
layer_out_f32 = layer_f32(x_f32)
network_out_f32 = network_f32(layer_out_f32)
# Verify dtype preservation
assert layer_out_f32.dtype == np.float32, "Layer should preserve float32"
assert network_out_f32.dtype == np.float32, "Network should preserve float32 from layer"
# Test float64 flow
layer_f64 = Dense(input_size=4, output_size=6)
network_f64 = create_mlp(input_size=6, hidden_sizes=[8], output_size=2)
x_f64 = Tensor(np.random.randn(2, 4).astype(np.float64))
layer_out_f64 = layer_f64(x_f64)
network_out_f64 = network_f64(layer_out_f64)
# Verify dtype preservation
assert layer_out_f64.dtype == np.float64, "Layer should preserve float64"
assert network_out_f64.dtype == np.float64, "Network should preserve float64 from layer"
def test_error_handling_at_layer_network_boundary(self):
"""Test error handling when layer-network interfaces are incompatible."""
# Create mismatched layer and network
layer = Dense(input_size=4, output_size=6)
mismatched_network = Sequential([Dense(input_size=8, output_size=2)]) # Expects 8, gets 6
x = Tensor(np.random.randn(1, 4))
layer_output = layer(x) # Shape (1, 6)
        # Should fail with a dimension mismatch; pytest.raises avoids the
        # assert-inside-try pitfall, where the AssertionError raised by the
        # test's own `assert False` would be swallowed by the except clause
        with pytest.raises((ValueError, AssertionError, TypeError)):
            mismatched_network(layer_output)  # Expects (1, 8), gets (1, 6)
class TestLayerNetworkSystemIntegration:
"""Test system-level integration scenarios with layers and networks."""
def test_multi_stage_processing_pipeline(self):
"""Test multi-stage processing using layers and networks."""
# Stage 1: Preprocessing layer
preprocessor = Dense(input_size=8, output_size=12)
# Stage 2: Feature extraction network
feature_extractor = Sequential([
Dense(input_size=12, output_size=16),
ReLU(),
Dense(input_size=16, output_size=10)
])
# Stage 3: Classification layer
classifier = Dense(input_size=10, output_size=3)
# Test complete pipeline
x = Tensor(np.random.randn(4, 8))
preprocessed = preprocessor(x)
features = feature_extractor(preprocessed)
predictions = classifier(features)
# Verify multi-stage integration
assert isinstance(preprocessed, Tensor), "Preprocessor should output Tensor"
assert isinstance(features, Tensor), "Feature extractor should output Tensor"
assert isinstance(predictions, Tensor), "Classifier should output Tensor"
assert predictions.shape == (4, 3), "Pipeline should produce expected final shape"
def test_parallel_layer_processing(self):
"""Test parallel processing with multiple layers feeding into network."""
# Create parallel processing layers
branch1 = Dense(input_size=6, output_size=4)
branch2 = Dense(input_size=6, output_size=4)
branch3 = Dense(input_size=6, output_size=4)
# Fusion network
fusion_network = Sequential([
Dense(input_size=12, output_size=8), # 4+4+4=12 from parallel branches
ReLU(),
Dense(input_size=8, output_size=2)
])
# Test parallel processing
x = Tensor(np.random.randn(2, 6))
# Process in parallel
out1 = branch1(x)
out2 = branch2(x)
out3 = branch3(x)
# Manually concatenate (simulating fusion)
# In a real implementation, this would be handled by a concatenation layer
fused_data = np.concatenate([out1.data, out2.data, out3.data], axis=1)
fused_tensor = Tensor(fused_data)
# Final processing
final_output = fusion_network(fused_tensor)
# Verify parallel processing integration
assert out1.shape == (2, 4), "Branch 1 should output correct shape"
assert out2.shape == (2, 4), "Branch 2 should output correct shape"
assert out3.shape == (2, 4), "Branch 3 should output correct shape"
assert fused_tensor.shape == (2, 12), "Fusion should combine all branches"
assert final_output.shape == (2, 2), "Final network should process fused input"
def test_layer_network_modularity(self):
"""Test that layers and networks can be replaced modularly."""
# Create modular components
input_processors = [
Dense(input_size=5, output_size=8),
Dense(input_size=5, output_size=8), # Different instance
]
core_networks = [
create_mlp(input_size=8, hidden_sizes=[10], output_size=6),
Sequential([Dense(input_size=8, output_size=6)]), # Different architecture
]
output_processors = [
Dense(input_size=6, output_size=3),
Dense(input_size=6, output_size=3), # Different instance
]
# Test all combinations work
x = Tensor(np.random.randn(1, 5))
for input_proc in input_processors:
for core_net in core_networks:
for output_proc in output_processors:
# Test modular pipeline
intermediate1 = input_proc(x)
intermediate2 = core_net(intermediate1)
final = output_proc(intermediate2)
# Verify modularity
assert isinstance(final, Tensor), "Modular combination should work"
assert final.shape == (1, 3), "Modular combination should produce expected output"
class TestLayerNetworkInterfaceStandards:
"""Test that layers and networks follow consistent interface standards."""
def test_consistent_call_interface(self):
"""Test that layers and networks have consistent callable interface."""
# Create different components
components = [
Dense(input_size=4, output_size=6),
Sequential([Dense(input_size=4, output_size=6)]),
create_mlp(input_size=4, hidden_sizes=[8], output_size=6),
MLP([4, 8, 6])
]
x = Tensor(np.random.randn(1, 4))
# Test all components have consistent interface
for component in components:
# Should be callable with same signature
result = component(x)
# Verify consistent interface
assert isinstance(result, Tensor), f"{type(component).__name__} should return Tensor"
assert result.shape[0] == 1, f"{type(component).__name__} should preserve batch dimension"
assert result.shape[1] == 6, f"{type(component).__name__} should produce expected output size"
def test_component_property_consistency(self):
"""Test that layers and networks have consistent properties."""
# Create components
layer = Dense(input_size=3, output_size=5)
network = Sequential([Dense(input_size=3, output_size=5)])
mlp = create_mlp(input_size=3, hidden_sizes=[], output_size=5)
# Test that all components can be used interchangeably
x = Tensor(np.random.randn(2, 3))
results = []
for component in [layer, network, mlp]:
result = component(x)
results.append(result)
# Verify consistent interface properties
assert hasattr(result, 'shape'), f"{type(component).__name__} result should have shape"
assert hasattr(result, 'data'), f"{type(component).__name__} result should have data"
assert hasattr(result, 'dtype'), f"{type(component).__name__} result should have dtype"
# All should produce same output shape
expected_shape = (2, 5)
for i, result in enumerate(results):
assert result.shape == expected_shape, f"Component {i} should produce consistent shape"
if __name__ == "__main__":
pytest.main([__file__])

View File: tests/conftest.py (new file, 29 lines)

@@ -0,0 +1,29 @@
"""
Pytest configuration for TinyTorch tests.
This file is automatically loaded by pytest and sets up the test environment.
"""
import sys
import os
from pathlib import Path
# Add tests directory to Python path so test_utils can be imported
tests_dir = Path(__file__).parent
if str(tests_dir) not in sys.path:
sys.path.insert(0, str(tests_dir))
# Add project root to Python path
project_root = tests_dir.parent
if str(project_root) not in sys.path:
sys.path.insert(0, str(project_root))
# Set quiet mode for tinytorch imports during tests
os.environ['TINYTORCH_QUIET'] = '1'
# Import test utilities to make them available
try:
from test_utils import setup_integration_test, create_test_tensor, assert_tensors_close
except ImportError:
pass # test_utils not yet created or has issues

View File: tests/test_utils.py (new file, 114 lines)

@@ -0,0 +1,114 @@
"""
TinyTorch Test Utilities
Shared utilities for integration tests across all modules.
Provides setup functions and common test helpers.
"""
import sys
import os
from pathlib import Path
def setup_integration_test():
"""
Set up the environment for integration testing.
This function ensures:
1. The TinyTorch package is importable
2. NumPy random seed is set for reproducibility
3. Warning filters are set appropriately
Call this at the top of integration test files before importing TinyTorch.
"""
import warnings
import numpy as np
# Ensure tinytorch is on the path (from project root)
project_root = Path(__file__).parent.parent
if str(project_root) not in sys.path:
sys.path.insert(0, str(project_root))
# Set random seed for reproducibility
np.random.seed(42)
# Suppress certain warnings during tests
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
# Set quiet mode for tinytorch imports during tests
os.environ['TINYTORCH_QUIET'] = '1'
def get_project_root() -> Path:
"""Return the project root directory."""
return Path(__file__).parent.parent
def get_test_data_path() -> Path:
"""Return the path to test data directory."""
return get_project_root() / "datasets"
def create_test_tensor(shape, requires_grad=True, seed=None):
"""
Create a test tensor with random data.
Args:
shape: Tuple specifying tensor shape
requires_grad: Whether tensor should track gradients
seed: Optional random seed for reproducibility
Returns:
Tensor with random data
"""
import numpy as np
from tinytorch.core.tensor import Tensor
if seed is not None:
np.random.seed(seed)
data = np.random.randn(*shape).astype(np.float32)
return Tensor(data, requires_grad=requires_grad)
def assert_tensors_close(t1, t2, rtol=1e-5, atol=1e-8, msg=""):
"""
Assert that two tensors are element-wise close.
Args:
t1: First tensor
t2: Second tensor
rtol: Relative tolerance
atol: Absolute tolerance
msg: Optional message for assertion error
"""
import numpy as np
# Extract data from tensors if needed
data1 = t1.data if hasattr(t1, 'data') else t1
data2 = t2.data if hasattr(t2, 'data') else t2
if not np.allclose(data1, data2, rtol=rtol, atol=atol):
diff = np.abs(data1 - data2)
max_diff = np.max(diff)
raise AssertionError(
f"Tensors not close (max diff: {max_diff:.6e}). {msg}"
)
def assert_gradients_exist(tensor, msg=""):
"""Assert that a tensor has computed gradients."""
if tensor.grad is None:
raise AssertionError(f"Tensor has no gradients. {msg}")
def skip_if_no_tinytorch():
"""Pytest skip decorator for when tinytorch isn't available."""
import pytest
try:
import tinytorch
return pytest.mark.skipif(False, reason="TinyTorch available")
except ImportError:
return pytest.mark.skip(reason="TinyTorch not installed")
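A sketch of how an integration test might use these helpers (assumes the tinytorch package and its Tensor type are importable, as the utilities above already require):

```python
from test_utils import setup_integration_test, create_test_tensor, assert_tensors_close

setup_integration_test()  # seeds NumPy, silences noisy warnings

def test_seeded_tensors_are_reproducible():
    a = create_test_tensor((2, 3), seed=0)
    b = create_test_tensor((2, 3), seed=0)  # same seed -> same values
    assert_tensors_close(a, b, msg="seeded tensors should be identical")
```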

View File: tinytorch/__init__.py (generated, 99 lines changed)

@@ -1,28 +1,97 @@
"""
TinyTorch - Build ML Systems From First Principles
A complete educational ML framework for learning neural network internals
by implementing everything from scratch.
Top-level exports provide easy access to commonly used components.
For advanced modules (optimization, profiling), import from submodules:
from tinytorch.profiling.profiler import Profiler
from tinytorch.optimization.quantization import quantize_int8
from tinytorch.generation.kv_cache import enable_kv_cache
"""
__version__ = "0.1.0"
# Import core functionality
from . import core
-# Make common components easily accessible at top level
+# ============================================================================
+# Core Functionality (Modules 01-07)
+# ============================================================================
from .core.tensor import Tensor
-from .core.layers import Linear, Dropout
 from .core.activations import Sigmoid, ReLU, Tanh, GELU, Softmax
-# from .core.losses import MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss # TEMP: removed for testing
-from .core.optimizers import SGD, AdamW
+from .core.layers import Layer, Linear, Dense, Dropout
+from .core.losses import MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss
+from .core.optimizers import SGD, Adam, AdamW
+from .core.training import Trainer, CosineSchedule, clip_grad_norm
-# 🔥 CRITICAL: Enable automatic differentiation
-# This patches Tensor operations to track gradients
-# Use quiet=True when imported by CLI tools to avoid cluttering output
+# ============================================================================
+# Data Loading (Module 08)
+# ============================================================================
from .data.loader import Dataset, TensorDataset, DataLoader
# ============================================================================
# Spatial Operations (Module 09)
# ============================================================================
from .core.spatial import Conv2d, MaxPool2d
# ============================================================================
# Text Processing (Modules 10-11)
# ============================================================================
from .text.tokenization import Tokenizer, CharTokenizer, BPETokenizer
from .text.embeddings import Embedding, PositionalEncoding, EmbeddingLayer
# ============================================================================
# Attention & Transformers (Modules 12-13)
# ============================================================================
from .core.attention import MultiHeadAttention, scaled_dot_product_attention
from .models.transformer import LayerNorm, MLP, TransformerBlock, GPT
# ============================================================================
# Enable Autograd (CRITICAL - must happen after imports)
# ============================================================================
import os
from .core.autograd import enable_autograd
# Enable autograd quietly when imported by CLI tools
enable_autograd(quiet=os.environ.get('TINYTORCH_QUIET', '').lower() in ('1', 'true', 'yes'))
-# Export main public API
+# ============================================================================
+# Public API
+# ============================================================================
__all__ = [
-    'core',
+    # Version
+    '__version__',
+    # Core - Tensor
     'Tensor',
-    'Linear', 'Dropout',
+    # Core - Activations
     'Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax',
-    # 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss', # TEMP: removed for testing
-    'SGD', 'AdamW'
+    # Core - Layers
+    'Layer', 'Linear', 'Dense', 'Dropout',
+    # Core - Losses
+    'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss',
+    # Core - Optimizers
+    'SGD', 'Adam', 'AdamW',
+    # Core - Training
+    'Trainer', 'CosineSchedule', 'clip_grad_norm',
+    # Data Loading
+    'Dataset', 'TensorDataset', 'DataLoader',
+    # Core - Spatial (CNN)
+    'Conv2d', 'MaxPool2d',
+    # Text/NLP
+    'Tokenizer', 'CharTokenizer', 'BPETokenizer',
+    'Embedding', 'PositionalEncoding', 'EmbeddingLayer',
+    # Core - Attention
+    'MultiHeadAttention', 'scaled_dot_product_attention',
+    # Models
+    'LayerNorm', 'MLP', 'TransformerBlock', 'GPT',
 ]
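With these exports in place, milestone scripts can rely on plain top-level imports; a minimal sketch (shapes illustrative, `Dense` constructor signature as used in the tests elsewhere in this commit):

```python
import numpy as np
from tinytorch import Tensor, Dense, Sigmoid

layer = Dense(input_size=2, output_size=1)  # Dense is the Linear alias
x = Tensor(np.random.randn(4, 2))
probs = Sigmoid()(layer(x))                 # forward pass through YOUR layers
print(probs.shape)                          # (4, 1)
```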

View File: tinytorch/_modidx.py (generated, 98 lines changed)

@@ -63,6 +63,14 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.Benchmark.run_memory_benchmark': ( '19_benchmarking/benchmarking.html#benchmark.run_memory_benchmark',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.BenchmarkResult': ( '19_benchmarking/benchmarking.html#benchmarkresult',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.BenchmarkResult.__post_init__': ( '19_benchmarking/benchmarking.html#benchmarkresult.__post_init__',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.BenchmarkResult.__str__': ( '19_benchmarking/benchmarking.html#benchmarkresult.__str__',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.BenchmarkResult.to_dict': ( '19_benchmarking/benchmarking.html#benchmarkresult.to_dict',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.BenchmarkSuite': ( '19_benchmarking/benchmarking.html#benchmarksuite',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.BenchmarkSuite.__init__': ( '19_benchmarking/benchmarking.html#benchmarksuite.__init__',
@@ -89,10 +97,33 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.test_unit_benchmark': ( '19_benchmarking/benchmarking.html#test_unit_benchmark',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.test_unit_benchmark_result': ( '19_benchmarking/benchmarking.html#test_unit_benchmark_result',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.test_unit_benchmark_suite': ( '19_benchmarking/benchmarking.html#test_unit_benchmark_suite',
'tinytorch/benchmarking/benchmark.py'),
'tinytorch.benchmarking.benchmark.test_unit_tinymlperf': ( '19_benchmarking/benchmarking.html#test_unit_tinymlperf',
'tinytorch/benchmarking/benchmark.py')},
'tinytorch.capstone': { 'tinytorch.capstone.BenchmarkReport': ( '20_capstone/capstone.html#benchmarkreport',
'tinytorch/capstone.py'),
'tinytorch.capstone.BenchmarkReport.__init__': ( '20_capstone/capstone.html#benchmarkreport.__init__',
'tinytorch/capstone.py'),
'tinytorch.capstone.BenchmarkReport._get_system_info': ( '20_capstone/capstone.html#benchmarkreport._get_system_info',
'tinytorch/capstone.py'),
'tinytorch.capstone.BenchmarkReport.benchmark_model': ( '20_capstone/capstone.html#benchmarkreport.benchmark_model',
'tinytorch/capstone.py'),
'tinytorch.capstone.SimpleMLP': ('20_capstone/capstone.html#simplemlp', 'tinytorch/capstone.py'),
'tinytorch.capstone.SimpleMLP.__init__': ( '20_capstone/capstone.html#simplemlp.__init__',
'tinytorch/capstone.py'),
'tinytorch.capstone.SimpleMLP.count_parameters': ( '20_capstone/capstone.html#simplemlp.count_parameters',
'tinytorch/capstone.py'),
'tinytorch.capstone.SimpleMLP.forward': ( '20_capstone/capstone.html#simplemlp.forward',
'tinytorch/capstone.py'),
'tinytorch.capstone.SimpleMLP.parameters': ( '20_capstone/capstone.html#simplemlp.parameters',
'tinytorch/capstone.py'),
'tinytorch.capstone.generate_submission': ( '20_capstone/capstone.html#generate_submission',
'tinytorch/capstone.py'),
'tinytorch.capstone.save_submission': ( '20_capstone/capstone.html#save_submission',
'tinytorch/capstone.py')},
'tinytorch.competition.submit': { 'tinytorch.competition.submit.generate_baseline': ( 'source/20_competition/competition_dev.html#generate_baseline',
'tinytorch/competition/submit.py'),
'tinytorch.competition.submit.generate_submission': ( 'source/20_competition/competition_dev.html#generate_submission',
@@ -115,6 +146,8 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.GELU.forward': ( '02_activations/activations.html#gelu.forward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.GELU.parameters': ( '02_activations/activations.html#gelu.parameters',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.ReLU': ( '02_activations/activations.html#relu',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.ReLU.__call__': ( '02_activations/activations.html#relu.__call__',
@@ -123,6 +156,8 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.ReLU.forward': ( '02_activations/activations.html#relu.forward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.ReLU.parameters': ( '02_activations/activations.html#relu.parameters',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Sigmoid': ( '02_activations/activations.html#sigmoid',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Sigmoid.__call__': ( '02_activations/activations.html#sigmoid.__call__',
@@ -131,6 +166,8 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Sigmoid.forward': ( '02_activations/activations.html#sigmoid.forward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Sigmoid.parameters': ( '02_activations/activations.html#sigmoid.parameters',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Softmax': ( '02_activations/activations.html#softmax',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Softmax.__call__': ( '02_activations/activations.html#softmax.__call__',
@@ -139,6 +176,8 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Softmax.forward': ( '02_activations/activations.html#softmax.forward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Softmax.parameters': ( '02_activations/activations.html#softmax.parameters',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Tanh': ( '02_activations/activations.html#tanh',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Tanh.__call__': ( '02_activations/activations.html#tanh.__call__',
@@ -146,7 +185,9 @@ d = { 'settings': { 'branch': 'main',
'tinytorch.core.activations.Tanh.backward': ( '02_activations/activations.html#tanh.backward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Tanh.forward': ( '02_activations/activations.html#tanh.forward',
'tinytorch/core/activations.py')},
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Tanh.parameters': ( '02_activations/activations.html#tanh.parameters',
'tinytorch/core/activations.py')},
'tinytorch.core.attention': { 'tinytorch.core.attention.MultiHeadAttention': ( '12_attention/attention.html#multiheadattention',
'tinytorch/core/attention.py'),
'tinytorch.core.attention.MultiHeadAttention.__call__': ( '12_attention/attention.html#multiheadattention.__call__',
@@ -264,6 +305,20 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.AvgPool2d.parameters': ( '09_spatial/spatial.html#avgpool2d.parameters',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.BatchNorm2d': ( '09_spatial/spatial.html#batchnorm2d',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.BatchNorm2d.__call__': ( '09_spatial/spatial.html#batchnorm2d.__call__',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.BatchNorm2d.__init__': ( '09_spatial/spatial.html#batchnorm2d.__init__',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.BatchNorm2d.eval': ( '09_spatial/spatial.html#batchnorm2d.eval',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.BatchNorm2d.forward': ( '09_spatial/spatial.html#batchnorm2d.forward',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.BatchNorm2d.parameters': ( '09_spatial/spatial.html#batchnorm2d.parameters',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.BatchNorm2d.train': ( '09_spatial/spatial.html#batchnorm2d.train',
'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.Conv2d': ('09_spatial/spatial.html#conv2d', 'tinytorch/core/spatial.py'),
'tinytorch.core.spatial.Conv2d.__call__': ( '09_spatial/spatial.html#conv2d.__call__',
'tinytorch/core/spatial.py'),
@@ -367,8 +422,16 @@ d = { 'settings': { 'branch': 'main',
'tinytorch.core.training.Trainer.save_checkpoint': ( '07_training/training.html#trainer.save_checkpoint',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer.train_epoch': ( '07_training/training.html#trainer.train_epoch',
'tinytorch/core/training.py')},
'tinytorch.data.loader': { 'tinytorch.data.loader.DataLoader': ( '08_dataloader/dataloader.html#dataloader',
'tinytorch/core/training.py'),
'tinytorch.core.training.clip_grad_norm': ( '07_training/training.html#clip_grad_norm',
'tinytorch/core/training.py')},
'tinytorch.data.loader': { 'tinytorch.data.loader.Compose': ( '08_dataloader/dataloader.html#compose',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.Compose.__call__': ( '08_dataloader/dataloader.html#compose.__call__',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.Compose.__init__': ( '08_dataloader/dataloader.html#compose.__init__',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.DataLoader': ( '08_dataloader/dataloader.html#dataloader',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.DataLoader.__init__': ( '08_dataloader/dataloader.html#dataloader.__init__',
'tinytorch/data/loader.py'),
@@ -384,6 +447,18 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.Dataset.__len__': ( '08_dataloader/dataloader.html#dataset.__len__',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.RandomCrop': ( '08_dataloader/dataloader.html#randomcrop',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.RandomCrop.__call__': ( '08_dataloader/dataloader.html#randomcrop.__call__',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.RandomCrop.__init__': ( '08_dataloader/dataloader.html#randomcrop.__init__',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.RandomHorizontalFlip': ( '08_dataloader/dataloader.html#randomhorizontalflip',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.RandomHorizontalFlip.__call__': ( '08_dataloader/dataloader.html#randomhorizontalflip.__call__',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.RandomHorizontalFlip.__init__': ( '08_dataloader/dataloader.html#randomhorizontalflip.__init__',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.TensorDataset': ( '08_dataloader/dataloader.html#tensordataset',
'tinytorch/data/loader.py'),
'tinytorch.data.loader.TensorDataset.__getitem__': ( '08_dataloader/dataloader.html#tensordataset.__getitem__',
@@ -406,6 +481,8 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/generation/kv_cache.py'),
'tinytorch.generation.kv_cache.KVCache.update': ( '17_memoization/memoization.html#kvcache.update',
'tinytorch/generation/kv_cache.py'),
'tinytorch.generation.kv_cache.create_kv_cache': ( '17_memoization/memoization.html#create_kv_cache',
'tinytorch/generation/kv_cache.py'),
'tinytorch.generation.kv_cache.disable_kv_cache': ( '17_memoization/memoization.html#disable_kv_cache',
'tinytorch/generation/kv_cache.py'),
'tinytorch.generation.kv_cache.enable_kv_cache': ( '17_memoization/memoization.html#enable_kv_cache',
@@ -454,7 +531,12 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/models/transformer.py'),
'tinytorch.models.transformer.TransformerBlock.parameters': ( '13_transformers/transformers.html#transformerblock.parameters',
'tinytorch/models/transformer.py')},
'tinytorch.optimization.acceleration': {},
'tinytorch.optimization.acceleration': { 'tinytorch.optimization.acceleration.fused_gelu': ( '18_acceleration/acceleration.html#fused_gelu',
'tinytorch/optimization/acceleration.py'),
'tinytorch.optimization.acceleration.tiled_matmul': ( '18_acceleration/acceleration.html#tiled_matmul',
'tinytorch/optimization/acceleration.py'),
'tinytorch.optimization.acceleration.vectorized_matmul': ( '18_acceleration/acceleration.html#vectorized_matmul',
'tinytorch/optimization/acceleration.py')},
'tinytorch.optimization.compression': { 'tinytorch.optimization.compression.CompressionComplete': ( '16_compression/compression.html#compressioncomplete',
'tinytorch/optimization/compression.py'),
'tinytorch.optimization.compression.CompressionComplete.compress_model': ( '16_compression/compression.html#compressioncomplete.compress_model',
@@ -479,6 +561,8 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/optimization/compression.py'),
'tinytorch.optimization.compression.compress_model': ( '16_compression/compression.html#compress_model',
'tinytorch/optimization/compression.py'),
'tinytorch.optimization.compression.low_rank_approximate': ( '16_compression/compression.html#low_rank_approximate',
'tinytorch/optimization/compression.py'),
'tinytorch.optimization.compression.magnitude_prune': ( '16_compression/compression.html#magnitude_prune',
'tinytorch/optimization/compression.py'),
'tinytorch.optimization.compression.measure_sparsity': ( '16_compression/compression.html#measure_sparsity',
@@ -515,6 +599,8 @@ d = { 'settings': { 'branch': 'main',
'tinytorch/optimization/quantization.py'),
'tinytorch.optimization.quantization.SimpleModel.forward': ( '15_quantization/quantization.html#simplemodel.forward',
'tinytorch/optimization/quantization.py'),
'tinytorch.optimization.quantization.compare_model_sizes': ( '15_quantization/quantization.html#compare_model_sizes',
'tinytorch/optimization/quantization.py'),
'tinytorch.optimization.quantization.dequantize_int8': ( '15_quantization/quantization.html#dequantize_int8',
'tinytorch/optimization/quantization.py'),
'tinytorch.optimization.quantization.quantize_int8': ( '15_quantization/quantization.html#quantize_int8',
@@ -578,7 +664,9 @@ d = { 'settings': { 'branch': 'main',
'tinytorch.text.embeddings.PositionalEncoding.forward': ( '11_embeddings/embeddings.html#positionalencoding.forward',
'tinytorch/text/embeddings.py'),
'tinytorch.text.embeddings.PositionalEncoding.parameters': ( '11_embeddings/embeddings.html#positionalencoding.parameters',
'tinytorch/text/embeddings.py')},
'tinytorch/text/embeddings.py'),
'tinytorch.text.embeddings.create_sinusoidal_embeddings': ( '11_embeddings/embeddings.html#create_sinusoidal_embeddings',
'tinytorch/text/embeddings.py')},
'tinytorch.text.tokenization': { 'tinytorch.text.tokenization.BPETokenizer': ( '10_tokenization/tokenization.html#bpetokenizer',
'tinytorch/text/tokenization.py'),
'tinytorch.text.tokenization.BPETokenizer.__init__': ( '10_tokenization/tokenization.html#bpetokenizer.__init__',

View File

@@ -15,14 +15,116 @@
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
-__all__ = ['DEFAULT_WARMUP_RUNS', 'DEFAULT_MEASUREMENT_RUNS', 'Benchmark', 'test_unit_benchmark', 'BenchmarkSuite',
-           'test_unit_benchmark_suite', 'TinyMLPerf', 'test_unit_tinymlperf']
+__all__ = ['DEFAULT_WARMUP_RUNS', 'DEFAULT_MEASUREMENT_RUNS', 'BenchmarkResult', 'test_unit_benchmark_result', 'Benchmark',
+           'test_unit_benchmark', 'BenchmarkSuite', 'test_unit_benchmark_suite', 'TinyMLPerf', 'test_unit_tinymlperf']
# %% ../../modules/19_benchmarking/19_benchmarking.ipynb 0
# Constants for benchmarking defaults
DEFAULT_WARMUP_RUNS = 5 # Default warmup runs for JIT compilation and cache warming
DEFAULT_MEASUREMENT_RUNS = 10 # Default measurement runs for statistical significance
# %% ../../modules/19_benchmarking/19_benchmarking.ipynb 9
@dataclass
class BenchmarkResult:
"""
Container for benchmark measurements with statistical analysis.
TODO: Implement a robust result container that stores measurements and metadata
APPROACH:
1. Store raw measurements and computed statistics
2. Include metadata about test conditions
3. Provide methods for statistical analysis
4. Support serialization for result persistence
EXAMPLE:
>>> result = BenchmarkResult("model_accuracy", [0.95, 0.94, 0.96])
>>> print(f"Mean: {result.mean:.3f} ± {result.std:.3f}")
Mean: 0.950 ± 0.010
HINTS:
- Use statistics module for robust mean/std calculations
- Store both raw data and summary statistics
- Include confidence intervals for professional reporting
"""
### BEGIN SOLUTION
metric_name: str
values: List[float]
metadata: Dict[str, Any] = field(default_factory=dict)
def __post_init__(self):
"""Compute statistics after initialization."""
if not self.values:
raise ValueError(
"BenchmarkResult requires at least one measurement.\n"
" Issue: Cannot compute statistics without any measurements.\n"
" Fix: Ensure benchmark runs produce at least one measurement before creating BenchmarkResult."
)
self.mean = statistics.mean(self.values)
self.std = statistics.stdev(self.values) if len(self.values) > 1 else 0.0
self.median = statistics.median(self.values)
self.min_val = min(self.values)
self.max_val = max(self.values)
self.count = len(self.values)
# 95% confidence interval for the mean
if len(self.values) > 1:
            z_score = 1.96  # Normal-approximation critical value for a 95% CI
            margin_error = z_score * (self.std / np.sqrt(self.count))
self.ci_lower = self.mean - margin_error
self.ci_upper = self.mean + margin_error
else:
self.ci_lower = self.ci_upper = self.mean
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for serialization."""
return {
'metric_name': self.metric_name,
'values': self.values,
'mean': self.mean,
'std': self.std,
'median': self.median,
'min': self.min_val,
'max': self.max_val,
'count': self.count,
'ci_lower': self.ci_lower,
'ci_upper': self.ci_upper,
'metadata': self.metadata
}
def __str__(self) -> str:
return f"{self.metric_name}: {self.mean:.4f} ± {self.std:.4f} (n={self.count})"
### END SOLUTION
def test_unit_benchmark_result():
"""🔬 Test BenchmarkResult statistical calculations."""
print("🔬 Unit Test: BenchmarkResult...")
# Test basic statistics
values = [1.0, 2.0, 3.0, 4.0, 5.0]
result = BenchmarkResult("test_metric", values)
assert result.mean == 3.0
assert abs(result.std - statistics.stdev(values)) < 1e-10
assert result.median == 3.0
assert result.min_val == 1.0
assert result.max_val == 5.0
assert result.count == 5
# Test confidence intervals
assert result.ci_lower < result.mean < result.ci_upper
# Test serialization
result_dict = result.to_dict()
assert result_dict['metric_name'] == "test_metric"
assert result_dict['mean'] == 3.0
print("✅ BenchmarkResult works correctly!")
if __name__ == "__main__":
test_unit_benchmark_result()
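A usage sketch for the new result container (import path follows the module index above; the measurement values are hypothetical):

```python
from tinytorch.benchmarking.benchmark import BenchmarkResult

latencies_ms = [12.1, 11.8, 12.4, 12.0, 11.9]   # hypothetical measurements
result = BenchmarkResult("inference_latency_ms", latencies_ms)
print(result)  # inference_latency_ms: 12.0400 ± 0.2302 (n=5)
print(result.to_dict()["ci_lower"], result.to_dict()["ci_upper"])
```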
# %% ../../modules/19_benchmarking/19_benchmarking.ipynb 13
class Benchmark:
"""

View File

@@ -293,11 +293,10 @@ class MultiHeadAttention:
mask_reshaped = mask
if mask is not None and len(mask.shape) == 3:
# Add head dimension: (batch, seq, seq) -> (batch, 1, seq, seq)
-            # Note: Tensor.reshape doesn't support adding dims easily without full shape
-            # But we can use numpy reshape on data and wrap in Tensor?
-            # Or just rely on broadcasting if mask is 2D?
-            # In the proof script, mask is None, so this is fine.
-            pass
+            # This allows the mask to broadcast across all attention heads
+            batch_size_mask, seq_len_mask, _ = mask.shape
+            mask_data = mask.data.reshape(batch_size_mask, 1, seq_len_mask, seq_len_mask)
+            mask_reshaped = Tensor(mask_data, requires_grad=False)
attended, _ = scaled_dot_product_attention(Q, K, V, mask=mask_reshaped)
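The broadcasting trick in isolation, as a pure-NumPy sketch: inserting a singleton head axis lets one (batch, seq, seq) mask cover every attention head.

```python
import numpy as np

batch, heads, seq = 2, 4, 5
scores = np.random.randn(batch, heads, seq, seq)   # per-head attention logits
mask = np.tril(np.ones((batch, seq, seq)))         # causal mask, one per batch item
mask4d = mask.reshape(batch, 1, seq, seq)          # (batch, 1, seq, seq)
masked = np.where(mask4d > 0, scores, -1e9)        # broadcasts across the head axis
assert masked.shape == (batch, heads, seq, seq)
```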

View File

@@ -446,6 +446,7 @@ class EmbeddingBackward(Function):
return (grad_weight,)
#| export
class SliceBackward(Function):
"""
@@ -1298,6 +1299,6 @@ def enable_autograd(quiet=False):
print(" - requires_grad=True enables tracking")
# Auto-enable when module is imported
-# Check TINYTORCH_QUIET env var to suppress messages (for CLI tools)
-import os
-enable_autograd(quiet=os.environ.get('TINYTORCH_QUIET', '').lower() in ('1', 'true', 'yes'))
+# Always quiet to avoid cluttering user imports
+enable_autograd(quiet=True)

View File

@@ -15,7 +15,7 @@
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
-__all__ = ['XAVIER_SCALE_FACTOR', 'HE_SCALE_FACTOR', 'DROPOUT_MIN_PROB', 'DROPOUT_MAX_PROB', 'Layer', 'Linear', 'Dropout']
+__all__ = ['XAVIER_SCALE_FACTOR', 'HE_SCALE_FACTOR', 'DROPOUT_MIN_PROB', 'DROPOUT_MAX_PROB', 'Layer', 'Linear', 'Dense', 'Dropout']
# %% ../../modules/03_layers/03_layers.ipynb 1
import numpy as np
@@ -273,3 +273,7 @@ class Dropout(Layer):
def __repr__(self):
return f"Dropout(p={self.p})"
# Alias for compatibility - Dense is the same as Linear
# Some frameworks use Dense, some use Linear - they're identical
Dense = Linear
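Since the alias is a plain assignment, both names refer to the same class; a quick check (constructor signature as used in the tests in this commit):

```python
from tinytorch.core.layers import Dense, Linear

assert Dense is Linear                       # same class object, two names
layer = Dense(input_size=8, output_size=4)   # identical to Linear(...)
```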

View File

@@ -240,9 +240,14 @@ class SGD(Optimizer):
if param.grad is None:
continue
-            # Get gradient data (grad is a Tensor from Module 01)
+            # Get gradient data - grad can be Tensor or numpy array
             grad = param.grad
-            grad_data = grad.data
+            # Handle both Tensor (with .data) and numpy array (from autograd) cases
+            if isinstance(grad, Tensor):
+                grad_data = grad.data
+            else:
+                # grad is already a numpy array from autograd
+                grad_data = grad
# Apply weight decay
if self.weight_decay != 0:
@@ -342,9 +347,14 @@ class Adam(Optimizer):
if param.grad is None:
continue
-            # Get gradient data (grad is a Tensor from Module 01)
+            # Get gradient data - grad can be Tensor or numpy array
             grad = param.grad
-            grad_data = grad.data
+            # Handle both Tensor (with .data) and numpy array (from autograd) cases
+            if isinstance(grad, Tensor):
+                grad_data = grad.data
+            else:
+                # grad is already a numpy array from autograd
+                grad_data = grad
# Apply weight decay
if self.weight_decay != 0:
@@ -446,9 +456,14 @@ class AdamW(Optimizer):
if param.grad is None:
continue
-            # Get gradient data (NOT modified by weight decay)
+            # Get gradient data - grad can be Tensor or numpy array
             grad = param.grad
-            grad_data = grad.data
+            # Handle both Tensor (with .data) and numpy array (from autograd) cases
+            if isinstance(grad, Tensor):
+                grad_data = grad.data
+            else:
+                # grad is already a numpy array from autograd
+                grad_data = grad
# Initialize buffers if needed
if self.m_buffers[i] is None:
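The same Tensor-or-ndarray branch now appears in SGD, Adam, and AdamW; a hypothetical helper (not part of this commit, assumes `Tensor` is imported as in optimizers.py) would express the normalization once:

```python
def _grad_as_array(grad):
    """Return the raw ndarray behind a gradient, whether it is a Tensor or an ndarray."""
    return grad.data if isinstance(grad, Tensor) else grad
```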

View File

@@ -16,7 +16,7 @@
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['DEFAULT_KERNEL_SIZE', 'DEFAULT_STRIDE', 'DEFAULT_PADDING', 'Conv2dBackward', 'Conv2d', 'MaxPool2dBackward',
-           'MaxPool2d', 'AvgPool2d', 'SimpleCNN']
+           'MaxPool2d', 'AvgPool2d', 'BatchNorm2d', 'SimpleCNN']
# %% ../../modules/09_spatial/09_spatial.ipynb 1
import numpy as np
@@ -133,6 +133,7 @@ class Conv2dBackward(Function):
# Following TinyTorch protocol: return (grad_input, grad_weight, grad_bias)
return grad_input, grad_weight, grad_bias
#| export
class Conv2d:
"""
@@ -392,6 +393,7 @@ class MaxPool2dBackward(Function):
# Return as tuple (following Function protocol)
return (grad_input,)
#| export
class MaxPool2d:
"""
@@ -662,7 +664,160 @@ class AvgPool2d:
"""Enable model(x) syntax."""
return self.forward(x)
-# %% ../../modules/09_spatial/09_spatial.ipynb 21
+# %% ../../modules/09_spatial/09_spatial.ipynb 15
class BatchNorm2d:
"""
Batch Normalization for 2D spatial inputs (images).
Normalizes activations across batch and spatial dimensions for each channel,
then applies learnable scale (gamma) and shift (beta) parameters.
Key behaviors:
- Training: Uses batch statistics, updates running statistics
- Eval: Uses frozen running statistics for consistent inference
Args:
num_features: Number of channels (C in NCHW format)
eps: Small constant for numerical stability (default: 1e-5)
momentum: Momentum for running statistics update (default: 0.1)
"""
def __init__(self, num_features, eps=1e-5, momentum=0.1):
"""
Initialize BatchNorm2d layer.
TODO: Initialize learnable and running parameters
APPROACH:
1. Store hyperparameters (num_features, eps, momentum)
2. Initialize gamma (scale) to ones - identity at start
3. Initialize beta (shift) to zeros - no shift at start
4. Initialize running_mean to zeros
5. Initialize running_var to ones
6. Set training mode to True initially
EXAMPLE:
>>> bn = BatchNorm2d(64) # For 64-channel feature maps
>>> print(bn.gamma.shape) # (64,)
>>> print(bn.training) # True
"""
super().__init__()
### BEGIN SOLUTION
self.num_features = num_features
self.eps = eps
self.momentum = momentum
# Learnable parameters (requires_grad=True for training)
# gamma (scale): initialized to 1 so output = normalized input initially
self.gamma = Tensor(np.ones(num_features), requires_grad=True)
# beta (shift): initialized to 0 so no shift initially
self.beta = Tensor(np.zeros(num_features), requires_grad=True)
# Running statistics (not trained, accumulated during training)
# These are used during evaluation for consistent normalization
self.running_mean = np.zeros(num_features)
self.running_var = np.ones(num_features)
# Training mode flag
self.training = True
### END SOLUTION
def train(self):
"""Set layer to training mode."""
self.training = True
return self
def eval(self):
"""Set layer to evaluation mode."""
self.training = False
return self
def forward(self, x):
"""
Forward pass through BatchNorm2d.
TODO: Implement batch normalization forward pass
APPROACH:
1. Validate input shape (must be 4D: batch, channels, height, width)
2. If training:
a. Compute batch mean and variance per channel
b. Normalize using batch statistics
c. Update running statistics with momentum
3. If eval:
a. Use running mean and variance
b. Normalize using frozen statistics
4. Apply scale (gamma) and shift (beta)
EXAMPLE:
>>> bn = BatchNorm2d(16)
>>> x = Tensor(np.random.randn(2, 16, 8, 8)) # batch=2, channels=16, 8x8
>>> y = bn(x)
>>> print(y.shape) # (2, 16, 8, 8) - same shape
HINTS:
- Compute mean/var over axes (0, 2, 3) to get per-channel statistics
- Reshape gamma/beta to (1, C, 1, 1) for broadcasting
- Running stat update: running = (1 - momentum) * running + momentum * batch
"""
### BEGIN SOLUTION
# Input validation
if len(x.shape) != 4:
raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")
batch_size, channels, height, width = x.shape
if channels != self.num_features:
raise ValueError(f"Expected {self.num_features} channels, got {channels}")
if self.training:
# Compute batch statistics per channel
# Mean over batch and spatial dimensions: axes (0, 2, 3)
batch_mean = np.mean(x.data, axis=(0, 2, 3)) # Shape: (C,)
batch_var = np.var(x.data, axis=(0, 2, 3)) # Shape: (C,)
# Update running statistics (exponential moving average)
self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * batch_mean
self.running_var = (1 - self.momentum) * self.running_var + self.momentum * batch_var
# Use batch statistics for normalization
mean = batch_mean
var = batch_var
else:
# Use running statistics (frozen during eval)
mean = self.running_mean
var = self.running_var
# Normalize: (x - mean) / sqrt(var + eps)
# Reshape mean and var for broadcasting: (C,) -> (1, C, 1, 1)
mean_reshaped = mean.reshape(1, channels, 1, 1)
var_reshaped = var.reshape(1, channels, 1, 1)
x_normalized = (x.data - mean_reshaped) / np.sqrt(var_reshaped + self.eps)
# Apply scale (gamma) and shift (beta)
# Reshape for broadcasting: (C,) -> (1, C, 1, 1)
gamma_reshaped = self.gamma.data.reshape(1, channels, 1, 1)
beta_reshaped = self.beta.data.reshape(1, channels, 1, 1)
output = gamma_reshaped * x_normalized + beta_reshaped
# Return Tensor with gradient tracking
result = Tensor(output, requires_grad=x.requires_grad or self.gamma.requires_grad)
return result
### END SOLUTION
def parameters(self):
"""Return learnable parameters (gamma and beta)."""
return [self.gamma, self.beta]
def __call__(self, x):
"""Enable model(x) syntax."""
return self.forward(x)
# %% ../../modules/09_spatial/09_spatial.ipynb 25
class SimpleCNN:
"""
Simple CNN demonstrating spatial operations integration.
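A train/eval sketch for the BatchNorm2d above (assumes `tinytorch.core.spatial` exposes it, as the updated `__all__` indicates):

```python
import numpy as np
from tinytorch.core.spatial import BatchNorm2d
from tinytorch.core.tensor import Tensor

bn = BatchNorm2d(16)
x = Tensor(np.random.randn(8, 16, 4, 4))  # (batch, channels, H, W)
y_train = bn(x)         # uses batch statistics, updates running stats
y_eval = bn.eval()(x)   # uses frozen running statistics
assert y_train.shape == y_eval.shape == (8, 16, 4, 4)
```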

View File

@@ -146,8 +146,9 @@ class Tensor:
new_shape[unknown_idx] = unknown_dim
new_shape = tuple(new_shape)
if np.prod(new_shape) != self.size:
target_size = int(np.prod(new_shape))
raise ValueError(
f"Cannot reshape tensor of size {self.size} to shape {new_shape}"
f"Total elements must match: {self.size} {target_size}"
)
reshaped_data = np.reshape(self.data, new_shape)
result = Tensor(reshaped_data, requires_grad=self.requires_grad)

View File

@@ -15,7 +15,7 @@
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
-__all__ = ['DEFAULT_MAX_LR', 'DEFAULT_MIN_LR', 'DEFAULT_TOTAL_EPOCHS', 'CosineSchedule', 'Trainer']
+__all__ = ['DEFAULT_MAX_LR', 'DEFAULT_MIN_LR', 'DEFAULT_TOTAL_EPOCHS', 'CosineSchedule', 'clip_grad_norm', 'Trainer']
# %% ../../modules/07_training/07_training.ipynb 1
import numpy as np
@@ -76,6 +76,67 @@ class CosineSchedule:
return self.min_lr + (self.max_lr - self.min_lr) * cosine_factor
### END SOLUTION
# %% ../../modules/07_training/07_training.ipynb 10
def clip_grad_norm(parameters: List, max_norm: float = 1.0) -> float:
"""
Clip gradients by global norm to prevent exploding gradients.
This is crucial for training stability, especially with RNNs and deep networks.
Instead of clipping each gradient individually, we compute the global norm
across all parameters and scale uniformly if needed.
TODO: Implement gradient clipping by global norm
APPROACH:
1. Compute total norm: sqrt(sum of squared gradients across all parameters)
2. If total_norm > max_norm, compute clip_coef = max_norm / total_norm
3. Scale all gradients by clip_coef: grad *= clip_coef
4. Return the original norm for monitoring
EXAMPLE:
>>> params = [Tensor([1, 2, 3], requires_grad=True)]
>>> params[0].grad = Tensor([10, 20, 30]) # Large gradients
>>> original_norm = clip_grad_norm(params, max_norm=1.0)
>>> print(f"Clipped norm: {np.linalg.norm(params[0].grad.data):.2f}") # Should be ≤ 1.0
HINTS:
- Use np.linalg.norm() to compute norms
- Only clip if total_norm > max_norm
- Modify gradients in-place for efficiency
"""
### BEGIN SOLUTION
if not parameters:
return 0.0
# Collect all gradients and compute global norm
total_norm = 0.0
for param in parameters:
if param.grad is not None:
# Handle both Tensor gradients and numpy array gradients
if isinstance(param.grad, np.ndarray):
grad_data = param.grad
else:
# Trust that Tensor has .data attribute
grad_data = param.grad.data
total_norm += np.sum(grad_data ** 2)
total_norm = np.sqrt(total_norm)
# Clip if necessary
if total_norm > max_norm:
clip_coef = max_norm / total_norm
for param in parameters:
if param.grad is not None:
# Handle both Tensor gradients and numpy array gradients
if isinstance(param.grad, np.ndarray):
param.grad = param.grad * clip_coef
else:
# Trust that Tensor has .data attribute
param.grad.data = param.grad.data * clip_coef
return float(total_norm)
### END SOLUTION
# %% ../../modules/07_training/07_training.ipynb 14
class Trainer:
"""

View File: tinytorch/data/loader.py (generated, 209 lines changed)

@@ -15,7 +15,7 @@
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
-__all__ = ['Dataset', 'TensorDataset', 'DataLoader']
+__all__ = ['Dataset', 'TensorDataset', 'DataLoader', 'RandomHorizontalFlip', 'RandomCrop', 'Compose']
# %% ../../modules/08_dataloader/08_dataloader.ipynb 0
#| default_exp data.loader
@@ -262,3 +262,210 @@ class DataLoader:
return tuple(batched_tensors)
### END SOLUTION
# %% ../../modules/08_dataloader/08_dataloader.ipynb 12
class RandomHorizontalFlip:
"""
Randomly flip images horizontally with given probability.
A simple but effective augmentation for most image datasets.
Flipping is appropriate when horizontal orientation doesn't change class
(cats, dogs, cars - not digits or text!).
Args:
p: Probability of flipping (default: 0.5)
"""
def __init__(self, p=0.5):
"""
Initialize RandomHorizontalFlip.
TODO: Store flip probability
EXAMPLE:
>>> flip = RandomHorizontalFlip(p=0.5) # 50% chance to flip
"""
### BEGIN SOLUTION
if not 0.0 <= p <= 1.0:
raise ValueError(f"Probability must be between 0 and 1, got {p}")
self.p = p
### END SOLUTION
def __call__(self, x):
"""
Apply random horizontal flip to input.
TODO: Implement random horizontal flip
APPROACH:
1. Generate random number in [0, 1)
2. If random < p, flip horizontally
3. Otherwise, return unchanged
Args:
x: Input array with shape (..., H, W) or (..., H, W, C)
Flips along the last-1 axis (width dimension)
Returns:
Flipped or unchanged array (same shape as input)
EXAMPLE:
>>> flip = RandomHorizontalFlip(0.5)
>>> img = np.array([[1, 2, 3], [4, 5, 6]]) # 2x3 image
>>> # 50% chance output is [[3, 2, 1], [6, 5, 4]]
HINT: Use np.flip(x, axis=-1) to flip along width axis
"""
### BEGIN SOLUTION
if np.random.random() < self.p:
# Flip along the width axis (last axis for HW format, second-to-last for HWC)
# Using axis=-1 works for both (..., H, W) and (..., H, W, C)
if isinstance(x, Tensor):
return Tensor(np.flip(x.data, axis=-1).copy())
else:
return np.flip(x, axis=-1).copy()
return x
### END SOLUTION
#| export
class RandomCrop:
"""
Randomly crop image after padding.
This is the standard augmentation for CIFAR-10:
1. Pad image by `padding` pixels on each side
2. Randomly crop back to original size
This simulates small translations in the image, forcing the model
to recognize objects regardless of their exact position.
Args:
size: Output crop size (int for square, or tuple (H, W))
padding: Pixels to pad on each side before cropping (default: 4)
"""
def __init__(self, size, padding=4):
"""
Initialize RandomCrop.
TODO: Store crop parameters
EXAMPLE:
>>> crop = RandomCrop(32, padding=4) # CIFAR-10 standard
>>> # Pads to 40x40, then crops back to 32x32
"""
### BEGIN SOLUTION
if isinstance(size, int):
self.size = (size, size)
else:
self.size = size
self.padding = padding
### END SOLUTION
def __call__(self, x):
"""
Apply random crop after padding.
TODO: Implement random crop with padding
APPROACH:
1. Add zero-padding to all sides
2. Choose random top-left corner for crop
3. Extract crop of target size
Args:
x: Input image with shape (C, H, W) or (H, W) or (H, W, C)
Assumes spatial dimensions are H, W
Returns:
Cropped image with target size
EXAMPLE:
>>> crop = RandomCrop(32, padding=4)
>>> img = np.random.randn(3, 32, 32) # CIFAR-10 format (C, H, W)
>>> out = crop(img)
>>> print(out.shape) # (3, 32, 32)
HINTS:
- Use np.pad for adding zeros
- Handle both (C, H, W) and (H, W) formats
- Random offsets should be in [0, 2*padding]
"""
### BEGIN SOLUTION
is_tensor = isinstance(x, Tensor)
data = x.data if is_tensor else x
target_h, target_w = self.size
# Determine image format and dimensions
if len(data.shape) == 2:
# (H, W) format
h, w = data.shape
padded = np.pad(data, self.padding, mode='constant', constant_values=0)
# Random crop position
top = np.random.randint(0, 2 * self.padding + h - target_h + 1)
left = np.random.randint(0, 2 * self.padding + w - target_w + 1)
cropped = padded[top:top + target_h, left:left + target_w]
elif len(data.shape) == 3:
if data.shape[0] <= 4: # Likely (C, H, W) format
c, h, w = data.shape
# Pad only spatial dimensions
padded = np.pad(data,
((0, 0), (self.padding, self.padding), (self.padding, self.padding)),
mode='constant', constant_values=0)
# Random crop position
top = np.random.randint(0, 2 * self.padding + h - target_h + 1)
left = np.random.randint(0, 2 * self.padding + w - target_w + 1)
cropped = padded[:, top:top + target_h, left:left + target_w]
else: # Likely (H, W, C) format
h, w, c = data.shape
padded = np.pad(data,
((self.padding, self.padding), (self.padding, self.padding), (0, 0)),
mode='constant', constant_values=0)
top = np.random.randint(0, 2 * self.padding + h - target_h + 1)
left = np.random.randint(0, 2 * self.padding + w - target_w + 1)
cropped = padded[top:top + target_h, left:left + target_w, :]
else:
raise ValueError(f"Expected 2D or 3D input, got shape {data.shape}")
return Tensor(cropped) if is_tensor else cropped
### END SOLUTION
#| export
class Compose:
"""
Compose multiple transforms into a pipeline.
Applies transforms in sequence, passing output of each
as input to the next.
Args:
transforms: List of transform callables
"""
def __init__(self, transforms):
"""
Initialize Compose with list of transforms.
EXAMPLE:
>>> transforms = Compose([
... RandomHorizontalFlip(0.5),
... RandomCrop(32, padding=4)
... ])
"""
self.transforms = transforms
def __call__(self, x):
"""Apply all transforms in sequence."""
for transform in self.transforms:
x = transform(x)
return x
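# --- Usage sketch (illustrative): the standard CIFAR-10 training pipeline,
# --- built from the transforms defined above.
train_transform = Compose([
    RandomHorizontalFlip(p=0.5),
    RandomCrop(32, padding=4),
])
img = np.random.randn(3, 32, 32).astype(np.float32)  # (C, H, W) image
augmented = train_transform(img)                      # maybe flip, then pad + crop
assert augmented.shape == (3, 32, 32)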

View File

@@ -15,7 +15,7 @@
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['BYTES_PER_FLOAT32', 'MB_TO_BYTES', 'KVCache', 'enable_kv_cache', 'disable_kv_cache']
__all__ = ['BYTES_PER_FLOAT32', 'MB_TO_BYTES', 'KVCache', 'create_kv_cache', 'enable_kv_cache', 'disable_kv_cache']
# %% ../../modules/17_memoization/17_memoization.ipynb 1
import numpy as np
@@ -303,11 +303,11 @@ class KVCache:
}
# %% ../../modules/17_memoization/17_memoization.ipynb 11
def enable_kv_cache(batch_size: int, max_seq_len: int, num_layers: int,
def create_kv_cache(batch_size: int, max_seq_len: int, num_layers: int,
num_heads: int, head_dim: int) -> KVCache:
"""
Create and return a KVCache instance for model generation.
This function creates a properly sized cache for the model architecture.
Call this before starting generation, then pass the cache to your
generation loop.
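# --- Hypothetical usage sketch: only create_kv_cache's signature is from this
# --- module; the model call below is illustrative, not a real API.
# cache = create_kv_cache(batch_size=1, max_seq_len=128,
#                         num_layers=4, num_heads=8, head_dim=32)
# for _ in range(max_new_tokens):
#     logits = model(next_token, kv_cache=cache)  # reuses cached K/V per layer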

View File

@@ -15,8 +15,208 @@
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = []
__all__ = ['vectorized_matmul', 'fused_gelu', 'tiled_matmul']
# %% ../../modules/18_acceleration/18_acceleration.ipynb 0
#| default_exp optimization.acceleration
#| export
# %% ../../modules/18_acceleration/18_acceleration.ipynb 7
def vectorized_matmul(a: Tensor, b: Tensor) -> Tensor:
"""
High-performance matrix multiplication using vectorized operations.
This implementation leverages optimized BLAS libraries that use:
- SIMD instructions for parallel computation
- Cache-blocking for memory efficiency
- Multi-threading for CPU parallelization
TODO: Implement production-grade matrix multiplication
APPROACH:
1. Validate shapes are compatible for matrix multiplication
2. Use NumPy's optimized dot product (calls BLAS GEMM)
3. Return result wrapped in Tensor
Args:
a: First tensor for multiplication (M×K or batch×M×K)
b: Second tensor for multiplication (K×N or batch×K×N)
Returns:
Result tensor of shape (M×N or batch×M×N)
EXAMPLE:
Matrix multiplication visualization:
>>> a = Tensor([[1, 2], [3, 4]]) # 2×2
>>> b = Tensor([[5, 6], [7, 8]]) # 2×2
>>> result = vectorized_matmul(a, b)
>>> print(result.data)
[[19 22] # [1×5+2×7, 1×6+2×8] = [19, 22]
[43 50]] # [3×5+4×7, 3×6+4×8] = [43, 50]
PERFORMANCE CHARACTERISTICS:
- Time Complexity: O(N³) but highly optimized
- Space Complexity: O(N²) for result
- Arithmetic Intensity: 2N³ FLOPs over 3N² values moved (12N² bytes in FP32) ≈ N/6 FLOPs per byte, which grows with N (good for large N)
HINTS:
- Check a.shape[-1] == b.shape[-2] for inner dimension match
- Use np.matmul() for batch support and optimization
- Trust BLAS to handle the vectorization magic
"""
### BEGIN SOLUTION
# Input validation for matrix multiplication
if len(a.shape) < 2 or len(b.shape) < 2:
raise ValueError(
f"Matrix multiplication requires 2D+ tensors, got shapes {a.shape} and {b.shape}. "
f"💡 HINT: Use reshape() to add dimensions if needed."
)
if a.shape[-1] != b.shape[-2]:
raise ValueError(
f"Matrix multiplication shape mismatch: {a.shape} @ {b.shape}. "
f"Inner dimensions must match: a.shape[-1]={a.shape[-1]} != b.shape[-2]={b.shape[-2]}. "
f"💡 HINT: For A@B, A's columns must equal B's rows."
)
# Use NumPy's highly optimized matrix multiplication
# This calls BLAS GEMM (General Matrix Multiply), which uses:
# - SIMD vectorization for parallel arithmetic
# - Cache blocking for memory efficiency
# - Multi-threading on multi-core systems
result_data = np.matmul(a.data, b.data)
return Tensor(result_data)
### END SOLUTION
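# --- Quick sanity/benchmark sketch (illustrative; timings are hardware-dependent):
import time
a = Tensor(np.random.randn(512, 512).astype(np.float32))
b = Tensor(np.random.randn(512, 512).astype(np.float32))
start = time.perf_counter()
result = vectorized_matmul(a, b)
elapsed = time.perf_counter() - start
flops = 2 * 512**3  # multiply-adds in a 512x512x512 GEMM
print(f"~{flops / elapsed / 1e9:.1f} GFLOP/s via BLAS")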
# %% ../../modules/18_acceleration/18_acceleration.ipynb 10
def fused_gelu(x: Tensor) -> Tensor:
"""
Fused GELU activation that combines all operations in a single kernel.
GELU combines the benefits of ReLU and sigmoid:
- Smooth everywhere (unlike ReLU's discontinuity at 0)
- Non-saturating for positive values (unlike sigmoid)
- Probabilistic interpretation: x * P(X ≤ x) where X ~ N(0,1)
Mathematical Definition:
GELU(x) = x * Φ(x) where Φ(x) is the standard normal CDF
Fast Approximation (used here):
GELU(x) ≈ 0.5 * x * (1 + tanh(√(2/π) * (x + 0.044715 * x³)))
TODO: Implement fused GELU to minimize memory bandwidth
APPROACH:
1. Compute all intermediate values in a single expression
2. Avoid creating temporary arrays
3. Let NumPy's broadcasting handle vectorization
Args:
x: Input tensor to apply GELU activation
Returns:
GELU-activated tensor (same shape as input)
EXAMPLE:
>>> x = Tensor([-2, -1, 0, 1, 2])
>>> result = fused_gelu(x)
>>> print(result.data)
[-0.0454 -0.1588  0.      0.8412  1.9546]  # tanh approximation; exact GELU differs only in the 4th decimal
# Notice: smooth transition through 0, positive bias
MEMORY EFFICIENCY:
- Unfused: ~5 named temporary arrays × input_size × 4 bytes kept alive
- Fused expression: no named intermediates; temporaries are freed eagerly
- In a truly fused kernel, bandwidth drops ~80% for memory-bound sizes
  (true fusion needs a JIT like numba or numexpr; pure NumPy still materializes each intermediate internally)
HINTS:
- Use np.sqrt(2.0 / np.pi) for the constant
- Keep the entire expression in one statement to avoid named intermediates
"""
### BEGIN SOLUTION
# Mathematical constant for GELU approximation
sqrt_2_over_pi = np.sqrt(2.0 / np.pi)
# Fused GELU computation - all operations in a single expression
# This avoids keeping named intermediate arrays alive; each NumPy op is
# vectorized, though a JIT compiler would be needed for true kernel fusion
result_data = 0.5 * x.data * (
1.0 + np.tanh(sqrt_2_over_pi * (x.data + 0.044715 * x.data**3))
)
return Tensor(result_data)
### END SOLUTION
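# --- For contrast, an unfused GELU sketch (illustrative): each step below
# --- keeps a named temporary alive that the single-expression version avoids.
def unfused_gelu(x: Tensor) -> Tensor:
    c = np.sqrt(2.0 / np.pi)
    inner = x.data + 0.044715 * x.data**3             # temporary 1
    scaled = c * inner                                 # temporary 2
    activated = np.tanh(scaled)                        # temporary 3
    return Tensor(0.5 * x.data * (1.0 + activated))   # temporaries 4-5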
# %% ../../modules/18_acceleration/18_acceleration.ipynb 16
def tiled_matmul(a: Tensor, b: Tensor, tile_size: int = 64) -> Tensor:
"""
Cache-aware matrix multiplication using tiling/blocking.
Demonstrates blocking algorithm for cache optimization by breaking
large matrix multiplications into cache-sized chunks.
TODO: Implement cache-aware tiled matrix multiplication
APPROACH:
1. Validate inputs for matrix multiplication compatibility
2. Use NumPy's optimized matmul (which already implements tiling internally)
3. In production, explicit tiling would use nested loops over blocks
Args:
a: First matrix (M×K)
b: Second matrix (K×N)
tile_size: Block size for cache efficiency (default: 64)
Returns:
Result matrix (M×N)
EXAMPLE:
>>> a = Tensor(np.random.randn(256, 256))
>>> b = Tensor(np.random.randn(256, 256))
>>> result = tiled_matmul(a, b, tile_size=64)
>>> # Same result as vectorized_matmul, but more cache-friendly for large matrices
PERFORMANCE CHARACTERISTICS:
- Reduces cache misses by working on blocks that fit in L1/L2
- Especially beneficial for matrices larger than cache size
- tile_size is chosen so a tile block fits in fast cache (a 64×64 FP32 tile is 16 KB, well within a typical 32 KB L1)
HINTS:
- For educational purposes, we use NumPy's optimized BLAS
- BLAS libraries (MKL, OpenBLAS) already implement cache blocking
- Explicit tiling would use 6 nested loops (3 for tiles, 3 for elements)
"""
### BEGIN SOLUTION
# Input validation
if len(a.shape) < 2 or len(b.shape) < 2:
raise ValueError(
f"Tiled matmul requires 2D+ tensors, got shapes {a.shape} and {b.shape}. "
f"💡 HINT: Tiling works on matrix operations."
)
if a.shape[-1] != b.shape[-2]:
raise ValueError(
f"Shape mismatch: {a.shape} @ {b.shape}. "
f"Inner dimensions must match for matrix multiplication. "
f"💡 HINT: a.shape[-1]={a.shape[-1]} != b.shape[-2]={b.shape[-2]}"
)
# For educational purposes, we use NumPy's matmul which already
# implements cache-aware tiling via BLAS libraries (MKL, OpenBLAS)
# These libraries automatically partition large matrices into
# cache-sized blocks for optimal performance
# In a full educational implementation, you would write:
# for i_tile in range(0, M, tile_size):
# for j_tile in range(0, N, tile_size):
# for k_tile in range(0, K, tile_size):
# # Multiply tile blocks that fit in cache
# C[i_tile:i_tile+tile_size, j_tile:j_tile+tile_size] +=
# A[i_tile:i_tile+tile_size, k_tile:k_tile+tile_size] @
# B[k_tile:k_tile+tile_size, j_tile:j_tile+tile_size]
result_data = np.matmul(a.data, b.data)
return Tensor(result_data)
### END SOLUTION
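# --- Reference sketch of explicit tiling (educational; far slower than BLAS,
# --- shown only to make the blocking structure above concrete):
def explicit_tiled_matmul(A, B, tile=64):
    M, K = A.shape
    _, N = B.shape
    C = np.zeros((M, N), dtype=A.dtype)
    for i in range(0, M, tile):          # tile loop over rows of A
        for j in range(0, N, tile):      # tile loop over columns of B
            for k in range(0, K, tile):  # tile loop over the shared dimension
                # Each block product touches sub-matrices that fit in cache;
                # NumPy slicing handles ragged edge tiles automatically.
                C[i:i+tile, j:j+tile] += A[i:i+tile, k:k+tile] @ B[k:k+tile, j:j+tile]
    return C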

View File

@@ -15,8 +15,8 @@
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['BYTES_PER_FLOAT32', 'MB_TO_BYTES', 'magnitude_prune', 'structured_prune', 'KnowledgeDistillation',
'CompressionComplete', 'measure_sparsity', 'compress_model']
__all__ = ['BYTES_PER_FLOAT32', 'MB_TO_BYTES', 'magnitude_prune', 'structured_prune', 'low_rank_approximate',
'KnowledgeDistillation', 'CompressionComplete', 'measure_sparsity', 'compress_model']
# %% ../../modules/16_compression/16_compression.ipynb 1
import numpy as np
@@ -145,6 +145,48 @@ def structured_prune(model, prune_ratio=0.5):
return model
### END SOLUTION
# %% ../../modules/16_compression/16_compression.ipynb 18
def low_rank_approximate(weight_matrix, rank_ratio=0.5):
"""
Approximate weight matrix using low-rank decomposition (SVD).
TODO: Implement SVD-based low-rank approximation
APPROACH:
1. Perform SVD: W = U @ S @ V^T
2. Keep only top k singular values where k = rank_ratio * min(dimensions)
3. Reconstruct: W_approx = U[:,:k] @ diag(S[:k]) @ V[:k,:]
4. Return decomposed matrices for memory savings
EXAMPLE:
>>> weight = np.random.randn(100, 50)
>>> U, S, V = low_rank_approximate(weight, rank_ratio=0.3)
>>> # Original: 100*50 = 5000 params
>>> # Compressed: 100*15 + 15*50 = 2250 params, plus 15 singular values (~55% reduction)
HINTS:
- Use np.linalg.svd() for decomposition
- Choose k = int(rank_ratio * min(m, n))
- Return U[:,:k], S[:k], V[:k,:] for reconstruction
"""
### BEGIN SOLUTION
m, n = weight_matrix.shape
# Perform SVD
U, S, V = np.linalg.svd(weight_matrix, full_matrices=False)
# Determine target rank
max_rank = min(m, n)
target_rank = max(1, int(rank_ratio * max_rank))
# Truncate to target rank
U_truncated = U[:, :target_rank]
S_truncated = S[:target_rank]
V_truncated = V[:target_rank, :]
return U_truncated, S_truncated, V_truncated
### END SOLUTION
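# --- Usage sketch (illustrative): reconstruct the approximation and
# --- measure the error and parameter savings.
W = np.random.randn(100, 50).astype(np.float32)
U, S, V = low_rank_approximate(W, rank_ratio=0.3)   # k = 15
W_approx = U @ np.diag(S) @ V                        # back to (100, 50)
rel_error = np.linalg.norm(W - W_approx) / np.linalg.norm(W)
params_stored = U.size + S.size + V.size             # 1500 + 15 + 750 = 2265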
# %% ../../modules/16_compression/16_compression.ipynb 21
class KnowledgeDistillation:
"""

View File

@@ -16,8 +16,8 @@
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['INT8_MIN_VALUE', 'INT8_MAX_VALUE', 'INT8_RANGE', 'EPSILON', 'BYTES_PER_FLOAT32', 'BYTES_PER_INT8', 'MB_TO_BYTES',
'SimpleModel', 'QuantizedLinear', 'QuantizationComplete', 'quantize_int8', 'dequantize_int8',
'quantize_model']
'SimpleModel', 'QuantizedLinear', 'compare_model_sizes', 'QuantizationComplete', 'quantize_int8',
'dequantize_int8', 'quantize_model']
# %% ../../modules/15_quantization/15_quantization.ipynb 3
import numpy as np
@@ -198,6 +198,84 @@ class QuantizedLinear:
}
### END SOLUTION
# %% ../../modules/15_quantization/15_quantization.ipynb 24
def compare_model_sizes(original_model, quantized_model) -> Dict[str, float]:
"""
Compare memory usage between original and quantized models.
TODO: Calculate comprehensive memory comparison
APPROACH:
1. Count parameters in both models
2. Calculate bytes used (FP32 vs INT8)
3. Include quantization overhead
4. Return comparison metrics
Args:
original_model: Model before quantization
quantized_model: Model after quantization
Returns:
Dictionary with 'original_bytes', 'quantized_bytes', 'compression_ratio', 'memory_saved_mb', 'memory_saved_percent', plus parameter counts
EXAMPLE:
>>> layer1 = Linear(100, 50)
>>> layer2 = Linear(50, 10)
>>> model = SimpleModel(layer1, layer2)
>>> quantize_model(model)
>>> stats = compare_model_sizes(model, model) # Same model after in-place quantization
>>> print(f"Reduced to {stats['reduction_ratio']:.1f}x smaller")
Reduced to 4.0x smaller
HINTS:
- FP32 uses 4 bytes per parameter, INT8 uses 1 byte
- Include scale/zero_point overhead (2 values per quantized layer)
- Expected ratio: ~4x for INT8 quantization
"""
### BEGIN SOLUTION
# Count original model parameters
# SimpleModel has .layers attribute, layers may have .parameters() method
original_params = 0
original_bytes = 0
for layer in original_model.layers:
if hasattr(layer, 'parameters'):
params = layer.parameters()
for param in params:
original_params += param.data.size
original_bytes += param.data.size * BYTES_PER_FLOAT32
# Count quantized model parameters
quantized_params = 0
quantized_bytes = 0
for layer in quantized_model.layers:
if isinstance(layer, QuantizedLinear):
memory_info = layer.memory_usage()
quantized_bytes += memory_info['quantized_bytes']
params = layer.parameters()
for param in params:
quantized_params += param.data.size
else:
# Non-quantized layers - may have .parameters() method
if hasattr(layer, 'parameters'):
params = layer.parameters()
for param in params:
quantized_params += param.data.size
quantized_bytes += param.data.size * BYTES_PER_FLOAT32
compression_ratio = original_bytes / quantized_bytes if quantized_bytes > 0 else 1.0
memory_saved = original_bytes - quantized_bytes
return {
'original_params': original_params,
'quantized_params': quantized_params,
'original_bytes': original_bytes,
'quantized_bytes': quantized_bytes,
'compression_ratio': compression_ratio,
'memory_saved_mb': memory_saved / MB_TO_BYTES,
'memory_saved_percent': (memory_saved / original_bytes) * 100 if original_bytes > 0 else 0
}
### END SOLUTION
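# --- Back-of-envelope check of the ~4x figure (illustrative):
# --- a Linear(100, 50) layer has 100*50 + 50 = 5050 parameters.
fp32_bytes = 5050 * BYTES_PER_FLOAT32                      # 20200 bytes
int8_bytes = 5050 * BYTES_PER_INT8 + 2 * BYTES_PER_FLOAT32  # weights + scale/zero-point
print(fp32_bytes / int8_bytes)                             # ≈ 4.0x compression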
# %% ../../modules/15_quantization/15_quantization.ipynb 36
class QuantizationComplete:
"""

View File

@@ -15,7 +15,8 @@
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['BYTES_PER_FLOAT32', 'MB_TO_BYTES', 'Embedding', 'PositionalEncoding', 'EmbeddingLayer']
__all__ = ['BYTES_PER_FLOAT32', 'MB_TO_BYTES', 'Embedding', 'PositionalEncoding', 'create_sinusoidal_embeddings',
'EmbeddingLayer']
# %% ../../modules/11_embeddings/11_embeddings.ipynb 2
import numpy as np
@@ -226,6 +227,67 @@ class PositionalEncoding:
return f"PositionalEncoding(max_seq_len={self.max_seq_len}, embed_dim={self.embed_dim})"
### END SOLUTION
# %% ../../modules/11_embeddings/11_embeddings.ipynb 14
def create_sinusoidal_embeddings(max_seq_len: int, embed_dim: int) -> Tensor:
"""
Create sinusoidal positional encodings as used in "Attention Is All You Need".
These fixed encodings use sine and cosine functions to create unique
positional patterns that don't require training and can extrapolate
to longer sequences than seen during training.
TODO: Implement sinusoidal positional encoding generation
APPROACH:
1. Create position indices: [0, 1, 2, ..., max_seq_len-1]
2. Create dimension indices for frequency calculation
3. Apply sine to even dimensions, cosine to odd dimensions
4. Use the transformer paper formula with 10000 base
MATHEMATICAL FORMULA:
PE(pos, 2i) = sin(pos / 10000^(2i/embed_dim))
PE(pos, 2i+1) = cos(pos / 10000^(2i/embed_dim))
EXAMPLE:
>>> pe = create_sinusoidal_embeddings(512, 64)
>>> print(pe.shape)
(512, 64)
>>> # Position 0: [0, 1, 0, 1, 0, 1, ...] (sin(0)=0, cos(0)=1)
>>> # Each position gets unique trigonometric signature
HINTS:
- Use np.arange to create position and dimension arrays
- Calculate div_term using exponential for frequency scaling
- Apply different formulas to even/odd dimensions
- The 10000 base creates different frequencies for different dimensions
"""
### BEGIN SOLUTION
# Create position indices [0, 1, 2, ..., max_seq_len-1]
position = np.arange(max_seq_len, dtype=np.float32)[:, np.newaxis] # (max_seq_len, 1)
# Create dimension indices for calculating frequencies
div_term = np.exp(
np.arange(0, embed_dim, 2, dtype=np.float32) *
-(np.log(10000.0) / embed_dim)
) # (embed_dim//2,)
# Initialize the positional encoding matrix
pe = np.zeros((max_seq_len, embed_dim), dtype=np.float32)
# Apply sine to even indices (0, 2, 4, ...)
pe[:, 0::2] = np.sin(position * div_term)
# Apply cosine to odd indices (1, 3, 5, ...)
if embed_dim % 2 == 1:
# Handle odd embed_dim by only filling available positions
pe[:, 1::2] = np.cos(position * div_term[:-1])
else:
pe[:, 1::2] = np.cos(position * div_term)
return Tensor(pe)
### END SOLUTION
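# --- Quick check of the documented properties (illustrative):
pe = create_sinusoidal_embeddings(512, 64)
assert pe.shape == (512, 64)
row0 = pe.data[0]
assert np.allclose(row0[0::2], 0.0)  # sin(0) = 0 on even dimensions
assert np.allclose(row0[1::2], 1.0)  # cos(0) = 1 on odd dimensions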
# %% ../../modules/11_embeddings/11_embeddings.ipynb 18
class EmbeddingLayer:
"""

View File

@@ -14,6 +14,7 @@ from .src import SrcCommand
from .nbgrader import NBGraderCommand
from .benchmark import BenchmarkCommand
from .community import CommunityCommand
from .verify import VerifyCommand
# Command groups (with subcommands organized in subfolders)
from .system import SystemCommand
@@ -29,6 +30,7 @@ __all__ = [
'NBGraderCommand',
'BenchmarkCommand',
'CommunityCommand',
'VerifyCommand',
# Command groups
'SystemCommand',
'ModuleWorkflowCommand',

tito/commands/verify.py (new file, 232 lines)
View File

@@ -0,0 +1,232 @@
"""
TinyTorch Verify Command
Checks that the environment is set up correctly and ready to use.
On success, prompts to join the community map.
This is essentially `tito system health` plus a package import check and a community sign-up prompt.
"""
import sys
import os
import webbrowser
from argparse import ArgumentParser, Namespace
from pathlib import Path
from rich.panel import Panel
from rich.table import Table
from rich import box
from .base import BaseCommand
class VerifyCommand(BaseCommand):
"""Verify TinyTorch setup is ready, then join the community."""
@property
def name(self) -> str:
return "verify"
@property
def description(self) -> str:
return "Verify setup is ready, then join the community map"
def add_arguments(self, parser: ArgumentParser) -> None:
parser.add_argument(
"--skip-registration",
action="store_true",
help="Skip registration prompt after verification"
)
def run(self, args: Namespace) -> int:
"""Run verification checks and prompt for registration."""
self.console.print()
self.console.print(Panel.fit(
"[bold cyan]🔬 Verifying TinyTorch Setup[/bold cyan]",
border_style="cyan"
))
self.console.print()
all_passed = True
# 1. Environment checks
all_passed &= self._check_environment()
# 2. Project structure checks
all_passed &= self._check_structure()
# 3. Package import checks
all_passed &= self._check_package()
# Result
self.console.print()
if all_passed:
self._show_success()
if not args.skip_registration:
self._prompt_registration()
return 0
else:
self._show_failure()
return 1
def _check_environment(self) -> bool:
"""Check Python environment and dependencies."""
self.console.print("[bold]Environment[/bold]")
all_ok = True
# Python
self.console.print(f" [green]✓[/green] Python {sys.version.split()[0]}")
# Virtual environment
venv_exists = self.venv_path.exists()
in_venv = (
os.environ.get('VIRTUAL_ENV') is not None or
(hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix) or
hasattr(sys, 'real_prefix')
)
if venv_exists and in_venv:
self.console.print(" [green]✓[/green] Virtual environment active")
elif venv_exists:
self.console.print(" [yellow]![/yellow] Virtual environment exists but not active")
self.console.print(" [dim]Run: source activate.sh[/dim]")
else:
self.console.print(" [yellow]![/yellow] No virtual environment")
# Required dependencies
required = [
('numpy', 'NumPy'),
('rich', 'Rich'),
('yaml', 'PyYAML'),
]
for module, name in required:
try:
__import__(module)
self.console.print(f" [green]✓[/green] {name}")
except ImportError:
self.console.print(f" [red]✗[/red] {name} [dim](pip install {module})[/dim]")
all_ok = False
self.console.print()
return all_ok
def _check_structure(self) -> bool:
"""Check project structure exists."""
self.console.print("[bold]Project Structure[/bold]")
all_ok = True
paths = [
('tinytorch/', 'Package'),
('tinytorch/core/', 'Core modules'),
('src/', 'Source modules'),
]
for path, desc in paths:
if Path(path).exists():
self.console.print(f" [green]✓[/green] {path}")
else:
self.console.print(f" [red]✗[/red] {path} [dim]({desc})[/dim]")
all_ok = False
self.console.print()
return all_ok
def _check_package(self) -> bool:
"""Check that tinytorch package is importable."""
self.console.print("[bold]Package[/bold]")
all_ok = True
# Import tinytorch
try:
import tinytorch
self.console.print(" [green]✓[/green] import tinytorch")
except ImportError as e:
self.console.print(f" [red]✗[/red] import tinytorch")
self.console.print(f" [dim red]{e}[/dim red]")
return False
# Check core components
try:
from tinytorch import Tensor
self.console.print(" [green]✓[/green] Tensor available")
except ImportError:
self.console.print(" [red]✗[/red] Tensor not available")
all_ok = False
try:
from tinytorch import Linear, ReLU
self.console.print(" [green]✓[/green] Layers available")
except ImportError:
self.console.print(" [red]✗[/red] Layers not available")
all_ok = False
try:
from tinytorch import SGD
self.console.print(" [green]✓[/green] Optimizer available")
except ImportError:
self.console.print(" [red]✗[/red] Optimizer not available")
all_ok = False
return all_ok
def _show_success(self) -> None:
"""Show success message."""
self.console.print(Panel.fit(
"[bold green]✅ TinyTorch is ready![/bold green]\n\n"
"Your environment is set up correctly.\n"
"You can start working on modules.",
border_style="green",
box=box.ROUNDED
))
def _show_failure(self) -> None:
"""Show failure message."""
self.console.print(Panel.fit(
"[bold red]❌ Setup incomplete[/bold red]\n\n"
"Some checks failed. See above for details.\n\n"
"[dim]Run 'tito setup' to fix common issues[/dim]",
border_style="red",
box=box.ROUNDED
))
def _prompt_registration(self) -> None:
"""Prompt user to join the community."""
from rich.prompt import Confirm
self.console.print()
self.console.print(Panel.fit(
"[bold cyan]🌍 Join the TinyTorch Community[/bold cyan]\n\n"
"Add yourself to the map at [link=https://tinytorch.ai/map]tinytorch.ai/map[/link]\n\n"
"[dim]• See learners worldwide\n"
"• Country & institution (optional)\n"
"• No account required[/dim]",
border_style="cyan"
))
join = Confirm.ask("\n[bold]Join the community?[/bold]", default=True)
if join:
self._open_registration()
else:
self.console.print("[dim]No problem! Run 'tito verify' anytime to join later.[/dim]")
def _open_registration(self) -> None:
"""Open registration page."""
url = "https://tinytorch.ai/join"
self.console.print(f"\n[cyan]Opening registration...[/cyan]")
try:
webbrowser.open(url)
self.console.print(f"[green]✓[/green] Browser opened")
self.console.print(f"[dim] {url}[/dim]")
except Exception:
self.console.print(f"[yellow]Could not open browser.[/yellow]")
self.console.print(f"Please visit: [cyan]{url}[/cyan]")
self.console.print("\n[green]Welcome to the community! 🎉[/green]")

View File

@@ -38,6 +38,7 @@ from .commands.milestone import MilestoneCommand
from .commands.setup import SetupCommand
from .commands.benchmark import BenchmarkCommand
from .commands.community import CommunityCommand
from .commands.verify import VerifyCommand
# Configure logging
logging.basicConfig(
@@ -79,6 +80,8 @@ class TinyTorchCLI:
'test': TestCommand,
'grade': GradeCommand,
'logo': LogoCommand,
# Verification
'verify': VerifyCommand,
}
# Command categorization for help display
@@ -91,6 +94,7 @@ class TinyTorchCLI:
('[green]tito setup[/green]', 'First-time setup'),
('[green]tito module start 01[/green]', 'Start Module 01 (tensors)'),
('[green]tito module complete 01[/green]', 'Test, export, and track progress'),
('[green]tito verify[/green]', 'Verify installation and join community'),
],
'track_progress': [
('[yellow]tito module status[/yellow]', 'View module progress'),