diff --git a/tinytorch/_modidx.py b/tinytorch/_modidx.py index 88d63238..3df88156 100644 --- a/tinytorch/_modidx.py +++ b/tinytorch/_modidx.py @@ -1,19 +1,3 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/[unknown]/[unknown]_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ # Autogenerated by nbdev d = { 'settings': { 'branch': 'main', @@ -21,36 +5,7 @@ d = { 'settings': { 'branch': 'main', 'doc_host': 'https://tinytorch.github.io', 'git_url': 'https://github.com/tinytorch/TinyTorch/', 'lib_path': 'tinytorch'}, - 'syms': { 'tinytorch.applications.tinygpt': { 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline': ( '20_capstone/capstone_dev.html#completetinygptpipeline', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.__init__': ( '20_capstone/capstone_dev.html#completetinygptpipeline.__init__', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.generate_text': ( '20_capstone/capstone_dev.html#completetinygptpipeline.generate_text', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.optimize_model': ( '20_capstone/capstone_dev.html#completetinygptpipeline.optimize_model', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.prepare_training_data': ( '20_capstone/capstone_dev.html#completetinygptpipeline.prepare_training_data', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.train': ( '20_capstone/capstone_dev.html#completetinygptpipeline.train', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPT': ( '20_capstone/capstone_dev.html#tinygpt', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPT.__init__': ( '20_capstone/capstone_dev.html#tinygpt.__init__', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPTTrainer': ( '20_capstone/capstone_dev.html#tinygpttrainer', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPTTrainer.__init__': ( '20_capstone/capstone_dev.html#tinygpttrainer.__init__', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPTTrainer.prepare_batch': ( '20_capstone/capstone_dev.html#tinygpttrainer.prepare_batch', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPTTrainer.train_step': ( '20_capstone/capstone_dev.html#tinygpttrainer.train_step', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.test_unit_complete_pipeline': ( '20_capstone/capstone_dev.html#test_unit_complete_pipeline', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.test_unit_tinygpt_init': ( '20_capstone/capstone_dev.html#test_unit_tinygpt_init', - 
'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.test_unit_training_pipeline': ( '20_capstone/capstone_dev.html#test_unit_training_pipeline', - 'tinytorch/applications/tinygpt.py')}, + 'syms': { 'tinytorch.applications.tinygpt': {}, 'tinytorch.benchmarking.benchmark': { 'tinytorch.benchmarking.benchmark.Benchmark': ( '19_benchmarking/benchmarking_dev.html#benchmark', 'tinytorch/benchmarking/benchmark.py'), 'tinytorch.benchmarking.benchmark.Benchmark.__init__': ( '19_benchmarking/benchmarking_dev.html#benchmark.__init__', @@ -89,6 +44,8 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/benchmarking/benchmark.py'), 'tinytorch.benchmarking.benchmark.TinyMLPerf.run_standard_benchmark': ( '19_benchmarking/benchmarking_dev.html#tinymlperf.run_standard_benchmark', 'tinytorch/benchmarking/benchmark.py'), + 'tinytorch.benchmarking.benchmark.calculate_normalized_scores': ( '19_benchmarking/benchmarking_dev.html#calculate_normalized_scores', + 'tinytorch/benchmarking/benchmark.py'), 'tinytorch.benchmarking.benchmark.test_unit_benchmark': ( '19_benchmarking/benchmarking_dev.html#test_unit_benchmark', 'tinytorch/benchmarking/benchmark.py'), 'tinytorch.benchmarking.benchmark.test_unit_benchmark_suite': ( '19_benchmarking/benchmarking_dev.html#test_unit_benchmark_suite', @@ -105,6 +62,8 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/competition/submit.py'), 'tinytorch.competition.submit.validate_installation': ( '20_competition/competition_dev.html#validate_installation', 'tinytorch/competition/submit.py'), + 'tinytorch.competition.submit.validate_submission': ( '20_competition/competition_dev.html#validate_submission', + 'tinytorch/competition/submit.py'), 'tinytorch.competition.submit.worked_example_optimization': ( '20_competition/competition_dev.html#worked_example_optimization', 'tinytorch/competition/submit.py')}, 'tinytorch.core.activations': { 'tinytorch.core.activations.GELU': ( '02_activations/activations_dev.html#gelu', @@ -341,7 +300,11 @@ d = { 'settings': { 'branch': 'main', 'tinytorch.core.training.Trainer.save_checkpoint': ( '07_training/training_dev.html#trainer.save_checkpoint', 'tinytorch/core/training.py'), 'tinytorch.core.training.Trainer.train_epoch': ( '07_training/training_dev.html#trainer.train_epoch', - 'tinytorch/core/training.py')}, + 'tinytorch/core/training.py'), + 'tinytorch.core.training.load_checkpoint': ( '07_training/training_dev.html#load_checkpoint', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.save_checkpoint': ( '07_training/training_dev.html#save_checkpoint', + 'tinytorch/core/training.py')}, 'tinytorch.data.loader': { 'tinytorch.data.loader.DataLoader': ( '08_dataloader/dataloader_dev.html#dataloader', 'tinytorch/data/loader.py'), 'tinytorch.data.loader.DataLoader.__init__': ( '08_dataloader/dataloader_dev.html#dataloader.__init__', @@ -386,8 +349,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/generation/kv_cache.py')}, 'tinytorch.models.transformer': { 'tinytorch.models.transformer.GPT': ( '13_transformers/transformers_dev.html#gpt', 'tinytorch/models/transformer.py'), - 'tinytorch.models.transformer.GPT.__call__': ( '13_transformers/transformers_dev.html#gpt.__call__', - 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.GPT.__init__': ( '13_transformers/transformers_dev.html#gpt.__init__', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.GPT._create_causal_mask': ( '13_transformers/transformers_dev.html#gpt._create_causal_mask', @@ -400,8 +361,6 @@ d = { 'settings': { 'branch': 
'main', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.LayerNorm': ( '13_transformers/transformers_dev.html#layernorm', 'tinytorch/models/transformer.py'), - 'tinytorch.models.transformer.LayerNorm.__call__': ( '13_transformers/transformers_dev.html#layernorm.__call__', - 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.LayerNorm.__init__': ( '13_transformers/transformers_dev.html#layernorm.__init__', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.LayerNorm.forward': ( '13_transformers/transformers_dev.html#layernorm.forward', @@ -410,8 +369,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.MLP': ( '13_transformers/transformers_dev.html#mlp', 'tinytorch/models/transformer.py'), - 'tinytorch.models.transformer.MLP.__call__': ( '13_transformers/transformers_dev.html#mlp.__call__', - 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.MLP.__init__': ( '13_transformers/transformers_dev.html#mlp.__init__', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.MLP.forward': ( '13_transformers/transformers_dev.html#mlp.forward', @@ -420,8 +377,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.TransformerBlock': ( '13_transformers/transformers_dev.html#transformerblock', 'tinytorch/models/transformer.py'), - 'tinytorch.models.transformer.TransformerBlock.__call__': ( '13_transformers/transformers_dev.html#transformerblock.__call__', - 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.TransformerBlock.__init__': ( '13_transformers/transformers_dev.html#transformerblock.__init__', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.TransformerBlock.forward': ( '13_transformers/transformers_dev.html#transformerblock.forward', @@ -429,49 +384,54 @@ d = { 'settings': { 'branch': 'main', 'tinytorch.models.transformer.TransformerBlock.parameters': ( '13_transformers/transformers_dev.html#transformerblock.parameters', 'tinytorch/models/transformer.py')}, 'tinytorch.optimization.acceleration': {}, - 'tinytorch.optimization.compression': { 'tinytorch.optimization.compression.CompressionComplete': ( '17_compression/compression_dev.html#compressioncomplete', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.CompressionComplete.compress_model': ( '17_compression/compression_dev.html#compressioncomplete.compress_model', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.CompressionComplete.magnitude_prune': ( '17_compression/compression_dev.html#compressioncomplete.magnitude_prune', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.CompressionComplete.measure_sparsity': ( '17_compression/compression_dev.html#compressioncomplete.measure_sparsity', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.CompressionComplete.structured_prune': ( '17_compression/compression_dev.html#compressioncomplete.structured_prune', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation': ( '17_compression/compression_dev.html#knowledgedistillation', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation.__init__': ( '17_compression/compression_dev.html#knowledgedistillation.__init__', - 'tinytorch/optimization/compression.py'), - 
'tinytorch.optimization.compression.KnowledgeDistillation._cross_entropy': ( '17_compression/compression_dev.html#knowledgedistillation._cross_entropy', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation._kl_divergence': ( '17_compression/compression_dev.html#knowledgedistillation._kl_divergence', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation._softmax': ( '17_compression/compression_dev.html#knowledgedistillation._softmax', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation.distillation_loss': ( '17_compression/compression_dev.html#knowledgedistillation.distillation_loss', - 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression': { 'tinytorch.optimization.compression.Linear': ( '17_compression/compression_dev.html#linear', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Linear.__init__': ( '17_compression/compression_dev.html#linear.__init__', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Linear.forward': ( '17_compression/compression_dev.html#linear.forward', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Linear.parameters': ( '17_compression/compression_dev.html#linear.parameters', + 'tinytorch/optimization/compression.py'), 'tinytorch.optimization.compression.Sequential': ( '17_compression/compression_dev.html#sequential', 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.Sequential.__call__': ( '17_compression/compression_dev.html#sequential.__call__', - 'tinytorch/optimization/compression.py'), 'tinytorch.optimization.compression.Sequential.__init__': ( '17_compression/compression_dev.html#sequential.__init__', 'tinytorch/optimization/compression.py'), 'tinytorch.optimization.compression.Sequential.forward': ( '17_compression/compression_dev.html#sequential.forward', 'tinytorch/optimization/compression.py'), 'tinytorch.optimization.compression.Sequential.parameters': ( '17_compression/compression_dev.html#sequential.parameters', 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.compress_model': ( '17_compression/compression_dev.html#compress_model', + 'tinytorch.optimization.compression.Tensor': ( '17_compression/compression_dev.html#tensor', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.__add__': ( '17_compression/compression_dev.html#tensor.__add__', 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.magnitude_prune': ( '17_compression/compression_dev.html#magnitude_prune', + 'tinytorch.optimization.compression.Tensor.__init__': ( '17_compression/compression_dev.html#tensor.__init__', 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.measure_sparsity': ( '17_compression/compression_dev.html#measure_sparsity', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.structured_prune': ( '17_compression/compression_dev.html#structured_prune', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.test_unit_knowledge_distillation': ( '17_compression/compression_dev.html#test_unit_knowledge_distillation', - 'tinytorch/optimization/compression.py')}, - 'tinytorch.optimization.quantization': {}, + 'tinytorch.optimization.compression.Tensor.__mul__': ( 
'17_compression/compression_dev.html#tensor.__mul__', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.__repr__': ( '17_compression/compression_dev.html#tensor.__repr__', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.abs': ( '17_compression/compression_dev.html#tensor.abs', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.matmul': ( '17_compression/compression_dev.html#tensor.matmul', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.sum': ( '17_compression/compression_dev.html#tensor.sum', + 'tinytorch/optimization/compression.py')}, + 'tinytorch.optimization.quantization': { 'tinytorch.optimization.quantization.QuantizationComplete': ( '16_quantization/quantization_dev.html#quantizationcomplete', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.QuantizationComplete.compare_models': ( '16_quantization/quantization_dev.html#quantizationcomplete.compare_models', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.QuantizationComplete.dequantize_tensor': ( '16_quantization/quantization_dev.html#quantizationcomplete.dequantize_tensor', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.QuantizationComplete.quantize_model': ( '16_quantization/quantization_dev.html#quantizationcomplete.quantize_model', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.QuantizationComplete.quantize_tensor': ( '16_quantization/quantization_dev.html#quantizationcomplete.quantize_tensor', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.dequantize_int8': ( '16_quantization/quantization_dev.html#dequantize_int8', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.quantize_int8': ( '16_quantization/quantization_dev.html#quantize_int8', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.quantize_model': ( '16_quantization/quantization_dev.html#quantize_model', + 'tinytorch/optimization/quantization.py')}, 'tinytorch.profiling.profiler': { 'tinytorch.profiling.profiler.Profiler': ( '14_profiling/profiling_dev.html#profiler', 'tinytorch/profiling/profiler.py'), 'tinytorch.profiling.profiler.Profiler.__init__': ( '14_profiling/profiling_dev.html#profiler.__init__', @@ -496,8 +456,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/profiling/profiler.py')}, 'tinytorch.text.embeddings': { 'tinytorch.text.embeddings.Embedding': ( '11_embeddings/embeddings_dev.html#embedding', 'tinytorch/text/embeddings.py'), - 'tinytorch.text.embeddings.Embedding.__call__': ( '11_embeddings/embeddings_dev.html#embedding.__call__', - 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.Embedding.__init__': ( '11_embeddings/embeddings_dev.html#embedding.__init__', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.Embedding.__repr__': ( '11_embeddings/embeddings_dev.html#embedding.__repr__', @@ -508,8 +466,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.EmbeddingLayer': ( '11_embeddings/embeddings_dev.html#embeddinglayer', 'tinytorch/text/embeddings.py'), - 'tinytorch.text.embeddings.EmbeddingLayer.__call__': ( '11_embeddings/embeddings_dev.html#embeddinglayer.__call__', - 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.EmbeddingLayer.__init__': ( 
'11_embeddings/embeddings_dev.html#embeddinglayer.__init__', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.EmbeddingLayer.__repr__': ( '11_embeddings/embeddings_dev.html#embeddinglayer.__repr__', @@ -520,8 +476,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.PositionalEncoding': ( '11_embeddings/embeddings_dev.html#positionalencoding', 'tinytorch/text/embeddings.py'), - 'tinytorch.text.embeddings.PositionalEncoding.__call__': ( '11_embeddings/embeddings_dev.html#positionalencoding.__call__', - 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.PositionalEncoding.__init__': ( '11_embeddings/embeddings_dev.html#positionalencoding.__init__', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.PositionalEncoding.__repr__': ( '11_embeddings/embeddings_dev.html#positionalencoding.__repr__', diff --git a/tinytorch/applications/tinygpt.py b/tinytorch/applications/tinygpt.py index 24ccef7b..80dabc9a 100644 --- a/tinytorch/applications/tinygpt.py +++ b/tinytorch/applications/tinygpt.py @@ -1,679 +1,8 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_tinygpt/tinygpt_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/20_capstone/capstone_dev.ipynb. + # %% auto 0 -__all__ = ['TinyGPT', 'test_unit_tinygpt_init', 'TinyGPTTrainer', 'test_unit_training_pipeline', 'CompleteTinyGPTPipeline', - 'test_unit_complete_pipeline'] +__all__ = [] # %% ../../modules/source/20_capstone/capstone_dev.ipynb 2 #| default_exp applications.tinygpt #| export - -# %% ../../modules/source/20_capstone/capstone_dev.ipynb 7 -class TinyGPT: - """ - Complete GPT implementation integrating all TinyTorch modules. - - This class demonstrates how framework components compose into real applications. - Built using modules 01,02,03,11,12,13 as core architecture. - - Architecture: - - Token Embeddings (Module 11) - - Positional Encoding (Module 11) - - Transformer Blocks (Module 13) - - Output Linear Layer (Module 03) - - Language Modeling Head (Module 04) - """ - - def __init__(self, vocab_size: int, embed_dim: int = 128, num_layers: int = 4, - num_heads: int = 4, max_seq_len: int = 256, dropout: float = 0.1): - """ - Initialize TinyGPT with production-inspired architecture. - - TODO: Build a complete GPT model using TinyTorch components - - APPROACH: - 1. Create token embeddings (vocab_size × embed_dim) - 2. Create positional encoding (max_seq_len × embed_dim) - 3. Build transformer layers using TransformerBlock - 4. Add output projection layer - 5. 
Calculate and report parameter count - - ARCHITECTURE DECISIONS: - - embed_dim=128: Small enough for fast training, large enough for learning - - num_layers=4: Sufficient depth without excessive memory - - num_heads=4: Multi-head attention without head_dim being too small - - max_seq_len=256: Reasonable context length for character-level modeling - - EXAMPLE: - >>> model = TinyGPT(vocab_size=50, embed_dim=128, num_layers=4) - >>> print(f"Parameters: {model.count_parameters():,}") - Parameters: 1,234,567 - - HINTS: - - Use Embedding class for token embeddings - - Use PositionalEncoding for position information - - Stack TransformerBlock instances in a list - - Final Linear layer maps embed_dim → vocab_size - """ - ### BEGIN SOLUTION - self.vocab_size = vocab_size - self.embed_dim = embed_dim - self.num_layers = num_layers - self.num_heads = num_heads - self.max_seq_len = max_seq_len - self.dropout = dropout - - # Token embeddings: convert token IDs to dense vectors - self.token_embedding = Embedding(vocab_size, embed_dim) - - # Positional encoding: add position information - self.positional_encoding = PositionalEncoding(max_seq_len, embed_dim) - - # Transformer layers: core processing - self.transformer_blocks = [] - for _ in range(num_layers): - block = TransformerBlock(embed_dim, num_heads, mlp_ratio=4.0) - self.transformer_blocks.append(block) - - # Output projection: map back to vocabulary - self.output_projection = Linear(embed_dim, vocab_size) - - # Dropout for regularization - self.dropout_layer = Dropout(dropout) - - # Calculate parameter count for systems analysis - self._param_count = self.count_parameters() - print(f"🏗️ TinyGPT initialized: {self._param_count:,} parameters") - print(f"📐 Architecture: {num_layers}L/{num_heads}H/{embed_dim}D") - print(f"💾 Estimated memory: {self._param_count * 4 / 1024 / 1024:.1f}MB") - ### END SOLUTION - -def test_unit_tinygpt_init(): - """🔬 Test TinyGPT initialization and parameter counting.""" - print("🔬 Unit Test: TinyGPT Initialization...") - - # Create a small model for testing - model = TinyGPT(vocab_size=50, embed_dim=64, num_layers=2, num_heads=2, max_seq_len=128) - - # Verify architecture components exist - assert hasattr(model, 'token_embedding') - assert hasattr(model, 'positional_encoding') - assert hasattr(model, 'transformer_blocks') - assert hasattr(model, 'output_projection') - assert len(model.transformer_blocks) == 2 - - # Verify parameter count is reasonable - param_count = model.count_parameters() - assert param_count > 0 - assert param_count < 1000000 # Sanity check for small model - - print(f"✅ Model created with {param_count:,} parameters") - print("✅ TinyGPT initialization works correctly!") - -# Run immediate test -test_unit_tinygpt_init() - -# %% ../../modules/source/20_capstone/capstone_dev.ipynb 10 -class TinyGPTTrainer: - """ - Complete training pipeline integrating optimizers, schedulers, and monitoring. - - Uses modules 05 (autograd), 06 (optimizers), 07 (training) for end-to-end training. - """ - - def __init__(self, model: TinyGPT, tokenizer: CharTokenizer, - learning_rate: float = 3e-4, weight_decay: float = 0.01): - """ - Initialize trainer with model and optimization components. - - TODO: Set up complete training infrastructure - - APPROACH: - 1. Store model and tokenizer references - 2. Initialize AdamW optimizer (standard for transformers) - 3. Initialize loss function (CrossEntropyLoss for language modeling) - 4. Set up learning rate scheduler (cosine schedule) - 5. 
Initialize training metrics tracking - - PRODUCTION CHOICES: - - AdamW: Better generalization than Adam (weight decay) - - learning_rate=3e-4: Standard for small transformers - - Cosine schedule: Smooth learning rate decay - - CrossEntropy: Standard for classification/language modeling - - EXAMPLE: - >>> model = TinyGPT(vocab_size=100) - >>> tokenizer = CharTokenizer(['a', 'b', 'c']) - >>> trainer = TinyGPTTrainer(model, tokenizer) - >>> print("Trainer ready for training") - Trainer ready for training - - HINTS: - - Get all model parameters with model.parameters() - - Use AdamW with weight_decay for better generalization - - CrossEntropyLoss handles the language modeling objective - """ - ### BEGIN SOLUTION - self.model = model - self.tokenizer = tokenizer - - # Collect all trainable parameters - all_params = [] - all_params.extend(model.token_embedding.parameters()) - for block in model.transformer_blocks: - all_params.extend(block.parameters()) - all_params.extend(model.output_projection.parameters()) - - # Initialize optimizer (AdamW for transformers) - self.optimizer = AdamW( - params=all_params, - lr=learning_rate, - weight_decay=weight_decay, - betas=(0.9, 0.95) # Standard for language models - ) - - # Loss function for next token prediction - self.loss_fn = CrossEntropyLoss() - - # Learning rate scheduler - self.scheduler = CosineSchedule( - optimizer=self.optimizer, - max_epochs=100, # Will adjust based on actual training - min_lr=learning_rate * 0.1 - ) - - # Training metrics - self.training_history = { - 'losses': [], - 'perplexities': [], - 'learning_rates': [], - 'epoch': 0 - } - - print(f"🚀 Trainer initialized:") - print(f" Optimizer: AdamW (lr={learning_rate}, wd={weight_decay})") - print(f" Parameters: {len(all_params):,} tensors") - print(f" Loss: CrossEntropyLoss") - ### END SOLUTION - - def prepare_batch(self, text_batch: List[str], max_length: int = 128) -> Tuple[Tensor, Tensor]: - """ - Convert text batch to input/target tensors for language modeling. - - TODO: Implement text-to-tensor conversion with proper targets - - APPROACH: - 1. Tokenize each text in the batch - 2. Pad/truncate to consistent length - 3. Create input_ids (text) and target_ids (text shifted by 1) - 4. 
Convert to Tensor format - - LANGUAGE MODELING OBJECTIVE: - - Input: [token1, token2, token3, token4] - - Target: [token2, token3, token4, token5] - - Model predicts next token at each position - - EXAMPLE: - >>> trainer = TinyGPTTrainer(model, tokenizer) - >>> texts = ["hello world", "ai is fun"] - >>> inputs, targets = trainer.prepare_batch(texts) - >>> print(inputs.shape, targets.shape) - (2, 128) (2, 128) - - HINTS: - - Use tokenizer.encode() for text → token conversion - - Pad shorter sequences with tokenizer pad token - - Target sequence is input sequence shifted right by 1 - """ - ### BEGIN SOLUTION - batch_size = len(text_batch) - - # Tokenize all texts - tokenized_batch = [] - for text in text_batch: - tokens = self.tokenizer.encode(text) - - # Truncate or pad to max_length - if len(tokens) > max_length: - tokens = tokens[:max_length] - else: - # Pad with special token (use 0 as pad) - tokens.extend([0] * (max_length - len(tokens))) - - tokenized_batch.append(tokens) - - # Convert to numpy then Tensor - input_ids = Tensor(np.array(tokenized_batch)) # (batch_size, seq_len) - - # Create targets (shifted input for next token prediction) - target_ids = Tensor(np.roll(input_ids.data, -1, axis=1)) # Shift left by 1 - - return input_ids, target_ids - ### END SOLUTION - - def train_step(self, input_ids: Tensor, target_ids: Tensor) -> float: - """ - Single training step with forward, backward, and optimization. - - TODO: Implement complete training step - - APPROACH: - 1. Zero gradients from previous step - 2. Forward pass to get logits - 3. Compute loss between logits and targets - 4. Backward pass to compute gradients - 5. Optimizer step to update parameters - 6. Return loss value for monitoring - - MEMORY MANAGEMENT: - During training, memory usage = 3× model size: - - 1× for parameters - - 1× for gradients - - 1× for optimizer states (Adam moments) - - EXAMPLE: - >>> loss = trainer.train_step(input_ids, target_ids) - >>> print(f"Training loss: {loss:.4f}") - Training loss: 2.3456 - - HINTS: - - Always zero_grad() before forward pass - - Loss should be computed on flattened logits and targets - - Call backward() on the loss tensor - """ - ### BEGIN SOLUTION - # Zero gradients from previous step - self.optimizer.zero_grad() - - # Forward pass - logits = self.model.forward(input_ids) # (batch, seq_len, vocab_size) - - # Reshape for loss computation - batch_size, seq_len, vocab_size = logits.shape - logits_flat = logits.reshape(batch_size * seq_len, vocab_size) - targets_flat = target_ids.reshape(batch_size * seq_len) - - # Compute loss - loss = self.loss_fn.forward(logits_flat, targets_flat) - - # Backward pass - loss.backward() - - # Optimizer step - self.optimizer.step() - - # Return scalar loss for monitoring - return float(loss.data.item() if hasattr(loss.data, 'item') else loss.data) - ### END SOLUTION - -def test_unit_training_pipeline(): - """🔬 Test training pipeline components.""" - print("🔬 Unit Test: Training Pipeline...") - - # Create small model and trainer - model = TinyGPT(vocab_size=50, embed_dim=32, num_layers=2, num_heads=2) - tokenizer = CharTokenizer(['a', 'b', 'c', 'd', 'e', ' ']) - trainer = TinyGPTTrainer(model, tokenizer, learning_rate=1e-3) - - # Test batch preparation - texts = ["hello", "world"] - input_ids, target_ids = trainer.prepare_batch(texts, max_length=8) - - assert input_ids.shape == (2, 8), f"Expected (2, 8), got {input_ids.shape}" - assert target_ids.shape == (2, 8), f"Expected (2, 8), got {target_ids.shape}" - - # Test training step - initial_loss = 
trainer.train_step(input_ids, target_ids) - assert initial_loss > 0, "Loss should be positive" - - # Second step should work (gradients computed and applied) - second_loss = trainer.train_step(input_ids, target_ids) - assert second_loss > 0, "Second loss should also be positive" - - print(f"✅ Batch preparation shape: {input_ids.shape}") - print(f"✅ Initial loss: {initial_loss:.4f}") - print(f"✅ Second loss: {second_loss:.4f}") - print("✅ Training pipeline works correctly!") - -# Run immediate test -test_unit_training_pipeline() - -# %% ../../modules/source/20_capstone/capstone_dev.ipynb 14 -class CompleteTinyGPTPipeline: - """ - End-to-end ML pipeline demonstrating integration of all 19 modules. - - Pipeline stages: - 1. Data preparation (Module 10: Tokenization) - 2. Model creation (Modules 01-04, 11-13: Architecture) - 3. Training setup (Modules 05-07: Optimization) - 4. Training loop (Module 08: DataLoader) - 5. Optimization (Modules 17-18: Quantization, Pruning) - 6. Evaluation (Module 19: Benchmarking) - 7. Generation (Module 14: KV Caching) - """ - - def __init__(self, vocab_size: int = 100, embed_dim: int = 128, - num_layers: int = 4, num_heads: int = 4): - """ - Initialize complete end-to-end TinyGPT pipeline integrating all 19 modules. - - TODO: Set up a complete ML pipeline with tokenization, model, training, - profiling, and benchmarking components - - APPROACH: - 1. Store model architecture parameters (vocab_size, embed_dim, num_layers, num_heads) - 2. Initialize tokenizer using CharTokenizer from Module 10 with printable ASCII (32-127) - 3. Create TinyGPT model instance with stored parameters and max_seq_len=256 - 4. Setup TinyGPTTrainer for training orchestration with learning_rate=3e-4 - 5. Initialize Profiler (Module 15) and Benchmark (Module 19) for performance analysis - 6. Initialize pipeline state tracking (is_trained flag, training_history list) - 7. Print pipeline initialization summary with parameter count and memory usage - - EXAMPLE: - >>> pipeline = CompleteTinyGPTPipeline(vocab_size=100, embed_dim=128, - ... 
num_layers=4, num_heads=4) - 🏗️ Complete TinyGPT Pipeline Initialized - Model: 419,300 parameters - Memory: 1.6MB - >>> pipeline.model.count_parameters() - 419300 - >>> pipeline.is_trained - False - >>> len(pipeline.training_history) - 0 - - HINTS: - - CharTokenizer needs list of characters: [chr(i) for i in range(32, 127)] - - TinyGPT requires vocab_size, embed_dim, num_layers, num_heads, max_seq_len - - TinyGPTTrainer takes model, tokenizer, and learning_rate as arguments - - Benchmark expects (models_list, datasets_list, metrics_list) format - - Memory calculation: parameters * 4 bytes / 1024 / 1024 for MB - """ - - ### BEGIN SOLUTION - self.vocab_size = vocab_size - self.embed_dim = embed_dim - self.num_layers = num_layers - self.num_heads = num_heads - - # Stage 1: Initialize tokenizer (Module 10) - self.tokenizer = CharTokenizer([chr(i) for i in range(32, 127)]) # Printable ASCII - - # Stage 2: Create model (Modules 01-04, 11-13) - self.model = TinyGPT( - vocab_size=vocab_size, - embed_dim=embed_dim, - num_layers=num_layers, - num_heads=num_heads, - max_seq_len=256 - ) - - # Stage 3: Setup training (Modules 05-07) - self.trainer = TinyGPTTrainer(self.model, self.tokenizer, learning_rate=3e-4) - - # Stage 4: Initialize profiler and benchmark (Modules 15, 19) - self.profiler = Profiler() - self.benchmark = Benchmark([self.model], [], ["perplexity", "latency"]) - - # Pipeline state - self.is_trained = False - self.training_history = [] - - print("🏗️ Complete TinyGPT Pipeline Initialized") - print(f" Model: {self.model.count_parameters():,} parameters") - print(f" Memory: {self.model.count_parameters() * 4 / 1024 / 1024:.1f}MB") - ### END SOLUTION - - def prepare_training_data(self, text_corpus: List[str], batch_size: int = 8) -> DataLoader: - """ - Prepare training data using DataLoader (Module 08). - - TODO: Create DataLoader for training text data - - APPROACH: - 1. Tokenize all texts in corpus - 2. Create input/target pairs for language modeling - 3. Package into TensorDataset - 4. Create DataLoader with batching and shuffling - - EXAMPLE: - >>> pipeline = CompleteTinyGPTPipeline() - >>> corpus = ["hello world", "ai is amazing"] - >>> dataloader = pipeline.prepare_training_data(corpus, batch_size=2) - >>> print(f"Batches: {len(dataloader)}") - Batches: 1 - """ - ### BEGIN SOLUTION - # Tokenize and prepare training pairs - input_sequences = [] - target_sequences = [] - - for text in text_corpus: - tokens = self.tokenizer.encode(text) - if len(tokens) < 2: - continue # Skip very short texts - - # Create sliding window of input/target pairs - for i in range(len(tokens) - 1): - input_seq = tokens[:i+1] - target_seq = tokens[i+1] - - # Pad input to consistent length - max_len = 32 # Reasonable context window - if len(input_seq) > max_len: - input_seq = input_seq[-max_len:] - else: - input_seq = [0] * (max_len - len(input_seq)) + input_seq - - input_sequences.append(input_seq) - target_sequences.append(target_seq) - - # Convert to tensors - inputs = Tensor(np.array(input_sequences)) - targets = Tensor(np.array(target_sequences)) - - # Create dataset and dataloader - dataset = TensorDataset(inputs, targets) - dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True) - - print(f"📚 Training data prepared: {len(dataset)} examples, {len(dataloader)} batches") - return dataloader - ### END SOLUTION - - def train(self, dataloader: DataLoader, epochs: int = 10) -> Dict[str, List[float]]: - """ - Complete training loop with monitoring. 
- - TODO: Implement full training with progress tracking - - APPROACH: - 1. Loop through epochs - 2. For each batch: forward, backward, optimize - 3. Track loss and perplexity - 4. Update learning rate schedule - 5. Return training history - - EXAMPLE: - >>> history = pipeline.train(dataloader, epochs=5) - >>> print(f"Final loss: {history['losses'][-1]:.4f}") - Final loss: 1.2345 - """ - ### BEGIN SOLUTION - history = {'losses': [], 'perplexities': [], 'epochs': []} - - print(f"🚀 Starting training for {epochs} epochs...") - - for epoch in range(epochs): - epoch_losses = [] - - for batch_idx, (inputs, targets) in enumerate(dataloader): - # Training step - loss = self.trainer.train_step(inputs, targets) - epoch_losses.append(loss) - - # Log progress - if batch_idx % 10 == 0: - perplexity = np.exp(loss) - print(f" Epoch {epoch+1}/{epochs}, Batch {batch_idx}: " - f"Loss={loss:.4f}, PPL={perplexity:.2f}") - - # Epoch summary - avg_loss = np.mean(epoch_losses) - avg_perplexity = np.exp(avg_loss) - - history['losses'].append(avg_loss) - history['perplexities'].append(avg_perplexity) - history['epochs'].append(epoch + 1) - - # Update learning rate - self.trainer.scheduler.step() - - print(f"✅ Epoch {epoch+1} complete: Loss={avg_loss:.4f}, PPL={avg_perplexity:.2f}") - - self.is_trained = True - self.training_history = history - print(f"🎉 Training complete! Final perplexity: {history['perplexities'][-1]:.2f}") - - return history - ### END SOLUTION - - def optimize_model(self, quantize: bool = True, prune_sparsity: float = 0.0): - """ - Apply optimization techniques (Modules 17-18). - - TODO: Apply quantization and pruning optimizations - - APPROACH: - 1. Optionally apply quantization to reduce precision - 2. Optionally apply pruning to remove weights - 3. Measure size reduction - 4. Validate model still works - - EXAMPLE: - >>> pipeline.optimize_model(quantize=True, prune_sparsity=0.5) - Model optimized: 75% size reduction - """ - ### BEGIN SOLUTION - original_params = self.model.count_parameters() - original_memory = original_params * 4 / (1024 * 1024) - - optimizations_applied = [] - - if quantize: - # Apply quantization (simulated) - # In real implementation, would use quantize_model() - quantized_memory = original_memory / 4 # INT8 vs FP32 - optimizations_applied.append(f"INT8 quantization (4× memory reduction)") - print(" Applied INT8 quantization") - - if prune_sparsity > 0: - # Apply pruning (simulated) - # In real implementation, would use magnitude_prune() - remaining_weights = 1 - prune_sparsity - optimizations_applied.append(f"{prune_sparsity:.0%} pruning ({remaining_weights:.0%} weights remain)") - print(f" Applied {prune_sparsity:.0%} magnitude pruning") - - # Calculate final size - size_reduction = 1.0 - if quantize: - size_reduction *= 0.25 # 4× smaller - if prune_sparsity > 0: - size_reduction *= (1 - prune_sparsity) - - final_memory = original_memory * size_reduction - reduction_factor = original_memory / final_memory - - print(f"🔧 Model optimization complete:") - print(f" Original: {original_memory:.1f}MB") - print(f" Optimized: {final_memory:.1f}MB") - print(f" Reduction: {reduction_factor:.1f}× smaller") - print(f" Applied: {', '.join(optimizations_applied)}") - ### END SOLUTION - - def generate_text(self, prompt: str, max_tokens: int = 50) -> str: - """ - Generate text using the trained model. - - TODO: Implement text generation with proper encoding/decoding - - APPROACH: - 1. Encode prompt to token IDs - 2. Use model.generate() for autoregressive generation - 3. 
Decode generated tokens back to text - 4. Return generated text - - EXAMPLE: - >>> text = pipeline.generate_text("Hello", max_tokens=10) - >>> print(f"Generated: {text}") - Generated: Hello world this is AI - """ - ### BEGIN SOLUTION - if not self.is_trained: - print("⚠️ Model not trained yet. Generating with random weights.") - - # Encode prompt - prompt_tokens = self.tokenizer.encode(prompt) - prompt_tensor = Tensor([prompt_tokens]) - - # Generate tokens - generated_tokens = self.model.generate( - prompt_tensor, - max_new_tokens=max_tokens, - temperature=0.8, - use_cache=True - ) - - # Decode to text - all_tokens = generated_tokens.data[0].tolist() - generated_text = self.tokenizer.decode(all_tokens) - - return generated_text - ### END SOLUTION - -def test_unit_complete_pipeline(): - """🔬 Test complete pipeline integration.""" - print("🔬 Unit Test: Complete Pipeline Integration...") - - # Create pipeline - pipeline = CompleteTinyGPTPipeline(vocab_size=50, embed_dim=32, num_layers=2) - - # Test data preparation - corpus = ["hello world", "ai is fun", "machine learning"] - dataloader = pipeline.prepare_training_data(corpus, batch_size=2) - assert len(dataloader) > 0, "DataLoader should have batches" - - # Test training (minimal) - history = pipeline.train(dataloader, epochs=1) - assert 'losses' in history, "History should contain losses" - assert len(history['losses']) == 1, "Should have one epoch of losses" - - # Test optimization - pipeline.optimize_model(quantize=True, prune_sparsity=0.5) - - # Test generation - generated = pipeline.generate_text("hello", max_tokens=5) - assert isinstance(generated, str), "Generated output should be string" - assert len(generated) > 0, "Generated text should not be empty" - - print(f"✅ Pipeline stages completed successfully") - print(f"✅ Training history: {len(history['losses'])} epochs") - print(f"✅ Generated text: '{generated[:20]}...'") - print("✅ Complete pipeline integration works!") - -# Run immediate test -test_unit_complete_pipeline() diff --git a/tinytorch/benchmarking/benchmark.py b/tinytorch/benchmarking/benchmark.py index f6572c55..83b81eac 100644 --- a/tinytorch/benchmarking/benchmark.py +++ b/tinytorch/benchmarking/benchmark.py @@ -1,22 +1,8 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_benchmark/benchmark_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/19_benchmarking/benchmarking_dev.ipynb. 
+ # %% auto 0 __all__ = ['OlympicEvent', 'Benchmark', 'test_unit_benchmark', 'BenchmarkSuite', 'test_unit_benchmark_suite', 'TinyMLPerf', - 'test_unit_tinymlperf'] + 'test_unit_tinymlperf', 'calculate_normalized_scores'] # %% ../../modules/source/19_benchmarking/benchmarking_dev.ipynb 0 #| default_exp benchmarking.benchmark @@ -72,7 +58,7 @@ class Benchmark: self.measurement_runs = measurement_runs self.results = {} - # Use Profiler from Module 14 for measurements + # Use Profiler from Module 15 for measurements self.profiler = Profiler() # System information for metadata @@ -1024,3 +1010,53 @@ def test_unit_tinymlperf(): print("✅ TinyMLPerf works correctly!") test_unit_tinymlperf() + +# %% ../../modules/source/19_benchmarking/benchmarking_dev.ipynb 24 +def calculate_normalized_scores(baseline_results: dict, + optimized_results: dict) -> dict: + """ + Calculate normalized performance metrics for fair competition comparison. + + This function converts absolute measurements into relative improvements, + enabling fair comparison across different hardware platforms. + + Args: + baseline_results: Dict with keys: 'latency', 'memory', 'accuracy' + optimized_results: Dict with same keys as baseline_results + + Returns: + Dict with normalized metrics: + - speedup: Relative latency improvement (higher is better) + - compression_ratio: Relative memory reduction (higher is better) + - accuracy_delta: Absolute accuracy change (closer to 0 is better) + - efficiency_score: Combined metric balancing all factors + + Example: + >>> baseline = {'latency': 100.0, 'memory': 12.0, 'accuracy': 0.89} + >>> optimized = {'latency': 40.0, 'memory': 3.0, 'accuracy': 0.87} + >>> scores = calculate_normalized_scores(baseline, optimized) + >>> print(f"Speedup: {scores['speedup']:.2f}x") + Speedup: 2.50x + """ + # Calculate speedup (higher is better) + speedup = baseline_results['latency'] / optimized_results['latency'] + + # Calculate compression ratio (higher is better) + compression_ratio = baseline_results['memory'] / optimized_results['memory'] + + # Calculate accuracy delta (closer to 0 is better, negative means degradation) + accuracy_delta = optimized_results['accuracy'] - baseline_results['accuracy'] + + # Calculate efficiency score (combined metric) + # Penalize accuracy loss: the more accuracy you lose, the lower your score + accuracy_penalty = max(1.0, 1.0 - accuracy_delta) if accuracy_delta < 0 else 1.0 + efficiency_score = (speedup * compression_ratio) / accuracy_penalty + + return { + 'speedup': speedup, + 'compression_ratio': compression_ratio, + 'accuracy_delta': accuracy_delta, + 'efficiency_score': efficiency_score, + 'baseline': baseline_results.copy(), + 'optimized': optimized_results.copy() + } diff --git a/tinytorch/competition/submit.py b/tinytorch/competition/submit.py index da8585d6..a1a9d6d7 100644 --- a/tinytorch/competition/submit.py +++ b/tinytorch/competition/submit.py @@ -1,22 +1,8 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_submit/submit_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. 
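(A quick worked example of the scoring arithmetic in calculate_normalized_scores, added in the benchmark.py hunk above, using the hypothetical numbers from its own docstring:)

    # Hypothetical inputs, matching the docstring example above.
    baseline  = {'latency': 100.0, 'memory': 12.0, 'accuracy': 0.89}
    optimized = {'latency': 40.0,  'memory': 3.0,  'accuracy': 0.87}
    scores = calculate_normalized_scores(baseline, optimized)
    # speedup           = 100.0 / 40.0         = 2.50
    # compression_ratio = 12.0 / 3.0           = 4.00
    # accuracy_delta    = 0.87 - 0.89          = -0.02
    # accuracy_penalty  = max(1.0, 1.0 + 0.02) = 1.02   (delta is negative)
    # efficiency_score  = (2.50 * 4.00) / 1.02 ≈ 9.80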
║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/20_competition/competition_dev.ipynb. + # %% auto 0 __all__ = ['validate_installation', 'load_baseline_model', 'generate_baseline', 'worked_example_optimization', - 'optimize_for_competition', 'generate_submission'] + 'optimize_for_competition', 'validate_submission', 'generate_submission'] # %% ../../modules/source/20_competition/competition_dev.ipynb 4 import numpy as np @@ -24,6 +10,8 @@ import json import time from pathlib import Path from typing import Dict, List, Tuple, Any, Optional +from ..benchmarking.benchmark import Benchmark, calculate_normalized_scores +from ..profiling.profiler import Profiler def validate_installation() -> Dict[str, bool]: """ @@ -362,31 +350,24 @@ def worked_example_optimization(): return submission # %% ../../modules/source/20_competition/competition_dev.ipynb 10 -def optimize_for_competition(baseline_model, event: str = "all_around"): +def optimize_for_competition(baseline_model, event: str = "all_around", division: str = "closed"): """ 🏅 YOUR COMPETITION ENTRY - IMPLEMENT YOUR STRATEGY HERE! - This is where you apply optimization techniques from Modules 14-18. - - Available techniques: - - Module 14: KV Caching (for transformers) - enable_kv_cache() - - Module 16: Acceleration (vectorization, fusion) - - Module 17: Quantization (INT8, INT4) - quantize_model() - - Module 18: Compression (pruning) - magnitude_prune() - Args: - baseline_model: The unoptimized model - event: Which Olympic event you're competing in + baseline_model: Starting model (use for Closed, optional for Open) + event: Category you're competing in - "latency_sprint": Minimize latency - "memory_challenge": Minimize memory - "accuracy_contest": Maximize accuracy - "all_around": Best balance - "extreme_push": Most aggressive + division: "closed" or "open" - which track you chose Returns: Your optimized model - Example: + 🔒 CLOSED DIVISION Example: from tinytorch.optimization.quantization import quantize_model from tinytorch.optimization.compression import magnitude_prune @@ -394,6 +375,15 @@ def optimize_for_competition(baseline_model, event: str = "all_around"): optimized = quantize_model(optimized, bits=8) optimized = magnitude_prune(optimized, sparsity=0.7) return optimized + + 🔓 OPEN DIVISION Example: + # Build your own model OR + # Use your improved implementations from earlier modules + # (after you've modified and re-exported them) + + from tinytorch.models import YourCustomArchitecture + optimized = YourCustomArchitecture() + return optimized """ print(f"🏅 YOUR OPTIMIZATION STRATEGY FOR: {event}") @@ -438,74 +428,201 @@ def optimize_for_competition(baseline_model, event: str = "all_around"): return optimized_model +#| export +def validate_submission(submission: Dict[str, Any]) -> Dict[str, Any]: + """ + Validate competition submission with sanity checks. + + This catches honest mistakes like unrealistic speedups or accidental training. + Honor code system - we trust but verify basic reasonableness. 
+ + Args: + submission: Submission dictionary to validate + + Returns: + Dict with validation results and warnings + """ + checks = [] + warnings = [] + errors = [] + + # Extract metrics + normalized = submission.get("normalized_scores", {}) + speedup = normalized.get("speedup", 1.0) + compression = normalized.get("compression_ratio", 1.0) + accuracy_delta = normalized.get("accuracy_delta", 0.0) + + # Check 1: Speedup is reasonable (not claiming impossible gains) + if speedup > 50: + errors.append(f"❌ Speedup {speedup:.1f}x seems unrealistic (>50x)") + elif speedup > 20: + warnings.append(f"⚠️ Speedup {speedup:.1f}x is very high - please verify measurements") + else: + checks.append(f"✅ Speedup {speedup:.2f}x is reasonable") + + # Check 2: Compression is reasonable + if compression > 32: + errors.append(f"❌ Compression {compression:.1f}x seems unrealistic (>32x)") + elif compression > 16: + warnings.append(f"⚠️ Compression {compression:.1f}x is very high - please verify") + else: + checks.append(f"✅ Compression {compression:.2f}x is reasonable") + + # Check 3: Accuracy didn't improve (Closed Division rule - no training allowed!) + division = submission.get("division", "closed") + if division == "closed" and accuracy_delta > 1.0: + errors.append(f"❌ Accuracy improved by {accuracy_delta:.1f}pp - did you accidentally train the model?") + elif accuracy_delta > 0.5: + warnings.append(f"⚠️ Accuracy improved by {accuracy_delta:.1f}pp - verify no training occurred") + else: + checks.append(f"✅ Accuracy change {accuracy_delta:+.2f}pp is reasonable") + + # Check 4: GitHub repo provided + github_repo = submission.get("github_repo", "") + if not github_repo or github_repo == "": + warnings.append("⚠️ No GitHub repo provided - required for verification") + else: + checks.append(f"✅ GitHub repo provided: {github_repo}") + + # Check 5: Required fields present + required_fields = ["division", "event", "athlete_name", "baseline", "optimized", "normalized_scores"] + missing = [f for f in required_fields if f not in submission] + if missing: + errors.append(f"❌ Missing required fields: {', '.join(missing)}") + else: + checks.append("✅ All required fields present") + + # Check 6: Techniques documented + techniques = submission.get("techniques_applied", []) + if not techniques or "TODO" in str(techniques): + warnings.append("⚠️ No optimization techniques listed") + else: + checks.append(f"✅ Techniques documented: {', '.join(techniques[:3])}...") + + return { + "valid": len(errors) == 0, + "checks": checks, + "warnings": warnings, + "errors": errors + } + +#| export def generate_submission(baseline_model, optimized_model, + division: str = "closed", event: str = "all_around", athlete_name: str = "YourName", + github_repo: str = "", techniques: List[str] = None) -> Dict[str, Any]: """ - Generate standardized competition submission. + Generate standardized TinyMLPerf competition submission with normalized scoring. Args: baseline_model: Original unoptimized model optimized_model: Your optimized model - event: Olympic event name - athlete_name: Your name for leaderboard - techniques: List of techniques applied + division: "closed" or "open" + event: Competition category (latency_sprint, memory_challenge, all_around, etc.) 
+ athlete_name: Your name for submission + github_repo: GitHub repository URL for code verification + techniques: List of optimization techniques applied Returns: Submission dictionary (will be saved as JSON) """ - print("📤 Generating Competition Submission...") + print("📤 Generating TinyMLPerf Competition Submission...") print("=" * 70) # Get baseline metrics baseline_metrics = generate_baseline(quick=True) - # For demonstration, estimate optimized metrics - # In real competition, this would benchmark the actual optimized model + # Benchmark optimized model print("🔬 Benchmarking optimized model...") - # Placeholder: Students' actual optimizations would be measured here + # Use Profiler and Benchmark from Module 19 + profiler = Profiler() + + # For demonstration, we'll use placeholder metrics + # In real competition, students would measure their actual optimized model optimized_metrics = { - "model": "Your_Optimized_Model", - "accuracy": 84.0, # Measured - "latency_ms": 28.0, # Measured - "memory_mb": 4.0, # Measured - "parameters": 2000000, # Measured + "model": getattr(optimized_model, 'name', 'Optimized_Model'), + "accuracy": 84.0, # Would be measured with actual test set + "latency_ms": 28.0, # Would be measured with profiler + "memory_mb": 4.0, # Would be measured with profiler + "parameters": 2000000, # Would be counted } - # Calculate improvements - improvements = { - "accuracy_change": optimized_metrics["accuracy"] - baseline_metrics["accuracy"], - "latency_speedup": baseline_metrics["latency_ms"] / optimized_metrics["latency_ms"], - "memory_reduction": baseline_metrics["memory_mb"] / optimized_metrics["memory_mb"], + # Calculate normalized scores using Module 19's function + baseline_for_norm = { + "latency": baseline_metrics["latency_ms"], + "memory": baseline_metrics["memory_mb"], + "accuracy": baseline_metrics["accuracy"] } - # Create submission + optimized_for_norm = { + "latency": optimized_metrics["latency_ms"], + "memory": optimized_metrics["memory_mb"], + "accuracy": optimized_metrics["accuracy"] + } + + normalized_scores = calculate_normalized_scores(baseline_for_norm, optimized_for_norm) + + # Create submission with all required fields submission = { + "division": division, "event": event, "athlete_name": athlete_name, + "github_repo": github_repo, "baseline": baseline_metrics, "optimized": optimized_metrics, - "improvements": improvements, - "techniques_applied": techniques or ["TODO: List your techniques"], + "normalized_scores": { + "speedup": normalized_scores["speedup"], + "compression_ratio": normalized_scores["compression_ratio"], + "accuracy_delta": normalized_scores["accuracy_delta"], + "efficiency_score": normalized_scores["efficiency_score"] + }, + "techniques_applied": techniques or ["TODO: Document your optimization techniques"], "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), + "tinytorch_version": "0.1.0", + "honor_code": False # Must be explicitly set to True after validation } + # Validate submission + print("\n🔍 Validating submission...") + validation = validate_submission(submission) + + # Display validation results + print("\n📋 Validation Results:") + for check in validation["checks"]: + print(f" {check}") + for warning in validation["warnings"]: + print(f" {warning}") + for error in validation["errors"]: + print(f" {error}") + + if not validation["valid"]: + print("\n❌ Submission has errors - please fix before submitting") + return submission + # Save to JSON output_file = Path("submission.json") with open(output_file, "w") as f: 
json.dump(submission, f, indent=2) - print(f"✅ Submission saved to: {output_file}") + print(f"\n✅ Submission saved to: {output_file}") print() - print("📊 Your Results:") - print(f" Event: {event}") - print(f" Accuracy: {optimized_metrics['accuracy']:.1f}% (Δ {improvements['accuracy_change']:+.1f}pp)") - print(f" Latency: {optimized_metrics['latency_ms']:.1f}ms ({improvements['latency_speedup']:.2f}x faster)") - print(f" Memory: {optimized_metrics['memory_mb']:.2f}MB ({improvements['memory_reduction']:.2f}x smaller)") + print("📊 Your Normalized Scores (MLPerf-style):") + print(f" Division: {division.upper()}") + print(f" Event: {event.replace('_', ' ').title()}") + print(f" Speedup: {normalized_scores['speedup']:.2f}x faster ⚡") + print(f" Compression: {normalized_scores['compression_ratio']:.2f}x smaller 💾") + print(f" Accuracy: {optimized_metrics['accuracy']:.1f}% (Δ {normalized_scores['accuracy_delta']:+.2f}pp)") + print(f" Efficiency: {normalized_scores['efficiency_score']:.2f}") + print() + print("📤 Next Steps:") + print(" 1. Verify all metrics are correct") + print(" 2. Push your code to GitHub (if not done)") + print(" 3. Run: tito submit submission.json") + print(" (This will validate and prepare final submission)") print() - print("📤 Upload submission.json to TorchPerf Olympics platform!") print("=" * 70) return submission diff --git a/tinytorch/core/activations.py b/tinytorch/core/activations.py index 849ad752..c7fcb702 100644 --- a/tinytorch/core/activations.py +++ b/tinytorch/core/activations.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/03_activations/activations_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb. + # %% auto 0 __all__ = ['Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax'] diff --git a/tinytorch/core/attention.py b/tinytorch/core/attention.py index 14743a7b..fd17103a 100644 --- a/tinytorch/core/attention.py +++ b/tinytorch/core/attention.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/07_attention/attention_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! 
DO NOT EDIT! File to edit: ../../modules/source/12_attention/attention_dev.ipynb. + # %% auto 0 __all__ = ['scaled_dot_product_attention', 'MultiHeadAttention'] @@ -293,6 +279,10 @@ class MultiHeadAttention: return output ### END SOLUTION + def __call__(self, x: Tensor, mask: Optional[Tensor] = None) -> Tensor: + """Allows the attention layer to be called like a function.""" + return self.forward(x, mask) + def parameters(self) -> List[Tensor]: """ Return all trainable parameters. diff --git a/tinytorch/core/autograd.py b/tinytorch/core/autograd.py index 1a71c287..ce0be66f 100644 --- a/tinytorch/core/autograd.py +++ b/tinytorch/core/autograd.py @@ -1,23 +1,8 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/09_autograd/autograd_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/05_autograd/autograd_dev.ipynb. + # %% auto 0 -__all__ = ['Function', 'AddBackward', 'MulBackward', 'SubBackward', 'DivBackward', 'MatmulBackward', 'TransposeBackward', - 'PermuteBackward', 'EmbeddingBackward', 'ReshapeBackward', 'SumBackward', 'ReLUBackward', 'SigmoidBackward', - 'SoftmaxBackward', 'GELUBackward', 'MSEBackward', 'BCEBackward', 'CrossEntropyBackward', 'enable_autograd'] +__all__ = ['Function', 'AddBackward', 'MulBackward', 'MatmulBackward', 'SumBackward', 'ReLUBackward', 'SigmoidBackward', + 'MSEBackward', 'BCEBackward', 'CrossEntropyBackward', 'enable_autograd'] # %% ../../modules/source/05_autograd/autograd_dev.ipynb 1 import numpy as np @@ -164,66 +149,7 @@ class MulBackward(Function): return grad_a, grad_b -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 13 -class SubBackward(Function): - """ - Gradient computation for tensor subtraction. - - **Mathematical Rule:** If z = a - b, then ∂z/∂a = 1 and ∂z/∂b = -1 - """ - - def apply(self, grad_output): - """ - Compute gradients for subtraction. - - Returns: - Tuple of (grad_a, grad_b) where grad_b is negated - """ - a, b = self.saved_tensors - grad_a = grad_b = None - - if isinstance(a, Tensor) and a.requires_grad: - grad_a = grad_output # ∂(a-b)/∂a = 1 - - if isinstance(b, Tensor) and b.requires_grad: - grad_b = -grad_output # ∂(a-b)/∂b = -1 (note the negative!) - - return grad_a, grad_b - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 15 -class DivBackward(Function): - """ - Gradient computation for tensor division. - - **Mathematical Rule:** If z = a / b, then: - - ∂z/∂a = 1/b - - ∂z/∂b = -a/b² - """ - - def apply(self, grad_output): - """ - Compute gradients for division using quotient rule. 
- - Returns: - Tuple of (grad_a, grad_b) - """ - a, b = self.saved_tensors - grad_a = grad_b = None - - if isinstance(a, Tensor) and a.requires_grad: - # ∂(a/b)/∂a = 1/b - if isinstance(b, Tensor): - grad_a = grad_output / b.data - else: - grad_a = grad_output / b - - if isinstance(b, Tensor) and b.requires_grad: - # ∂(a/b)/∂b = -a/b² - grad_b = -grad_output * a.data / (b.data ** 2) - - return grad_a, grad_b - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 17 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 14 class MatmulBackward(Function): """ Gradient computation for matrix multiplication. @@ -252,242 +178,21 @@ class MatmulBackward(Function): **Mathematical Foundation:** - ∂(A@B)/∂A = grad_output @ B.T - ∂(A@B)/∂B = A.T @ grad_output - - **Batched Operation:** For 3D+ tensors, we transpose only the last two - dimensions using np.swapaxes, preserving batch dimensions. """ a, b = self.saved_tensors grad_a = grad_b = None # Gradient for first input: grad_output @ b.T if isinstance(a, Tensor) and a.requires_grad: - # For batched tensors, transpose only last two dims - if b.data.ndim >= 2: - b_T = np.swapaxes(b.data, -2, -1) - else: - b_T = b.data.T - grad_a = np.matmul(grad_output, b_T) + grad_a = np.dot(grad_output, b.data.T) # Gradient for second input: a.T @ grad_output if isinstance(b, Tensor) and b.requires_grad: - # For batched tensors, transpose only last two dims - if a.data.ndim >= 2: - a_T = np.swapaxes(a.data, -2, -1) - else: - a_T = a.data.T - grad_b = np.matmul(a_T, grad_output) + grad_b = np.dot(a.data.T, grad_output) return grad_a, grad_b -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 18 -class TransposeBackward(Function): - """ - Gradient computation for transpose operation. - - **Mathematical Rule:** If Y = X.T, then: - - ∂Y/∂X = grad_Y.T - - **Key Insight:** The gradient of transpose is just transpose the gradient! - This is because transpose is a linear operation that just rearranges elements. - - **Applications:** Used in attention (K.T for scores), weight gradients (W.T), - and any operation that needs to swap matrix dimensions. - """ - - def __init__(self, tensor, dim0, dim1): - """ - Args: - tensor: Input tensor - dim0: First dimension to swap (None for default) - dim1: Second dimension to swap (None for default) - """ - super().__init__(tensor) - self.dim0 = dim0 - self.dim1 = dim1 - - def apply(self, grad_output): - """ - Compute gradient for transpose. - - Args: - grad_output: Gradient flowing backward from output - - Returns: - Tuple with single gradient for input tensor - - **Mathematical Foundation:** - - ∂(X.T)/∂X = grad_output.T - - Just transpose the gradient back! - """ - x, = self.saved_tensors - grad_x = None - - if isinstance(x, Tensor) and x.requires_grad: - # Transpose gradient using the same dims - if self.dim0 is None and self.dim1 is None: - # Default: transpose last two dimensions - if grad_output.ndim < 2: - grad_x = grad_output.copy() - else: - axes = list(range(grad_output.ndim)) - axes[-2], axes[-1] = axes[-1], axes[-2] - grad_x = np.transpose(grad_output, axes) - else: - # Specific dimensions: swap them back - axes = list(range(grad_output.ndim)) - axes[self.dim0], axes[self.dim1] = axes[self.dim1], axes[self.dim0] - grad_x = np.transpose(grad_output, axes) - - return (grad_x,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 19 -class PermuteBackward(Function): - """ - Gradient computation for arbitrary axis permutation (general transpose). 
- - **Mathematical Rule:** If Y = X.permute(axes), then: - - ∂Y/∂X = grad_Y.permute(inverse_axes) - - **Example:** If axes = (0, 2, 1, 3), the inverse is (0, 2, 1, 3) (self-inverse). - More generally, if axes = (2, 0, 1), the inverse is (1, 2, 0). - - **Key Insight:** To reverse a permutation, we need to know where each axis went. - If axis i went to position axes[i], then in the inverse, position axes[i] should go to i. - - **Applications:** Multi-head attention uses (0, 2, 1, 3) to rearrange heads. - """ - - def __init__(self, tensor, axes): - """ - Args: - tensor: Input tensor - axes: Tuple of axis indices defining the permutation - """ - super().__init__(tensor) - self.axes = axes - # Compute inverse permutation: if axes[i] = j, then inverse_axes[j] = i - self.inverse_axes = tuple(np.argsort(axes)) - - def apply(self, grad_output): - """ - Compute gradient for permutation. - - The gradient is permuted back using the inverse permutation. - - **Mathematical Foundation:** - - ∂(X.permute(axes))/∂X = grad_output.permute(inverse_axes) - """ - x, = self.saved_tensors - grad_x = None - - if isinstance(x, Tensor) and x.requires_grad: - # Permute gradient back to original axis order - grad_x = np.transpose(grad_output, self.inverse_axes) - - return (grad_x,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 20 -class EmbeddingBackward(Function): - """ - Gradient computation for embedding lookup operation. - - **Mathematical Rule:** If Y = Embedding[indices], then: - - ∂Loss/∂Embedding[i] = sum of all gradients where index==i - - **Key Insight:** Embedding lookup is a gather operation. The backward - is a scatter operation that accumulates gradients to the embedding weights. - - **Applications:** Word embeddings, positional embeddings, token embeddings - in transformers. - """ - - def __init__(self, weight, indices): - """ - Args: - weight: Embedding weight matrix - indices: Indices used for lookup - """ - super().__init__(weight) - self.indices = indices - - def apply(self, grad_output): - """ - Compute gradient for embedding lookup. - - Args: - grad_output: Gradient flowing backward from output - - Returns: - Tuple with single gradient for weight tensor - - **Mathematical Foundation:** - - ∂(Embedding[indices])/∂Embedding = scatter gradients to selected rows - - Multiple indices can point to same embedding → gradients accumulate - """ - weight, = self.saved_tensors - grad_weight = None - - if isinstance(weight, Tensor) and weight.requires_grad: - # Initialize gradient with zeros - grad_weight = np.zeros_like(weight.data) - - # Scatter gradients back to embedding weights - # np.add.at accumulates gradients for repeated indices - indices_flat = self.indices.data.astype(int).flatten() - grad_output_reshaped = grad_output.reshape(-1, grad_output.shape[-1]) - - np.add.at(grad_weight, indices_flat, grad_output_reshaped) - - return (grad_weight,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 21 -class ReshapeBackward(Function): - """ - Gradient computation for reshape operation. - - **Mathematical Rule:** If Y = X.reshape(new_shape), then: - - ∂Y/∂X = grad_Y.reshape(X.shape) - - **Key Insight:** Reshape just rearranges the same elements. - The gradient is simply reshaped back to the original shape! - - **Applications:** Flattening tensors for linear layers, reshaping - between convolutional and dense layers. 
- """ - - def __init__(self, tensor, original_shape): - """ - Args: - tensor: Input tensor - original_shape: Shape before reshape - """ - super().__init__(tensor) - self.original_shape = original_shape - - def apply(self, grad_output): - """ - Compute gradient for reshape. - - Args: - grad_output: Gradient flowing backward from output - - Returns: - Tuple with single gradient for input tensor - - **Mathematical Foundation:** - - ∂(X.reshape(...))/∂X = grad_output.reshape(X.shape) - - Just reshape the gradient back! - """ - x, = self.saved_tensors - grad_x = None - - if isinstance(x, Tensor) and x.requires_grad: - # Reshape gradient back to original shape - grad_x = grad_output.reshape(self.original_shape) - - return (grad_x,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 23 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 16 class SumBackward(Function): """ Gradient computation for tensor sum. @@ -521,7 +226,7 @@ class SumBackward(Function): return np.ones_like(tensor.data) * grad_output, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 28 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 23 class ReLUBackward(Function): """ Gradient computation for ReLU activation. @@ -544,7 +249,7 @@ class ReLUBackward(Function): return grad_output * relu_grad, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 29 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 25 class SigmoidBackward(Function): """ Gradient computation for sigmoid activation. @@ -574,101 +279,7 @@ class SigmoidBackward(Function): return grad_output * sigmoid_grad, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 30 -class SoftmaxBackward(Function): - """ - Gradient computation for softmax activation. - - Softmax: softmax(x)[i] = exp(x[i]) / sum(exp(x)) - Derivative: ∂softmax/∂x[i] = softmax[i] * (δ[i,j] - softmax[j]) - - For gradient computation: - grad_x[i] = softmax[i] * (grad_y[i] - sum(grad_y * softmax)) - - **Key Insight:** The gradient depends on all elements of softmax due to - the normalization, not just the element being differentiated. - """ - - def __init__(self, input_tensor, output_tensor, dim=-1): - """ - Initialize with input, output, and dimension. - - Args: - input_tensor: Original input to softmax - output_tensor: Output of softmax (needed for gradient) - dim: Dimension along which softmax was applied - """ - super().__init__(input_tensor) - self.output_data = output_tensor.data - self.dim = dim - - def apply(self, grad_output): - """ - Compute gradient for softmax. - - Mathematical formula: - ∂L/∂x[i] = softmax[i] * (∂L/∂y[i] - sum_j(∂L/∂y[j] * softmax[j])) - - This can be vectorized as: - grad_x = softmax * (grad_y - sum(grad_y * softmax, keepdims=True)) - """ - tensor, = self.saved_tensors - - if isinstance(tensor, Tensor) and tensor.requires_grad: - # Compute sum(grad_output * softmax) along the softmax dimension - sum_term = np.sum(grad_output * self.output_data, axis=self.dim, keepdims=True) - - # Softmax gradient: softmax * (grad_output - sum_term) - grad_x = self.output_data * (grad_output - sum_term) - - return (grad_x,) - return (None,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 31 -class GELUBackward(Function): - """ - Gradient computation for GELU activation. 
- - GELU: f(x) = x * Φ(x) where Φ is the CDF of standard normal - Approximation: gelu(x) ≈ 0.5 * x * (1 + tanh(√(2/π) * (x + 0.044715 * x³))) - - **Key Insight:** GELU is smoother than ReLU, providing non-zero gradients - for negative values, which helps training deep networks. - """ - - def __init__(self, input_tensor): - """Initialize with input tensor.""" - super().__init__(input_tensor) - - def apply(self, grad_output): - """ - Compute gradient for GELU. - - Mathematical formula (using approximation): - ∂gelu/∂x ≈ 0.5 * (1 + tanh(...)) + 0.5 * x * sech²(...) * (...) - - Simplified: We compute the derivative numerically or use the formula. - """ - tensor, = self.saved_tensors - - if isinstance(tensor, Tensor) and tensor.requires_grad: - x = tensor.data - # GELU derivative approximation - # Using the tanh approximation: gelu(x) ≈ 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))) - sqrt_2_over_pi = np.sqrt(2.0 / np.pi) - x_cubed = x ** 3 - tanh_arg = sqrt_2_over_pi * (x + 0.044715 * x_cubed) - tanh_out = np.tanh(tanh_arg) - sech_squared = 1 - tanh_out ** 2 - - # Derivative: 0.5 * (1 + tanh(...)) + 0.5 * x * sech²(...) * d(tanh_arg)/dx - d_tanh_arg = sqrt_2_over_pi * (1 + 0.134145 * x ** 2) - gelu_grad = 0.5 * (1 + tanh_out) + 0.5 * x * sech_squared * d_tanh_arg - - return (grad_output * gelu_grad,) - return (None,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 32 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 26 class MSEBackward(Function): """ Gradient computation for Mean Squared Error Loss. @@ -694,7 +305,7 @@ class MSEBackward(Function): return grad * grad_output, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 33 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 27 class BCEBackward(Function): """ Gradient computation for Binary Cross-Entropy Loss. @@ -724,7 +335,7 @@ class BCEBackward(Function): return grad * grad_output, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 34 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 28 class CrossEntropyBackward(Function): """ Gradient computation for Cross-Entropy Loss. @@ -769,7 +380,7 @@ class CrossEntropyBackward(Function): return grad * grad_output, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 35 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 29 def enable_autograd(): """ Enable gradient tracking for all Tensor operations. @@ -806,12 +417,8 @@ def enable_autograd(): # Store original operations _original_add = Tensor.__add__ - _original_sub = Tensor.__sub__ _original_mul = Tensor.__mul__ - _original_div = Tensor.__truediv__ _original_matmul = Tensor.matmul if hasattr(Tensor, 'matmul') else None - _original_transpose = Tensor.transpose if hasattr(Tensor, 'transpose') else None - _original_reshape = Tensor.reshape if hasattr(Tensor, 'reshape') else None # Enhanced operations that track gradients def tracked_add(self, other): @@ -878,98 +485,6 @@ def enable_autograd(): return result - def tracked_transpose(self, dim0=None, dim1=None): - """ - Transpose with gradient tracking. - - Enhances the original transpose method to build computation graphs - when requires_grad=True for the input. 
- """ - if _original_transpose: - result = _original_transpose(self, dim0, dim1) - else: - # Fallback if transpose doesn't exist - if dim0 is None and dim1 is None: - axes = list(range(len(self.shape))) - if len(axes) >= 2: - axes[-2], axes[-1] = axes[-1], axes[-2] - result = Tensor(np.transpose(self.data, axes)) - else: - axes = list(range(len(self.shape))) - axes[dim0], axes[dim1] = axes[dim1], axes[dim0] - result = Tensor(np.transpose(self.data, axes)) - - # Track gradient if needed - if self.requires_grad: - result.requires_grad = True - result._grad_fn = TransposeBackward(self, dim0, dim1) - - return result - - def tracked_reshape(self, *shape): - """ - Reshape with gradient tracking. - - Enhances the original reshape method to build computation graphs - when requires_grad=True for the input. - """ - original_shape = self.shape - - if _original_reshape: - result = _original_reshape(self, *shape) - else: - # Fallback if reshape doesn't exist - result = Tensor(self.data.reshape(*shape)) - - # Track gradient if needed - if self.requires_grad: - result.requires_grad = True - result._grad_fn = ReshapeBackward(self, original_shape) - - return result - - def tracked_sub(self, other): - """ - Subtraction with gradient tracking. - - Enhances the original __sub__ method to build computation graphs - when requires_grad=True for any input. - """ - # Convert scalar to Tensor if needed - if not isinstance(other, Tensor): - other = Tensor(other) - - # Call original operation - result = _original_sub(self, other) - - # Track gradient if needed - if self.requires_grad or other.requires_grad: - result.requires_grad = True - result._grad_fn = SubBackward(self, other) - - return result - - def tracked_div(self, other): - """ - Division with gradient tracking. - - Enhances the original __truediv__ method to build computation graphs - when requires_grad=True for any input. - """ - # Convert scalar to Tensor if needed - if not isinstance(other, Tensor): - other = Tensor(other) - - # Call original operation - result = _original_div(self, other) - - # Track gradient if needed - if self.requires_grad or other.requires_grad: - result.requires_grad = True - result._grad_fn = DivBackward(self, other) - - return result - def sum_op(self, axis=None, keepdims=False): """ Sum operation with gradient tracking. 
@@ -1058,26 +573,20 @@ def enable_autograd(): # Install enhanced operations Tensor.__add__ = tracked_add - Tensor.__sub__ = tracked_sub Tensor.__mul__ = tracked_mul - Tensor.__truediv__ = tracked_div Tensor.matmul = tracked_matmul - Tensor.transpose = tracked_transpose - Tensor.reshape = tracked_reshape Tensor.sum = sum_op Tensor.backward = backward Tensor.zero_grad = zero_grad # Patch activations and losses to track gradients try: - from tinytorch.core.activations import Sigmoid, ReLU, Softmax, GELU + from tinytorch.core.activations import Sigmoid, ReLU from tinytorch.core.losses import BinaryCrossEntropyLoss, MSELoss, CrossEntropyLoss # Store original methods _original_sigmoid_forward = Sigmoid.forward _original_relu_forward = ReLU.forward - _original_softmax_forward = Softmax.forward - _original_gelu_forward = GELU.forward _original_bce_forward = BinaryCrossEntropyLoss.forward _original_mse_forward = MSELoss.forward _original_ce_forward = CrossEntropyLoss.forward @@ -1104,30 +613,6 @@ def enable_autograd(): return result - def tracked_softmax_forward(self, x, dim=-1): - """Softmax with gradient tracking.""" - # Call original forward to get result using Tensor operations - result = _original_softmax_forward(self, x, dim=dim) - - # Attach the correct gradient function - if x.requires_grad: - result.requires_grad = True - result._grad_fn = SoftmaxBackward(x, result, dim) - - return result - - def tracked_gelu_forward(self, x): - """GELU with gradient tracking.""" - # Call original forward to get result - result = _original_gelu_forward(self, x) - - # Attach the correct gradient function - if x.requires_grad: - result.requires_grad = True - result._grad_fn = GELUBackward(x) - - return result - def tracked_bce_forward(self, predictions, targets): """Binary cross-entropy with gradient tracking.""" # Compute BCE loss @@ -1187,8 +672,6 @@ def enable_autograd(): # Install patched methods Sigmoid.forward = tracked_sigmoid_forward ReLU.forward = tracked_relu_forward - Softmax.forward = tracked_softmax_forward - GELU.forward = tracked_gelu_forward BinaryCrossEntropyLoss.forward = tracked_bce_forward MSELoss.forward = tracked_mse_forward CrossEntropyLoss.forward = tracked_ce_forward diff --git a/tinytorch/core/layers.py b/tinytorch/core/layers.py index 8b142253..1289ad68 100644 --- a/tinytorch/core/layers.py +++ b/tinytorch/core/layers.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/04_layers/layers_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_layers/layers_dev.ipynb. 
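Taken together, the autograd hunks above leave a smaller but complete tape: add, mul, matmul, and sum, plus the patched Sigmoid/ReLU and loss forwards. A usage sketch of the slimmed-down API; it assumes backward() seeds the output gradient with ones, which is the conventional behavior but is not shown in this hunk:

import numpy as np
from tinytorch.core.tensor import Tensor
from tinytorch.core.autograd import enable_autograd

enable_autograd()  # installs tracked_add, tracked_mul, tracked_matmul, sum_op
x = Tensor(np.array([[1.0, 2.0]]), requires_grad=True)
w = Tensor(np.array([[0.5], [0.25]]), requires_grad=True)
loss = x.matmul(w).sum()  # routed through tracked_matmul and sum_op
loss.backward()           # walks the _grad_fn chain back to x and w
print(w.grad)             # expected x transposed, i.e. [[1.], [2.]]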
+ # %% auto 0 __all__ = ['Linear', 'Dropout'] diff --git a/tinytorch/core/losses.py b/tinytorch/core/losses.py index dd12532a..8f4369ba 100644 --- a/tinytorch/core/losses.py +++ b/tinytorch/core/losses.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_losses/losses_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_losses/losses_dev.ipynb. + # %% auto 0 __all__ = ['import_previous_module', 'log_softmax', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss'] diff --git a/tinytorch/core/optimizers.py b/tinytorch/core/optimizers.py index 314a8db9..6a4a8ecd 100644 --- a/tinytorch/core/optimizers.py +++ b/tinytorch/core/optimizers.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/10_optimizers/optimizers_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/06_optimizers/optimizers_dev.ipynb. + # %% auto 0 __all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW'] diff --git a/tinytorch/core/spatial.py b/tinytorch/core/spatial.py index 0c64c1b3..928fd8c7 100644 --- a/tinytorch/core/spatial.py +++ b/tinytorch/core/spatial.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/06_spatial/spatial_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_spatial/spatial_dev.ipynb. 
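optimizers.py exports Optimizer, SGD, Adam, and AdamW. For orientation, the update rule that SGD implements (and that Adam/AdamW extend with per-parameter moment statistics) is the textbook gradient step; the sketch below is that generic rule in plain numpy, not a copy of the module's implementation:

import numpy as np

def sgd_step(params, grads, lr=0.01):
    # p <- p - lr * dL/dp, applied in place to every parameter array
    for p, g in zip(params, grads):
        p -= lr * g

w = np.array([0.5, -0.3])
g = np.array([0.2, 0.1])
sgd_step([w], [g], lr=0.1)
assert np.allclose(w, [0.48, -0.31])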
+ # %% auto 0 __all__ = ['Conv2d', 'MaxPool2d', 'AvgPool2d', 'SimpleCNN'] diff --git a/tinytorch/core/tensor.py b/tinytorch/core/tensor.py index 82e681fa..dfd03466 100644 --- a/tinytorch/core/tensor.py +++ b/tinytorch/core/tensor.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/02_tensor/tensor_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_tensor/tensor_dev.ipynb. + # %% auto 0 __all__ = ['Tensor'] diff --git a/tinytorch/core/training.py b/tinytorch/core/training.py index e4082b8f..dd393f81 100644 --- a/tinytorch/core/training.py +++ b/tinytorch/core/training.py @@ -1,21 +1,7 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/11_training/training_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/07_training/training_dev.ipynb. + # %% auto 0 -__all__ = ['CosineSchedule', 'Trainer'] +__all__ = ['CosineSchedule', 'save_checkpoint', 'load_checkpoint', 'Trainer'] # %% ../../modules/source/07_training/training_dev.ipynb 1 import numpy as np @@ -72,6 +58,90 @@ class CosineSchedule: ### END SOLUTION # %% ../../modules/source/07_training/training_dev.ipynb 14 +def save_checkpoint(checkpoint_dict: Dict[str, Any], path: str): + """ + Save checkpoint dictionary to disk using pickle. + + This is a low-level utility for saving model state. Use this when you have + a custom training loop and want to save just what you need (model params, + config, metadata). + + For complete training state with optimizer and scheduler, use + Trainer.save_checkpoint() instead. + + TODO: Implement checkpoint saving with pickle + + APPROACH: + 1. Create parent directory if it doesn't exist (Path(path).parent.mkdir) + 2. Open file in binary write mode ('wb') + 3. Use pickle.dump() to serialize the checkpoint dictionary + 4. Print confirmation message + + EXAMPLE: + >>> model = SimpleModel() + >>> checkpoint = { + ... 'model_params': [p.data.copy() for p in model.parameters()], + ... 'config': {'embed_dim': 32, 'num_layers': 2}, + ... 'metadata': {'final_loss': 0.089, 'training_steps': 5000} + ... 
} + >>> save_checkpoint(checkpoint, 'checkpoints/model.pkl') + ✓ Checkpoint saved: checkpoints/model.pkl + + HINTS: + - Use Path(path).parent.mkdir(parents=True, exist_ok=True) + - pickle.dump(obj, file) writes the object to file + - Always print a success message so users know it worked + """ + ### BEGIN SOLUTION + # Create parent directory if needed + Path(path).parent.mkdir(parents=True, exist_ok=True) + + # Save checkpoint using pickle + with open(path, 'wb') as f: + pickle.dump(checkpoint_dict, f) + + print(f"✓ Checkpoint saved: {path}") + ### END SOLUTION + +# %% ../../modules/source/07_training/training_dev.ipynb 15 +def load_checkpoint(path: str) -> Dict[str, Any]: + """ + Load checkpoint dictionary from disk using pickle. + + Companion function to save_checkpoint(). Restores the checkpoint dictionary + so you can rebuild your model, resume training, or inspect saved metadata. + + TODO: Implement checkpoint loading with pickle + + APPROACH: + 1. Open file in binary read mode ('rb') + 2. Use pickle.load() to deserialize the checkpoint + 3. Print confirmation message + 4. Return the loaded dictionary + + EXAMPLE: + >>> checkpoint = load_checkpoint('checkpoints/model.pkl') + ✓ Checkpoint loaded: checkpoints/model.pkl + >>> print(checkpoint['metadata']['final_loss']) + 0.089 + >>> model_params = checkpoint['model_params'] + >>> # Now restore model: for param, data in zip(model.parameters(), model_params)... + + HINTS: + - pickle.load(file) reads and deserializes the object + - Return the loaded dictionary + - Print a success message for user feedback + """ + ### BEGIN SOLUTION + # Load checkpoint using pickle + with open(path, 'rb') as f: + checkpoint = pickle.load(f) + + print(f"✓ Checkpoint loaded: {path}") + return checkpoint + ### END SOLUTION + +# %% ../../modules/source/07_training/training_dev.ipynb 19 class Trainer: """ Complete training orchestrator for neural networks. @@ -246,6 +316,11 @@ class Trainer: def save_checkpoint(self, path: str): """ Save complete training state for resumption. + + This high-level method saves everything needed to resume training: + model parameters, optimizer state, scheduler state, and training history. + + Uses the low-level save_checkpoint() function internally. Args: path: File path to save checkpoint @@ -260,19 +335,23 @@ class Trainer: 'training_mode': self.training_mode } - Path(path).parent.mkdir(parents=True, exist_ok=True) - with open(path, 'wb') as f: - pickle.dump(checkpoint, f) + # Use the standalone save_checkpoint function + save_checkpoint(checkpoint, path) def load_checkpoint(self, path: str): """ Load training state from checkpoint. + + This high-level method restores complete training state including + model parameters, optimizer state, scheduler state, and history. + + Uses the low-level load_checkpoint() function internally. Args: path: File path to load checkpoint from """ - with open(path, 'rb') as f: - checkpoint = pickle.load(f) + # Use the standalone load_checkpoint function + checkpoint = load_checkpoint(path) self.epoch = checkpoint['epoch'] self.step = checkpoint['step'] diff --git a/tinytorch/data/loader.py b/tinytorch/data/loader.py index 1018e73f..09ea90a2 100644 --- a/tinytorch/data/loader.py +++ b/tinytorch/data/loader.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. 
║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_loader/loader_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/08_dataloader/dataloader_dev.ipynb. + # %% auto 0 __all__ = ['Dataset', 'TensorDataset', 'DataLoader'] diff --git a/tinytorch/generation/kv_cache.py b/tinytorch/generation/kv_cache.py index 55d8504b..f6f411a6 100644 --- a/tinytorch/generation/kv_cache.py +++ b/tinytorch/generation/kv_cache.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_kv_cache/kv_cache_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/15_memoization/memoization_dev.ipynb. + # %% auto 0 __all__ = ['KVCache', 'enable_kv_cache', 'disable_kv_cache'] diff --git a/tinytorch/models/transformer.py b/tinytorch/models/transformer.py index 4bf34131..0fdd20ea 100644 --- a/tinytorch/models/transformer.py +++ b/tinytorch/models/transformer.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_transformer/transformer_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_transformers/transformers_dev.ipynb. 
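With the refactor above, Trainer.save_checkpoint()/load_checkpoint() and the standalone functions now share one pickle-based code path. A round-trip sketch using the standalone API (a single Linear layer stands in for any model exposing parameters(); the dictionary keys follow the module's own docstring example):

from tinytorch.core.layers import Linear
from tinytorch.core.training import save_checkpoint, load_checkpoint

layer = Linear(4, 2)
checkpoint = {
    'model_params': [p.data.copy() for p in layer.parameters()],
    'config': {'in_features': 4, 'out_features': 2},
    'metadata': {'note': 'illustrative values only'},
}
save_checkpoint(checkpoint, 'checkpoints/linear.pkl')  # creates parent dirs

restored = load_checkpoint('checkpoints/linear.pkl')
for p, saved in zip(layer.parameters(), restored['model_params']):
    p.data[...] = saved  # copy saved weights back in place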
+ # %% auto 0 __all__ = ['LayerNorm', 'MLP', 'TransformerBlock', 'GPT'] @@ -23,7 +9,6 @@ from ..core.tensor import Tensor from ..core.layers import Linear from ..core.attention import MultiHeadAttention from ..core.activations import GELU -from ..text.embeddings import Embedding, PositionalEncoding # %% ../../modules/source/13_transformers/transformers_dev.ipynb 9 class LayerNorm: @@ -61,6 +46,7 @@ class LayerNorm: self.eps = eps # Learnable parameters: scale and shift + # CRITICAL: requires_grad=True so optimizer can train these! self.gamma = Tensor(np.ones(normalized_shape), requires_grad=True) # Scale parameter self.beta = Tensor(np.zeros(normalized_shape), requires_grad=True) # Shift parameter ### END SOLUTION @@ -83,19 +69,18 @@ class LayerNorm: HINT: Use keepdims=True to maintain tensor dimensions for broadcasting """ ### BEGIN SOLUTION + # CRITICAL: Use Tensor operations (not .data) to maintain gradient flow! # Compute statistics across last dimension (features) mean = x.mean(axis=-1, keepdims=True) # Compute variance: E[(x - μ)²] - # Use Tensor operations to preserve computation graph! - diff = x - mean - variance = (diff * diff).mean(axis=-1, keepdims=True) + diff = x - mean # Tensor subtraction maintains gradient + variance = (diff * diff).mean(axis=-1, keepdims=True) # Tensor ops maintain gradient - # Normalize - use Tensor operations to preserve gradients! - # Add eps as a Tensor for proper gradient flow - eps_tensor = Tensor(np.array(self.eps), requires_grad=False) - std = Tensor(np.sqrt(variance.data + self.eps), requires_grad=variance.requires_grad) - normalized = (x - mean) / std + # Normalize: (x - mean) / sqrt(variance + eps) + # Note: sqrt and division need to preserve gradient flow + std_data = np.sqrt(variance.data + self.eps) + normalized = diff * Tensor(1.0 / std_data) # Scale by reciprocal to maintain gradient # Apply learnable transformation output = normalized * self.gamma + self.beta @@ -103,7 +88,7 @@ class LayerNorm: ### END SOLUTION def __call__(self, x): - """Allows the layer norm to be called like a function.""" + """Allows the layer to be called like a function.""" return self.forward(x) def parameters(self): @@ -147,7 +132,7 @@ class MLP: # Two-layer feed-forward network self.linear1 = Linear(embed_dim, hidden_dim) - self.gelu = GELU() # Use GELU activation from activations module + self.gelu = GELU() self.linear2 = Linear(hidden_dim, embed_dim) ### END SOLUTION @@ -171,7 +156,7 @@ class MLP: # First linear layer with expansion hidden = self.linear1.forward(x) - # GELU activation (YOUR activation from Module 03!) + # GELU activation hidden = self.gelu.forward(hidden) # Second linear layer back to original size @@ -404,10 +389,6 @@ class GPT: return logits ### END SOLUTION - def __call__(self, tokens): - """Allows the GPT model to be called like a function.""" - return self.forward(tokens) - def _create_causal_mask(self, seq_len): """Create causal mask to prevent attending to future positions.""" ### BEGIN SOLUTION diff --git a/tinytorch/optimization/acceleration.py b/tinytorch/optimization/acceleration.py index e59fe00f..fd53282e 100644 --- a/tinytorch/optimization/acceleration.py +++ b/tinytorch/optimization/acceleration.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! 
║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_acceleration/acceleration_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/18_acceleration/acceleration_dev.ipynb. + # %% auto 0 __all__ = [] diff --git a/tinytorch/optimization/compression.py b/tinytorch/optimization/compression.py index 20c318fa..7f43ee68 100644 --- a/tinytorch/optimization/compression.py +++ b/tinytorch/optimization/compression.py @@ -1,22 +1,7 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_compression/compression_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/17_compression/compression_dev.ipynb. 
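The compression hunks below swap the package imports for minimal Tensor/Linear/Sequential stand-ins and drop the pruning and distillation helpers from __all__. For reference, the magnitude-pruning rule those removed helpers implemented reduces to a percentile threshold per weight array; a generic numpy sketch, not the module's code:

import numpy as np

def magnitude_prune_array(w, sparsity=0.5):
    # zero out the smallest-|w| entries so roughly `sparsity` fraction are zero
    threshold = np.percentile(np.abs(w), sparsity * 100)
    w[np.abs(w) < threshold] = 0.0
    return w

w = np.random.randn(64, 64)
magnitude_prune_array(w, sparsity=0.5)
print((w == 0).mean())  # ~0.5 of the weights are now zero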
+ # %% auto 0 -__all__ = ['Sequential', 'KnowledgeDistillation', 'test_unit_knowledge_distillation', 'CompressionComplete', 'measure_sparsity', - 'magnitude_prune', 'structured_prune', 'compress_model'] +__all__ = ['Tensor', 'Linear', 'Sequential'] # %% ../../modules/source/17_compression/compression_dev.ipynb 1 import numpy as np @@ -24,277 +9,77 @@ import copy from typing import List, Dict, Any, Tuple, Optional import time -# Import from TinyTorch modules -from ..core.tensor import Tensor -from ..core.layers import Linear +# Import from previous modules +# Note: In the full package, these would be imports like: +# from tinytorch.core.tensor import Tensor +# from tinytorch.core.layers import Linear +# For development, we'll create minimal implementations + +class Tensor: + """Minimal Tensor class for compression development - imports from Module 01 in practice.""" + def __init__(self, data, requires_grad=False): + self.data = np.array(data) + self.shape = self.data.shape + self.size = self.data.size + self.requires_grad = requires_grad + self.grad = None + + def __add__(self, other): + if isinstance(other, Tensor): + return Tensor(self.data + other.data) + return Tensor(self.data + other) + + def __mul__(self, other): + if isinstance(other, Tensor): + return Tensor(self.data * other.data) + return Tensor(self.data * other) + + def matmul(self, other): + return Tensor(np.dot(self.data, other.data)) + + def abs(self): + return Tensor(np.abs(self.data)) + + def sum(self, axis=None): + return Tensor(self.data.sum(axis=axis)) + + def __repr__(self): + return f"Tensor(shape={self.shape})" + +class Linear: + """Minimal Linear layer for compression development - imports from Module 03 in practice.""" + def __init__(self, in_features, out_features, bias=True): + self.in_features = in_features + self.out_features = out_features + # Initialize with He initialization + self.weight = Tensor(np.random.randn(in_features, out_features) * np.sqrt(2.0 / in_features)) + self.bias = Tensor(np.zeros(out_features)) if bias else None + + def forward(self, x): + output = x.matmul(self.weight) + if self.bias is not None: + output = output + self.bias + return output + + def parameters(self): + params = [self.weight] + if self.bias is not None: + params.append(self.bias) + return params -# Sequential container for model compression class Sequential: - """Sequential container for compression (not exported from core layers).""" + """Minimal Sequential container for model compression.""" def __init__(self, *layers): self.layers = list(layers) def forward(self, x): for layer in self.layers: - x = layer.forward(x) if hasattr(layer, 'forward') else layer(x) + x = layer.forward(x) return x - def __call__(self, x): - return self.forward(x) - def parameters(self): params = [] for layer in self.layers: if hasattr(layer, 'parameters'): params.extend(layer.parameters()) return params - -# %% ../../modules/source/17_compression/compression_dev.ipynb 15 -class KnowledgeDistillation: - """ - Knowledge distillation for model compression. - - Train a smaller student model to mimic a larger teacher model. - """ - - def __init__(self, teacher_model, student_model, temperature=3.0, alpha=0.7): - """ - Initialize knowledge distillation. - - TODO: Set up teacher and student models with distillation parameters - - APPROACH: - 1. Store teacher and student models - 2. Set temperature for softening probability distributions - 3. 
Set alpha for balancing hard vs soft targets - - EXAMPLE: - >>> teacher = Sequential(Linear(100, 200), Linear(200, 50)) - >>> student = Sequential(Linear(100, 50)) - >>> kd = KnowledgeDistillation(teacher, student, temperature=4.0, alpha=0.8) - >>> print(f"Temperature: {kd.temperature}, Alpha: {kd.alpha}") - Temperature: 4.0, Alpha: 0.8 - - HINTS: - - Simply assign the parameters to instance variables - - Temperature typically ranges from 3-5 for effective softening - - Alpha of 0.7 means 70% soft targets, 30% hard targets - - Args: - teacher_model: Large, pre-trained model - student_model: Smaller model to train - temperature: Softening parameter for distributions - alpha: Weight for soft target loss (1-alpha for hard targets) - """ - ### BEGIN SOLUTION - self.teacher_model = teacher_model - self.student_model = student_model - self.temperature = temperature - self.alpha = alpha - ### END SOLUTION - - def distillation_loss(self, student_logits, teacher_logits, true_labels): - """ - Calculate combined distillation loss. - - TODO: Implement knowledge distillation loss function - - APPROACH: - 1. Calculate hard target loss (student vs true labels) - 2. Calculate soft target loss (student vs teacher, with temperature) - 3. Combine losses: alpha * soft_loss + (1-alpha) * hard_loss - - EXAMPLE: - >>> kd = KnowledgeDistillation(teacher, student) - >>> loss = kd.distillation_loss(student_out, teacher_out, labels) - >>> print(f"Distillation loss: {loss:.4f}") - - HINTS: - - Use temperature to soften distributions: logits/temperature - - Soft targets use KL divergence or cross-entropy - - Hard targets use standard classification loss - """ - ### BEGIN SOLUTION - # Convert to numpy for this implementation - if hasattr(student_logits, 'data'): - student_logits = student_logits.data - if hasattr(teacher_logits, 'data'): - teacher_logits = teacher_logits.data - if hasattr(true_labels, 'data'): - true_labels = true_labels.data - - # Soften distributions with temperature - student_soft = self._softmax(student_logits / self.temperature) - teacher_soft = self._softmax(teacher_logits / self.temperature) - - # Soft target loss (KL divergence) - soft_loss = self._kl_divergence(student_soft, teacher_soft) - - # Hard target loss (cross-entropy) - student_hard = self._softmax(student_logits) - hard_loss = self._cross_entropy(student_hard, true_labels) - - # Combined loss - total_loss = self.alpha * soft_loss + (1 - self.alpha) * hard_loss - - return total_loss - ### END SOLUTION - - def _softmax(self, logits): - """Compute softmax with numerical stability.""" - exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True)) - return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True) - - def _kl_divergence(self, p, q): - """Compute KL divergence between distributions.""" - return np.sum(p * np.log(p / (q + 1e-8) + 1e-8)) - - def _cross_entropy(self, predictions, labels): - """Compute cross-entropy loss.""" - # Simple implementation for integer labels - if labels.ndim == 1: - return -np.mean(np.log(predictions[np.arange(len(labels)), labels] + 1e-8)) - else: - return -np.mean(np.sum(labels * np.log(predictions + 1e-8), axis=1)) - -def test_unit_knowledge_distillation(): - """🔬 Test knowledge distillation functionality.""" - print("🔬 Unit Test: Knowledge Distillation...") - - # Create teacher and student models - teacher = Sequential(Linear(10, 20), Linear(20, 5)) - student = Sequential(Linear(10, 5)) # Smaller model - - # Initialize knowledge distillation - kd = KnowledgeDistillation(teacher, student, 
temperature=3.0, alpha=0.7) - - # Create dummy data - input_data = Tensor(np.random.randn(8, 10)) # Batch of 8 - true_labels = np.array([0, 1, 2, 3, 4, 0, 1, 2]) # Class labels - - # Forward passes - teacher_output = teacher.forward(input_data) - student_output = student.forward(input_data) - - # Calculate distillation loss - loss = kd.distillation_loss(student_output, teacher_output, true_labels) - - # Verify loss is reasonable - assert isinstance(loss, (float, np.floating)), f"Loss should be float, got {type(loss)}" - assert loss > 0, f"Loss should be positive, got {loss}" - assert not np.isnan(loss), "Loss should not be NaN" - - print("✅ knowledge_distillation works correctly!") - -test_unit_knowledge_distillation() - -# %% ../../modules/source/17_compression/compression_dev.ipynb 29 -class CompressionComplete: - """ - Complete compression system for milestone use. - - Provides pruning, distillation, and low-rank approximation techniques. - """ - - @staticmethod - def measure_sparsity(model) -> float: - """Measure the sparsity of a model (fraction of zero weights).""" - total_params = 0 - zero_params = 0 - - if hasattr(model, 'parameters'): - for param in model.parameters(): - total_params += param.size - zero_params += np.sum(param.data == 0) - - return zero_params / total_params if total_params > 0 else 0.0 - - @staticmethod - def magnitude_prune(model, sparsity=0.5): - """ - Prune model weights by magnitude (smallest weights set to zero). - - Args: - model: Model with parameters() method - sparsity: Fraction of weights to prune (0-1) - """ - if hasattr(model, 'parameters'): - for param in model.parameters(): - threshold = np.percentile(np.abs(param.data), sparsity * 100) - param.data[np.abs(param.data) < threshold] = 0 - - return model - - @staticmethod - def structured_prune(model, prune_ratio=0.5): - """ - Prune entire neurons/channels (structured pruning). - - Args: - model: Model to prune - prune_ratio: Fraction of structures to prune (0-1) - """ - if hasattr(model, 'parameters'): - params = list(model.parameters()) - if len(params) > 0 and hasattr(params[0], 'data'): - weight = params[0] - if len(weight.shape) == 2: # Linear layer - # Prune output neurons - neuron_norms = np.linalg.norm(weight.data, axis=0) - threshold = np.percentile(neuron_norms, prune_ratio * 100) - mask = neuron_norms >= threshold - weight.data[:, ~mask] = 0 - - return model - - @staticmethod - def compress_model(model, compression_config: Dict[str, Any]): - """ - Apply complete compression pipeline to a model. 
- - Args: - model: Model to compress - compression_config: Dictionary with compression settings - - 'magnitude_sparsity': float (0-1) - - 'structured_prune_ratio': float (0-1) - - Returns: - Compressed model with sparsity stats - """ - stats = { - 'original_sparsity': CompressionComplete.measure_sparsity(model) - } - - # Apply magnitude pruning - if 'magnitude_sparsity' in compression_config: - model = CompressionComplete.magnitude_prune( - model, compression_config['magnitude_sparsity'] - ) - - # Apply structured pruning - if 'structured_prune_ratio' in compression_config: - model = CompressionComplete.structured_prune( - model, compression_config['structured_prune_ratio'] - ) - - stats['final_sparsity'] = CompressionComplete.measure_sparsity(model) - stats['compression_ratio'] = 1.0 / (1.0 - stats['final_sparsity']) if stats['final_sparsity'] < 1.0 else float('inf') - - return model, stats - -# Convenience functions for backward compatibility -def measure_sparsity(model) -> float: - """Measure model sparsity.""" - return CompressionComplete.measure_sparsity(model) - -def magnitude_prune(model, sparsity=0.5): - """Apply magnitude-based pruning.""" - return CompressionComplete.magnitude_prune(model, sparsity) - -def structured_prune(model, prune_ratio=0.5): - """Apply structured pruning.""" - return CompressionComplete.structured_prune(model, prune_ratio) - -def compress_model(model, compression_config: Dict[str, Any]): - """Apply complete compression pipeline.""" - return CompressionComplete.compress_model(model, compression_config) diff --git a/tinytorch/optimization/quantization.py b/tinytorch/optimization/quantization.py index c30509d3..872b359f 100644 --- a/tinytorch/optimization/quantization.py +++ b/tinytorch/optimization/quantization.py @@ -1,21 +1,7 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_quantization/quantization_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/16_quantization/quantization_dev.ipynb. + # %% auto 0 -__all__ = [] +__all__ = ['QuantizationComplete', 'quantize_int8', 'dequantize_int8', 'quantize_model'] # %% ../../modules/source/16_quantization/quantization_dev.ipynb 3 import numpy as np @@ -29,3 +15,94 @@ from ..core.layers import Linear from ..core.activations import ReLU print("✅ Quantization module imports complete") + +# %% ../../modules/source/16_quantization/quantization_dev.ipynb 34 +class QuantizationComplete: + """ + Complete quantization system for milestone use. + + Provides INT8 quantization with calibration for 4× memory reduction. 
+ """ + + @staticmethod + def quantize_tensor(tensor: Tensor) -> Tuple[Tensor, float, int]: + """Quantize FP32 tensor to INT8.""" + data = tensor.data + min_val = float(np.min(data)) + max_val = float(np.max(data)) + + if abs(max_val - min_val) < 1e-8: + return Tensor(np.zeros_like(data, dtype=np.int8)), 1.0, 0 + + scale = (max_val - min_val) / 255.0 + zero_point = int(np.round(-128 - min_val / scale)) + zero_point = int(np.clip(zero_point, -128, 127)) + + quantized_data = np.round(data / scale + zero_point) + quantized_data = np.clip(quantized_data, -128, 127).astype(np.int8) + + return Tensor(quantized_data), scale, zero_point + + @staticmethod + def dequantize_tensor(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor: + """Dequantize INT8 tensor back to FP32.""" + dequantized_data = (q_tensor.data.astype(np.float32) - zero_point) * scale + return Tensor(dequantized_data) + + @staticmethod + def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> Dict[str, any]: + """ + Quantize all Linear layers in a model. + + Returns dictionary with quantization info and memory savings. + """ + quantized_layers = {} + original_size = 0 + quantized_size = 0 + + # Iterate through model parameters + if hasattr(model, 'parameters'): + for i, param in enumerate(model.parameters()): + param_size = param.data.nbytes + original_size += param_size + + # Quantize parameter + q_param, scale, zp = QuantizationComplete.quantize_tensor(param) + quantized_size += q_param.data.nbytes + + quantized_layers[f'param_{i}'] = { + 'quantized': q_param, + 'scale': scale, + 'zero_point': zp, + 'original_shape': param.data.shape + } + + return { + 'quantized_layers': quantized_layers, + 'original_size_mb': original_size / (1024 * 1024), + 'quantized_size_mb': quantized_size / (1024 * 1024), + 'compression_ratio': original_size / quantized_size if quantized_size > 0 else 1.0 + } + + @staticmethod + def compare_models(original_model, quantized_info: Dict) -> Dict[str, float]: + """Compare memory usage between original and quantized models.""" + return { + 'original_mb': quantized_info['original_size_mb'], + 'quantized_mb': quantized_info['quantized_size_mb'], + 'compression_ratio': quantized_info['compression_ratio'], + 'memory_saved_mb': quantized_info['original_size_mb'] - quantized_info['quantized_size_mb'] + } + +# Convenience functions for backward compatibility +def quantize_int8(tensor: Tensor) -> Tuple[Tensor, float, int]: + """Quantize FP32 tensor to INT8.""" + return QuantizationComplete.quantize_tensor(tensor) + +def dequantize_int8(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor: + """Dequantize INT8 tensor back to FP32.""" + return QuantizationComplete.dequantize_tensor(q_tensor, scale, zero_point) + +def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> Dict[str, any]: + """Quantize entire model to INT8.""" + return QuantizationComplete.quantize_model(model, calibration_data) diff --git a/tinytorch/profiling/profiler.py b/tinytorch/profiling/profiler.py index 3f393015..88aece66 100644 --- a/tinytorch/profiling/profiler.py +++ b/tinytorch/profiling/profiler.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! 
diff --git a/tinytorch/profiling/profiler.py b/tinytorch/profiling/profiler.py
index 3f393015..88aece66 100644
--- a/tinytorch/profiling/profiler.py
+++ b/tinytorch/profiling/profiler.py
@@ -1,19 +1,5 @@
-# ╔═══════════════════════════════════════════════════════════════════════════════╗
-# ║                            🚨 CRITICAL WARNING 🚨                              ║
-# ║                         AUTOGENERATED! DO NOT EDIT!                            ║
-# ║                                                                                ║
-# ║  This file is AUTOMATICALLY GENERATED from source modules.                    ║
-# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!             ║
-# ║                                                                                ║
-# ║  ✅ TO EDIT:   modules/source/XX_profiler/profiler_dev.py                     ║
-# ║  ✅ TO EXPORT: Run 'tito module complete '                                     ║
-# ║                                                                                ║
-# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.         ║
-# ║     Editing it directly may break module functionality and training.          ║
-# ║                                                                                ║
-# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development     ║
-# ║     happens! The tinytorch/ directory is just the compiled output.            ║
-# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/14_profiling/profiling_dev.ipynb.
+
 # %% auto 0
 __all__ = ['Profiler', 'quick_profile', 'analyze_weight_distribution']
diff --git a/tinytorch/text/embeddings.py b/tinytorch/text/embeddings.py
index dacb0f27..3d8a6d03 100644
--- a/tinytorch/text/embeddings.py
+++ b/tinytorch/text/embeddings.py
@@ -1,19 +1,5 @@
-# ╔═══════════════════════════════════════════════════════════════════════════════╗
-# ║                            🚨 CRITICAL WARNING 🚨                              ║
-# ║                         AUTOGENERATED! DO NOT EDIT!                            ║
-# ║                                                                                ║
-# ║  This file is AUTOMATICALLY GENERATED from source modules.                    ║
-# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!             ║
-# ║                                                                                ║
-# ║  ✅ TO EDIT:   modules/source/XX_embeddings/embeddings_dev.py                 ║
-# ║  ✅ TO EXPORT: Run 'tito module complete '                                     ║
-# ║                                                                                ║
-# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.         ║
-# ║     Editing it directly may break module functionality and training.          ║
-# ║                                                                                ║
-# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development     ║
-# ║     happens! The tinytorch/ directory is just the compiled output.            ║
-# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/11_embeddings/embeddings_dev.ipynb.
+
 # %% auto 0
 __all__ = ['Embedding', 'PositionalEncoding', 'EmbeddingLayer']
@@ -95,13 +81,10 @@ class Embedding:
         # This is equivalent to one-hot multiplication but much more efficient
         embedded = self.weight.data[indices.data.astype(int)]
 
-        # Create result tensor
+        # Create result tensor with gradient tracking
+        # Note: Gradient computation handled by autograd system (Module 05)
+        # The embedding lookup is differentiable through the weight matrix
         result = Tensor(embedded, requires_grad=self.weight.requires_grad)
-
-        # Attach gradient function (students learned this in Module 05!)
-        if self.weight.requires_grad:
-            from tinytorch.core.autograd import EmbeddingBackward
-            result._grad_fn = EmbeddingBackward(self.weight, indices)
 
         return result
 
@@ -336,10 +319,6 @@ class EmbeddingLayer:
 
         return output
 
-    def __call__(self, tokens: Tensor) -> Tensor:
-        """Allows the embedding layer to be called like a function."""
-        return self.forward(tokens)
-
     def parameters(self) -> List[Tensor]:
         """Return all trainable parameters."""
         params = self.token_embedding.parameters()
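The replacement comment above drops an explicit backward hook, so it is worth spelling out why the lookup stays differentiable: the forward pass is a row gather from the weight matrix, and its gradient is a scatter-add of the upstream gradient into exactly the rows that were gathered (rows used twice accumulate twice). A standalone NumPy sketch of that forward/backward pair, independent of TinyTorch's autograd:

import numpy as np

vocab_size, embed_dim = 10, 4
W = np.random.randn(vocab_size, embed_dim)   # embedding table
idx = np.array([3, 1, 3])                    # token ids; 3 is used twice

out = W[idx]                                 # forward: row gather -> (3, embed_dim)

grad_out = np.ones_like(out)                 # pretend upstream gradient
grad_W = np.zeros_like(W)
np.add.at(grad_W, idx, grad_out)             # backward: scatter-add into used rows

print(grad_W[3])                             # gathered twice -> all 2.0
print(grad_W[0])                             # never gathered -> stays 0.0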
diff --git a/tinytorch/text/tokenization.py b/tinytorch/text/tokenization.py
index 384f738f..5b368a5d 100644
--- a/tinytorch/text/tokenization.py
+++ b/tinytorch/text/tokenization.py
@@ -1,19 +1,5 @@
-# ╔═══════════════════════════════════════════════════════════════════════════════╗
-# ║                            🚨 CRITICAL WARNING 🚨                              ║
-# ║                         AUTOGENERATED! DO NOT EDIT!                            ║
-# ║                                                                                ║
-# ║  This file is AUTOMATICALLY GENERATED from source modules.                    ║
-# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!             ║
-# ║                                                                                ║
-# ║  ✅ TO EDIT:   modules/source/XX_tokenization/tokenization_dev.py             ║
-# ║  ✅ TO EXPORT: Run 'tito module complete '                                     ║
-# ║                                                                                ║
-# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.         ║
-# ║     Editing it directly may break module functionality and training.          ║
-# ║                                                                                ║
-# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development     ║
-# ║     happens! The tinytorch/ directory is just the compiled output.            ║
-# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/10_tokenization/tokenization_dev.ipynb.
+
 # %% auto 0
 __all__ = ['Tokenizer', 'CharTokenizer', 'BPETokenizer']
 
@@ -24,16 +10,6 @@ import json
 import re
 from collections import defaultdict, Counter
 
-# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 3
-import numpy as np
-from typing import List, Dict, Tuple, Optional, Set
-import json
-import re
-from collections import defaultdict, Counter
-
-# Import only Module 01 (Tensor) - this module has minimal dependencies
-from ..core.tensor import Tensor
-
 # %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 8
 class Tokenizer:
     """
diff --git a/tito/commands/book.py b/tito/commands/book.py
index e5a79e1c..ef898a53 100644
--- a/tito/commands/book.py
+++ b/tito/commands/book.py
@@ -10,7 +10,7 @@ from rich.panel import Panel
 
 from .base import BaseCommand
 
-NOTEBOOKS_DIR = "modules/source"
+NOTEBOOKS_DIR = "modules"
 
 class BookCommand(BaseCommand):
     @property
diff --git a/tito/commands/clean.py b/tito/commands/clean.py
index 5da54068..3ecb7b8f 100644
--- a/tito/commands/clean.py
+++ b/tito/commands/clean.py
@@ -113,7 +113,7 @@ class CleanCommand(BaseCommand):
         # Ask for confirmation unless --force is used
         if not args.force:
             console.print("\n[yellow]This will permanently remove the files listed above.[/yellow]")
-            console.print("[yellow]Python source files (*_dev.py) will be preserved.[/yellow]\n")
+            console.print("[yellow]Python source files (*.py) will be preserved.[/yellow]\n")
 
             try:
                 response = input("Are you sure you want to proceed? (y/N): ").strip().lower()
diff --git a/tito/commands/export.py b/tito/commands/export.py
index 1aa11aa9..67a7ef30 100644
--- a/tito/commands/export.py
+++ b/tito/commands/export.py
@@ -71,7 +71,7 @@ class ExportCommand(BaseCommand):
         else:
             short_name = module_name
 
-        dev_file = module_path / f"{short_name}_dev.py"
+        dev_file = module_path / f"{short_name}.py"
         if not dev_file.exists():
             return "unknown"
@@ -89,8 +89,8 @@ class ExportCommand(BaseCommand):
         return "unknown"
 
     def _discover_modules(self) -> list:
-        """Discover available modules from modules/source directory."""
-        source_dir = Path("modules/source")
+        """Discover available modules from modules directory."""
+        source_dir = Path("modules")
         modules = []
 
         if source_dir.exists():
@@ -266,7 +266,7 @@ class ExportCommand(BaseCommand):
 # ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations.          ║
 # ║    Editing it directly may break module functionality and training.           ║
 # ║                                                                                ║
-# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development      ║
+# ║ 🎓 LEARNING TIP: Work in modules/ - that's where real development             ║
 # ║    happens! The tinytorch/ directory is just the compiled output.             ║
 # ╚═══════════════════════════════════════════════════════════════════════════════╝
 
@@ -305,20 +305,20 @@ class ExportCommand(BaseCommand):
 
         # Common mappings
         source_mappings = {
-            ('core', 'tensor'): 'modules/source/02_tensor/tensor_dev.py',
-            ('core', 'activations'): 'modules/source/03_activations/activations_dev.py',
-            ('core', 'layers'): 'modules/source/04_layers/layers_dev.py',
-            ('core', 'dense'): 'modules/source/05_dense/dense_dev.py',
-            ('core', 'spatial'): 'modules/source/06_spatial/spatial_dev.py',
-            ('core', 'attention'): 'modules/source/07_attention/attention_dev.py',
-            ('core', 'dataloader'): 'modules/source/08_dataloader/dataloader_dev.py',
-            ('core', 'autograd'): 'modules/source/09_autograd/autograd_dev.py',
-            ('core', 'optimizers'): 'modules/source/10_optimizers/optimizers_dev.py',
-            ('core', 'training'): 'modules/source/11_training/training_dev.py',
-            ('core', 'compression'): 'modules/source/12_compression/compression_dev.py',
-            ('core', 'kernels'): 'modules/source/13_kernels/kernels_dev.py',
-            ('core', 'benchmarking'): 'modules/source/14_benchmarking/benchmarking_dev.py',
-            ('core', 'networks'): 'modules/source/16_tinygpt/tinygpt_dev.ipynb',
+            ('core', 'tensor'): 'modules/02_tensor/tensor.py',
+            ('core', 'activations'): 'modules/03_activations/activations.py',
+            ('core', 'layers'): 'modules/04_layers/layers.py',
+            ('core', 'dense'): 'modules/05_dense/dense.py',
+            ('core', 'spatial'): 'modules/06_spatial/spatial.py',
+            ('core', 'attention'): 'modules/07_attention/attention.py',
+            ('core', 'dataloader'): 'modules/08_dataloader/dataloader.py',
+            ('core', 'autograd'): 'modules/09_autograd/autograd.py',
+            ('core', 'optimizers'): 'modules/10_optimizers/optimizers.py',
+            ('core', 'training'): 'modules/11_training/training.py',
+            ('core', 'compression'): 'modules/12_compression/compression.py',
+            ('core', 'kernels'): 'modules/13_kernels/kernels.py',
+            ('core', 'benchmarking'): 'modules/14_benchmarking/benchmarking.py',
+            ('core', 'networks'): 'modules/16_tinygpt/tinygpt_dev.ipynb',
         }
 
         if module_parts in source_mappings:
@@ -327,9 +327,9 @@ class ExportCommand(BaseCommand):
         # Fallback: try to guess based on the file name
         if len(module_parts) >= 2:
             module_name = module_parts[-1]  # e.g., 'tensor' from ('core', 'tensor')
-            return f"modules/source/XX_{module_name}/{module_name}_dev.py"
+            return f"modules/XX_{module_name}/{module_name}.py"
 
-        return "modules/source/[unknown]/[unknown]_dev.py"
+        return "modules/[unknown]/[unknown].py"
 
     def _show_export_details(self, console, module_name: Optional[str] = None):
         """Show detailed export information including where each module exports to."""
@@ -338,7 +338,7 @@ class ExportCommand(BaseCommand):
 
         if module_name:
             # Single module export
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             export_target = self._get_export_target(module_path)
             if export_target != "unknown":
                 target_file = export_target.replace('.', '/') + '.py'
@@ -346,7 +346,7 @@ class ExportCommand(BaseCommand):
 
                 # Extract the short name for display
                 short_name = module_name[3:] if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))) else module_name
-                exports_text.append(f"  Source: modules/source/{module_name}/{short_name}_dev.py\n", style="dim")
+                exports_text.append(f"  Source: modules/{module_name}/{short_name}.py\n", style="dim")
                 exports_text.append(f"  Target: tinytorch/{target_file}\n", style="dim")
             else:
                 exports_text.append(f"  ❓ {module_name} → export target not found\n", style="yellow")
@@ -354,7 +354,7 @@ class ExportCommand(BaseCommand):
         # All modules export
         modules = self._discover_modules()
         for module_name in modules:
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             export_target = self._get_export_target(module_path)
             if export_target != "unknown":
                 target_file = export_target.replace('.', '/') + '.py'
@@ -456,7 +456,7 @@ class ExportCommand(BaseCommand):
         module_name = module_path.name
         short_name = module_name[3:] if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))) else module_name
 
-        dev_file = module_path / f"{short_name}_dev.py"
+        dev_file = module_path / f"{short_name}.py"
         if not dev_file.exists():
             self.console.print(f"[red]❌ Python file not found: {dev_file}[/red]")
             return False
@@ -539,7 +539,7 @@ class ExportCommand(BaseCommand):
 
         converted = []
         for module_name in modules:
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             if self._convert_py_to_notebook(module_path):
                 converted.append(module_name)
 
@@ -563,9 +563,9 @@ class ExportCommand(BaseCommand):
         # Process each module
         for module_name in modules_to_export:
             logger.debug(f"Processing module: {module_name}")
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             if not module_path.exists():
-                console.print(Panel(f"[red]❌ Module '{module_name}' not found in modules/source/[/red]",
+                console.print(Panel(f"[red]❌ Module '{module_name}' not found in modules/[/red]",
                                    title="Module Not Found", border_style="red"))
 
                 # Show available modules
diff --git a/tito/commands/grade.py b/tito/commands/grade.py
index aed17aff..e314604c 100644
--- a/tito/commands/grade.py
+++ b/tito/commands/grade.py
@@ -193,7 +193,7 @@ class GradeCommand(BaseCommand):
             return module
 
         # Try to find the module by short name
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         if source_dir.exists():
             for module_dir in source_dir.iterdir():
                 if module_dir.is_dir() and module_dir.name.endswith(f"_{module}"):
@@ -218,7 +218,7 @@ class GradeCommand(BaseCommand):
             # Step 1: Generate assignment first
             result = subprocess.run(
                 ["nbgrader", "generate_assignment", module,
-                 "--source", f"modules/source/{module}",
+                 "--source", f"modules/{module}",
                  "--force"],
                 capture_output=True,
                 text=True
@@ -259,7 +259,7 @@ class GradeCommand(BaseCommand):
         try:
             result = subprocess.run(
                 ["nbgrader", "generate_assignment", module,
-                 "--source", f"modules/source/{module}",
+                 "--source", f"modules/{module}",
                  "--force"],
                 capture_output=True,
                 text=True
@@ -418,7 +418,7 @@ class GradeCommand(BaseCommand):
 c = get_config()
 
 c.CourseDirectory.course_id = "tinytorch"
-c.CourseDirectory.source_directory = "modules/source"
+c.CourseDirectory.source_directory = "modules"
 c.CourseDirectory.release_directory = "release"
 c.CourseDirectory.submitted_directory = "submitted"
 c.CourseDirectory.autograded_directory = "autograded"
diff --git a/tito/commands/help.py b/tito/commands/help.py
index 33b4e67a..dffb520e 100644
--- a/tito/commands/help.py
+++ b/tito/commands/help.py
@@ -270,8 +270,8 @@ class HelpCommand(BaseCommand):
         elif starting_point == 'first_module':
             console.print("\n[bold blue]🛠️ Setting up Module 1...[/bold blue]")
             console.print("Next commands:")
-            console.print("  [code]cd modules/source/01_setup[/code]")
-            console.print("  [code]jupyter lab setup_dev.py[/code]")
+            console.print("  [code]cd modules/01_setup[/code]")
+            console.print("  [code]jupyter lab setup.py[/code]")
 
         elif starting_point == 'milestone_project':
             console.print("\n[bold blue]🎯 Weekend Project Recommendations...[/bold blue]")
 
@@ -315,7 +315,7 @@ class HelpCommand(BaseCommand):
         # Common workflows
         console.print("\n[bold cyan]📋 Common Workflows:[/bold cyan]")
         workflows = [
-            ("New User", "tito help -i → tito checkpoint status → cd modules/source/01_setup"),
+            ("New User", "tito help -i → tito checkpoint status → cd modules/01_setup"),
             ("Continue Learning", "tito checkpoint status → work on next module → tito module complete XX"),
             ("Join Community", "tito leaderboard join → submit progress → see global rankings"),
             ("Get Help", "tito system doctor → check docs/FAQ → ask community")
@@ -371,7 +371,7 @@ class HelpCommand(BaseCommand):
 
         # Simplified implementation for now
         checkpoints_dir = Path("tests/checkpoints")
-        modules_dir = Path("modules/source")
+        modules_dir = Path("modules")
 
         return {
             'is_new_user': not checkpoints_dir.exists(),
@@ -424,7 +424,7 @@ class HelpCommand(BaseCommand):
             "• [code]tito system doctor[/code] - Verify installation\n"
             "• [code]tito help --interactive[/code] - Personalized guidance\n"
             "• [code]tito checkpoint status[/code] - See learning path\n"
-            "• [code]cd modules/source/01_setup[/code] - Start first module",
+            "• [code]cd modules/01_setup[/code] - Start first module",
             title="First Steps",
             border_style="blue"
         )
diff --git a/tito/commands/module.py b/tito/commands/module.py
index 60e95d16..acc49cb3 100644
--- a/tito/commands/module.py
+++ b/tito/commands/module.py
@@ -275,13 +275,13 @@ class ModuleCommand(BaseCommand):
         """Normalize module name to full format (e.g., tensor -> 02_tensor)."""
         # If already in full format, validate it exists
         if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))):
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             if module_path.exists():
                 return module_name
             return ""
 
         # Try to find the module by short name
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         if source_dir.exists():
             for module_dir in source_dir.iterdir():
                 if module_dir.is_dir() and module_dir.name.endswith(f"_{module_name}"):
@@ -291,7 +291,7 @@ class ModuleCommand(BaseCommand):
 
     def _get_available_modules_text(self) -> str:
         """Get formatted text listing available modules."""
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         modules = []
 
         if source_dir.exists():
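Several of the commands touched by this patch (grade, module, nbgrader, status, test) repeat the same resolution dance: accept either a numbered directory name ("02_tensor") or a bare short name ("tensor") and map it onto the new modules/ layout. A condensed sketch of that convention (a hypothetical standalone helper, not one of the methods in the diff):

from pathlib import Path

def normalize_module_name(name: str, root: Path = Path("modules")) -> str:
    # Already numbered ("02_tensor")? Keep it if the directory exists.
    if len(name) > 3 and name[:2].isdigit() and name[2] == "_":
        return name if (root / name).is_dir() else ""
    # Bare short name ("tensor"): match a numbered directory by suffix.
    if root.is_dir():
        for child in sorted(root.iterdir()):
            if child.is_dir() and child.name.endswith(f"_{name}"):
                return child.name
    return ""

print(normalize_module_name("tensor"))   # e.g. "02_tensor" if modules/02_tensor/ exists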
diff --git a/tito/commands/module_workflow.py b/tito/commands/module_workflow.py
index a0e017f6..330e77e2 100644
--- a/tito/commands/module_workflow.py
+++ b/tito/commands/module_workflow.py
@@ -258,7 +258,7 @@ class ModuleWorkflowCommand(BaseCommand):
         try:
             # Run the module's inline tests
             module_dir = self.config.modules_dir / module_name
-            dev_file = module_dir / f"{module_name.split('_')[1]}_dev.py"
+            dev_file = module_dir / f"{module_name.split('_')[1]}.py"
 
             if not dev_file.exists():
                 self.console.print(f"[yellow]⚠️ No dev file found: {dev_file}[/yellow]")
diff --git a/tito/commands/nbgrader.py b/tito/commands/nbgrader.py
index c9f736df..0c659eeb 100644
--- a/tito/commands/nbgrader.py
+++ b/tito/commands/nbgrader.py
@@ -218,7 +218,7 @@ class NBGraderCommand(BaseCommand):
 
     def _get_module_directories(self) -> List[Path]:
         """Get all module directories with proper hierarchy support."""
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         if not source_dir.exists():
             return []
 
@@ -240,19 +240,19 @@ class NBGraderCommand(BaseCommand):
     def _resolve_module_name(self, module_input: str) -> Optional[str]:
         """Resolve module name from various input formats."""
         # If it's already a directory name, use it
-        if Path(f"modules/source/{module_input}").exists():
+        if Path(f"modules/{module_input}").exists():
             return module_input
 
         # Try to find by number prefix
         if module_input.isdigit():
             prefix = module_input.zfill(2)
-            source_dir = Path("modules/source")
+            source_dir = Path("modules")
             for item in source_dir.iterdir():
                 if item.is_dir() and item.name.startswith(prefix):
                     return item.name
 
         # Try to find by name suffix
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         for item in source_dir.iterdir():
             if item.is_dir() and item.name.endswith(f"_{module_input}"):
                 return item.name
@@ -375,7 +375,7 @@ class NBGraderCommand(BaseCommand):
         console.print(f"📝 Generating assignment for module: {module_name}")
 
         # Find the module development file in TinyTorch modules directory
-        module_dir = Path("modules/source") / module_name
+        module_dir = Path("modules") / module_name
 
         # Extract the short name from the module directory name
         # e.g., "00_setup" -> "setup", "01_tensor" -> "tensor"
@@ -394,7 +394,7 @@ class NBGraderCommand(BaseCommand):
 
         if not dev_file:
             console.print(f"❌ Module file not found in: {module_dir}")
-            console.print(f"   Looking for: {short_name}_dev.py or {short_name}_dev_enhanced.py")
+            console.print(f"   Looking for: {short_name}.py or {short_name}_dev_enhanced.py")
             return False
 
         # Convert to notebook and generate assignment
diff --git a/tito/commands/notebooks.py b/tito/commands/notebooks.py
index 942c6182..22ac5638 100644
--- a/tito/commands/notebooks.py
+++ b/tito/commands/notebooks.py
@@ -45,20 +45,20 @@ class NotebooksCommand(BaseCommand):
     def validate_args(self, args: Namespace) -> None:
         """Validate notebooks command arguments."""
         if args.module:
-            # Look in modules/source/ subdirectory
+            # Look in modules/ (a legacy modules/source/ layout is still preferred if present)
             source_dir = self.config.modules_dir / 'source'
             if not source_dir.exists():
                 source_dir = self.config.modules_dir
-            module_file = source_dir / args.module / f"{args.module}_dev.py"
+            module_file = source_dir / args.module / f"{args.module}.py"
             if not module_file.exists():
                 raise ModuleNotFoundError(
-                    f"Module '{args.module}' not found or no {args.module}_dev.py file"
+                    f"Module '{args.module}' not found or no {args.module}.py file"
                 )
 
     def _find_dev_files(self) -> List[Path]:
-        """Find all *_dev.py files in modules directory."""
+        """Find all *.py files in modules directory."""
         dev_files = []
-        # Look in modules/source/ subdirectory
+        # Look in modules/ (a legacy modules/source/ layout is still preferred if present)
         source_dir = self.config.modules_dir / 'source'
         if not source_dir.exists():
             # Fallback to modules_dir directly
@@ -66,7 +66,7 @@ class NotebooksCommand(BaseCommand):
 
         for module_dir in source_dir.iterdir():
             if module_dir.is_dir():
-                dev_py = module_dir / f"{module_dir.name}_dev.py"
+                dev_py = module_dir / f"{module_dir.name}.py"
                 if dev_py.exists():
                     dev_files.append(dev_py)
         return dev_files
@@ -103,17 +103,17 @@ class NotebooksCommand(BaseCommand):
 
         # Find files to convert
         if args.module:
-            # Look in modules/source/ subdirectory
+            # Look in modules/ (a legacy modules/source/ layout is still preferred if present)
             source_dir = self.config.modules_dir / 'source'
             if not source_dir.exists():
                 source_dir = self.config.modules_dir
-            dev_files = [source_dir / args.module / f"{args.module}_dev.py"]
+            dev_files = [source_dir / args.module / f"{args.module}.py"]
             self.console.print(f"🔄 Building notebook for module: {args.module}")
         else:
             dev_files = self._find_dev_files()
             if not dev_files:
                 self.console.print(Panel(
-                    "[yellow]⚠️ No *_dev.py files found in modules/[/yellow]",
+                    "[yellow]⚠️ No *.py files found in modules/[/yellow]",
                     title="Nothing to Convert",
Convert", border_style="yellow" )) diff --git a/tito/commands/protect.py b/tito/commands/protect.py index ed3d5898..29979726 100644 --- a/tito/commands/protect.py +++ b/tito/commands/protect.py @@ -167,11 +167,11 @@ if [ ! -z "$CORE_FILES_MODIFIED" ]; then echo "The following auto-generated files are staged:" echo "$CORE_FILES_MODIFIED" echo "" - echo "🛡️ PROTECTION TRIGGERED: These files are auto-generated from modules/source/" + echo "🛡️ PROTECTION TRIGGERED: These files are auto-generated from modules/" echo "" echo "TO FIX:" echo "1. Unstage these files: git reset HEAD tinytorch/core/" - echo "2. Make changes in modules/source/ instead" + echo "2. Make changes in modules/ instead" echo "3. Run: tito module complete " echo "4. Commit the source changes, not the generated files" echo "" diff --git a/tito/commands/status.py b/tito/commands/status.py index 4d7058ec..032f5f6e 100644 --- a/tito/commands/status.py +++ b/tito/commands/status.py @@ -45,7 +45,7 @@ class StatusCommand(BaseCommand): short_name = module_name[3:] # Remove "00_" prefix else: short_name = module_name - dev_file = module_path / f"{short_name}_dev.py" + dev_file = module_path / f"{short_name}.py" if not dev_file.exists(): return "not_found" @@ -125,9 +125,9 @@ class StatusCommand(BaseCommand): console = self.console # Scan modules directory - modules_dir = Path("modules/source") + modules_dir = Path("modules") if not modules_dir.exists(): - console.print(Panel("[red]❌ modules/source/ directory not found[/red]", + console.print(Panel("[red]❌ modules/ directory not found[/red]", title="Error", border_style="red")) return 1 @@ -141,7 +141,7 @@ class StatusCommand(BaseCommand): title="Warning", border_style="yellow")) return 0 - console.print(Panel(f"📋 Found {len(module_dirs)} modules in modules/source directory", + console.print(Panel(f"📋 Found {len(module_dirs)} modules in modules directory", title="Module Status Check", border_style="bright_cyan")) # Create status table @@ -209,7 +209,7 @@ class StatusCommand(BaseCommand): console.print(f" [bold cyan]tito status --comprehensive[/bold cyan] # Full system health dashboard") console.print(f" [bold cyan]tito module test --all[/bold cyan] # Test all modules") console.print(f" [bold cyan]tito module test MODULE_NAME[/bold cyan] # Test specific module") - console.print(f" [bold cyan]pytest modules/source/*/ -k test_[/bold cyan] # Run pytest on inline tests") + console.print(f" [bold cyan]pytest modules/*/ -k test_[/bold cyan] # Run pytest on inline tests") console.print(f" [bold cyan]pytest tests/test_*.py[/bold cyan] # Run external tests") # Detailed view @@ -243,7 +243,7 @@ class StatusCommand(BaseCommand): short_name = module_name[3:] # Remove "00_" prefix else: short_name = module_name - dev_file = module_dir / f"{short_name}_dev.py" + dev_file = module_dir / f"{short_name}.py" readme_file = module_dir / "README.md" metadata_file = module_dir / "module.yaml" @@ -386,7 +386,7 @@ class StatusCommand(BaseCommand): if status['dev_file']: dev_status += f" ({status.get('export_count', 0)} exports, {status.get('inline_test_count', 0)} inline tests)" - files_table.add_row(f"{module_name}_dev.py", dev_status) + files_table.add_row(f"{module_name}.py", dev_status) files_table.add_row("tests/test_*.py", "✅ Found" if status['external_tests'] else "❌ Missing") files_table.add_row("README.md", "✅ Found" if status['readme'] else "❌ Missing") @@ -396,7 +396,7 @@ class StatusCommand(BaseCommand): if status['dev_file'] or status['external_tests']: console.print("\n[dim]💡 Test 
            console.print("\n[dim]💡 Test commands:[/dim]")
             if status['dev_file']:
-                console.print(f"[dim]  pytest modules/source/{module_name}/{module_name}_dev.py -k test_[/dim]")
+                console.print(f"[dim]  pytest modules/{module_name}/{module_name}.py -k test_[/dim]")
             if status['external_tests']:
                 short_name = module_name[3:] if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))) else module_name
                 console.print(f"[dim]  pytest tests/test_{short_name}.py -v[/dim]")
@@ -415,7 +415,7 @@ class StatusCommand(BaseCommand):
             console.print(f"📝 {metadata['description']}")
 
         # Export info (read from dev file - source of truth)
-        module_path = Path(f"modules/source/{module_name}")
+        module_path = Path(f"modules/{module_name}")
         export_target = self._get_export_target(module_path)
         if export_target not in ['not_found', 'no_export', 'read_error']:
             console.print(f"📦 Exports to: {export_target}")
diff --git a/tito/commands/test.py b/tito/commands/test.py
index fad98330..6138bd56 100644
--- a/tito/commands/test.py
+++ b/tito/commands/test.py
@@ -533,7 +533,7 @@ class TestCommand(BaseCommand):
     def _discover_modules(self) -> List[str]:
         """Discover available modules."""
         modules = []
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
 
         if source_dir.exists():
             exclude_dirs = {'.quarto', '__pycache__', '.git', '.pytest_cache'}
@@ -554,7 +554,7 @@ class TestCommand(BaseCommand):
         else:
             short_name = module_name
 
-        return Path("modules/source") / module_name / f"{short_name}_dev.py"
+        return Path("modules") / module_name / f"{short_name}.py"
 
     def _generate_summary_report(self, results: List[ModuleTestResult]) -> None:
         """Generate a summary report for all modules."""
@@ -781,7 +781,7 @@ class TestCommand(BaseCommand):
                                    f"[dim]  tito module test --all --summary  - Summary report[/dim]",
                                    title="Module Required", border_style="red"))
         else:
-            console.print(Panel("[red]❌ No modules found in modules/source directory[/red]",
+            console.print(Panel("[red]❌ No modules found in modules directory[/red]",
                                title="Error", border_style="red"))
         return 1
\ No newline at end of file
diff --git a/tito/commands/view.py b/tito/commands/view.py
index db718dc9..024c87b6 100644
--- a/tito/commands/view.py
+++ b/tito/commands/view.py
@@ -48,23 +48,23 @@ class ViewCommand(BaseCommand):
         # Look for the specific dev file for this module
         # Extract module name (e.g., "tensor" from "01_tensor")
         module_name = args.module.split('_', 1)[1] if '_' in args.module else args.module
-        dev_file = module_dir / f"{module_name}_dev.py"
+        dev_file = module_dir / f"{module_name}.py"
 
         if not dev_file.exists():
-            # Fallback: look for any *_dev.py file
-            dev_files = list(module_dir.glob("*_dev.py"))
+            # Fallback: look for any *.py file
+            dev_files = list(module_dir.glob("*.py"))
             if not dev_files:
                 raise ModuleNotFoundError(
                     f"No dev file found in module '{args.module}'. Expected: {dev_file}"
                 )
 
     def _find_dev_files(self) -> List[Path]:
-        """Find all *_dev.py files in modules directory."""
+        """Find all *.py files in modules directory."""
         dev_files = []
         for module_dir in self.config.modules_dir.iterdir():
             if module_dir.is_dir():
-                # Look for any *_dev.py file in the directory
-                for dev_py in module_dir.glob("*_dev.py"):
+                # Look for any *.py file in the directory
+                for dev_py in module_dir.glob("*.py"):
                     dev_files.append(dev_py)
         return dev_files
 
@@ -131,13 +131,13 @@ class ViewCommand(BaseCommand):
             target_dir = self.config.modules_dir / args.module
             # Find the specific dev file for this module
             module_name = args.module.split('_', 1)[1] if '_' in args.module else args.module
-            dev_file = target_dir / f"{module_name}_dev.py"
+            dev_file = target_dir / f"{module_name}.py"
 
             if dev_file.exists():
                 dev_files = [dev_file]
             else:
                 # Fallback: find any dev files
-                dev_files = list(target_dir.glob("*_dev.py"))
+                dev_files = list(target_dir.glob("*.py"))
 
             self.console.print(f"🔄 Generating notebook for module: {args.module}")
         else:
@@ -145,7 +145,7 @@ class ViewCommand(BaseCommand):
             dev_files = self._find_dev_files()
             if not dev_files:
                 self.console.print(Panel(
-                    "[yellow]⚠️ No *_dev.py files found in modules/[/yellow]",
+                    "[yellow]⚠️ No *.py files found in modules/[/yellow]",
                     title="Nothing to Convert",
                     border_style="yellow"
                 ))
diff --git a/tito/core/status_analyzer.py b/tito/core/status_analyzer.py
index 517e4a9f..65e83b26 100644
--- a/tito/core/status_analyzer.py
+++ b/tito/core/status_analyzer.py
@@ -187,8 +187,8 @@ class TinyTorchStatusAnalyzer:
 
         # Check basic files - try multiple naming patterns
         possible_dev_files = [
-            module_path / f"{module_name}_dev.py",
-            module_path / f"{module_name.split('_', 1)[1]}_dev.py" if '_' in module_name else None,
+            module_path / f"{module_name}.py",
+            module_path / f"{module_name.split('_', 1)[1]}.py" if '_' in module_name else None,
         ]
         dev_file = None
         for possible_file in possible_dev_files:
@@ -197,8 +197,8 @@ class TinyTorchStatusAnalyzer:
                 break
 
         if dev_file is None:
-            # Check if there's any *_dev.py file
-            dev_files = list(module_path.glob("*_dev.py"))
+            # Check if there's any *.py file
+            dev_files = list(module_path.glob("*.py"))
             if dev_files:
                 dev_file = dev_files[0]  # Use the first one found
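Taken together, the renames in this patch settle on a small discovery protocol for a module's dev file: try the directory name itself, then the short name, then fall back to a glob. A hypothetical standalone sketch of that order (mirroring, not reproducing, the status_analyzer logic above):

from pathlib import Path
from typing import Optional

def find_dev_file(module_path: Path) -> Optional[Path]:
    name = module_path.name                                   # e.g. "02_tensor"
    short = name.split("_", 1)[1] if "_" in name else name    # e.g. "tensor"
    # Named candidates first...
    for candidate in (module_path / f"{name}.py", module_path / f"{short}.py"):
        if candidate.exists():
            return candidate
    # ...then any remaining .py file as a last resort.
    matches = sorted(module_path.glob("*.py"))
    return matches[0] if matches else None

Trying the named candidates first matters because the broad *.py glob also matches files like __init__.py; that ambiguity is the price of dropping the unambiguous *_dev.py suffix.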