diff --git a/tinytorch/_modidx.py b/tinytorch/_modidx.py index 88d63238..3df88156 100644 --- a/tinytorch/_modidx.py +++ b/tinytorch/_modidx.py @@ -1,19 +1,3 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/[unknown]/[unknown]_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ # Autogenerated by nbdev d = { 'settings': { 'branch': 'main', @@ -21,36 +5,7 @@ d = { 'settings': { 'branch': 'main', 'doc_host': 'https://tinytorch.github.io', 'git_url': 'https://github.com/tinytorch/TinyTorch/', 'lib_path': 'tinytorch'}, - 'syms': { 'tinytorch.applications.tinygpt': { 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline': ( '20_capstone/capstone_dev.html#completetinygptpipeline', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.__init__': ( '20_capstone/capstone_dev.html#completetinygptpipeline.__init__', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.generate_text': ( '20_capstone/capstone_dev.html#completetinygptpipeline.generate_text', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.optimize_model': ( '20_capstone/capstone_dev.html#completetinygptpipeline.optimize_model', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.prepare_training_data': ( '20_capstone/capstone_dev.html#completetinygptpipeline.prepare_training_data', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.CompleteTinyGPTPipeline.train': ( '20_capstone/capstone_dev.html#completetinygptpipeline.train', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPT': ( '20_capstone/capstone_dev.html#tinygpt', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPT.__init__': ( '20_capstone/capstone_dev.html#tinygpt.__init__', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPTTrainer': ( '20_capstone/capstone_dev.html#tinygpttrainer', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPTTrainer.__init__': ( '20_capstone/capstone_dev.html#tinygpttrainer.__init__', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPTTrainer.prepare_batch': ( '20_capstone/capstone_dev.html#tinygpttrainer.prepare_batch', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.TinyGPTTrainer.train_step': ( '20_capstone/capstone_dev.html#tinygpttrainer.train_step', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.test_unit_complete_pipeline': ( '20_capstone/capstone_dev.html#test_unit_complete_pipeline', - 'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.test_unit_tinygpt_init': ( '20_capstone/capstone_dev.html#test_unit_tinygpt_init', - 
'tinytorch/applications/tinygpt.py'), - 'tinytorch.applications.tinygpt.test_unit_training_pipeline': ( '20_capstone/capstone_dev.html#test_unit_training_pipeline', - 'tinytorch/applications/tinygpt.py')}, + 'syms': { 'tinytorch.applications.tinygpt': {}, 'tinytorch.benchmarking.benchmark': { 'tinytorch.benchmarking.benchmark.Benchmark': ( '19_benchmarking/benchmarking_dev.html#benchmark', 'tinytorch/benchmarking/benchmark.py'), 'tinytorch.benchmarking.benchmark.Benchmark.__init__': ( '19_benchmarking/benchmarking_dev.html#benchmark.__init__', @@ -89,6 +44,8 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/benchmarking/benchmark.py'), 'tinytorch.benchmarking.benchmark.TinyMLPerf.run_standard_benchmark': ( '19_benchmarking/benchmarking_dev.html#tinymlperf.run_standard_benchmark', 'tinytorch/benchmarking/benchmark.py'), + 'tinytorch.benchmarking.benchmark.calculate_normalized_scores': ( '19_benchmarking/benchmarking_dev.html#calculate_normalized_scores', + 'tinytorch/benchmarking/benchmark.py'), 'tinytorch.benchmarking.benchmark.test_unit_benchmark': ( '19_benchmarking/benchmarking_dev.html#test_unit_benchmark', 'tinytorch/benchmarking/benchmark.py'), 'tinytorch.benchmarking.benchmark.test_unit_benchmark_suite': ( '19_benchmarking/benchmarking_dev.html#test_unit_benchmark_suite', @@ -105,6 +62,8 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/competition/submit.py'), 'tinytorch.competition.submit.validate_installation': ( '20_competition/competition_dev.html#validate_installation', 'tinytorch/competition/submit.py'), + 'tinytorch.competition.submit.validate_submission': ( '20_competition/competition_dev.html#validate_submission', + 'tinytorch/competition/submit.py'), 'tinytorch.competition.submit.worked_example_optimization': ( '20_competition/competition_dev.html#worked_example_optimization', 'tinytorch/competition/submit.py')}, 'tinytorch.core.activations': { 'tinytorch.core.activations.GELU': ( '02_activations/activations_dev.html#gelu', @@ -341,7 +300,11 @@ d = { 'settings': { 'branch': 'main', 'tinytorch.core.training.Trainer.save_checkpoint': ( '07_training/training_dev.html#trainer.save_checkpoint', 'tinytorch/core/training.py'), 'tinytorch.core.training.Trainer.train_epoch': ( '07_training/training_dev.html#trainer.train_epoch', - 'tinytorch/core/training.py')}, + 'tinytorch/core/training.py'), + 'tinytorch.core.training.load_checkpoint': ( '07_training/training_dev.html#load_checkpoint', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.save_checkpoint': ( '07_training/training_dev.html#save_checkpoint', + 'tinytorch/core/training.py')}, 'tinytorch.data.loader': { 'tinytorch.data.loader.DataLoader': ( '08_dataloader/dataloader_dev.html#dataloader', 'tinytorch/data/loader.py'), 'tinytorch.data.loader.DataLoader.__init__': ( '08_dataloader/dataloader_dev.html#dataloader.__init__', @@ -386,8 +349,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/generation/kv_cache.py')}, 'tinytorch.models.transformer': { 'tinytorch.models.transformer.GPT': ( '13_transformers/transformers_dev.html#gpt', 'tinytorch/models/transformer.py'), - 'tinytorch.models.transformer.GPT.__call__': ( '13_transformers/transformers_dev.html#gpt.__call__', - 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.GPT.__init__': ( '13_transformers/transformers_dev.html#gpt.__init__', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.GPT._create_causal_mask': ( '13_transformers/transformers_dev.html#gpt._create_causal_mask', @@ -400,8 +361,6 @@ d = { 'settings': { 'branch': 
'main', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.LayerNorm': ( '13_transformers/transformers_dev.html#layernorm', 'tinytorch/models/transformer.py'), - 'tinytorch.models.transformer.LayerNorm.__call__': ( '13_transformers/transformers_dev.html#layernorm.__call__', - 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.LayerNorm.__init__': ( '13_transformers/transformers_dev.html#layernorm.__init__', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.LayerNorm.forward': ( '13_transformers/transformers_dev.html#layernorm.forward', @@ -410,8 +369,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.MLP': ( '13_transformers/transformers_dev.html#mlp', 'tinytorch/models/transformer.py'), - 'tinytorch.models.transformer.MLP.__call__': ( '13_transformers/transformers_dev.html#mlp.__call__', - 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.MLP.__init__': ( '13_transformers/transformers_dev.html#mlp.__init__', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.MLP.forward': ( '13_transformers/transformers_dev.html#mlp.forward', @@ -420,8 +377,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.TransformerBlock': ( '13_transformers/transformers_dev.html#transformerblock', 'tinytorch/models/transformer.py'), - 'tinytorch.models.transformer.TransformerBlock.__call__': ( '13_transformers/transformers_dev.html#transformerblock.__call__', - 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.TransformerBlock.__init__': ( '13_transformers/transformers_dev.html#transformerblock.__init__', 'tinytorch/models/transformer.py'), 'tinytorch.models.transformer.TransformerBlock.forward': ( '13_transformers/transformers_dev.html#transformerblock.forward', @@ -429,49 +384,54 @@ d = { 'settings': { 'branch': 'main', 'tinytorch.models.transformer.TransformerBlock.parameters': ( '13_transformers/transformers_dev.html#transformerblock.parameters', 'tinytorch/models/transformer.py')}, 'tinytorch.optimization.acceleration': {}, - 'tinytorch.optimization.compression': { 'tinytorch.optimization.compression.CompressionComplete': ( '17_compression/compression_dev.html#compressioncomplete', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.CompressionComplete.compress_model': ( '17_compression/compression_dev.html#compressioncomplete.compress_model', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.CompressionComplete.magnitude_prune': ( '17_compression/compression_dev.html#compressioncomplete.magnitude_prune', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.CompressionComplete.measure_sparsity': ( '17_compression/compression_dev.html#compressioncomplete.measure_sparsity', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.CompressionComplete.structured_prune': ( '17_compression/compression_dev.html#compressioncomplete.structured_prune', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation': ( '17_compression/compression_dev.html#knowledgedistillation', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation.__init__': ( '17_compression/compression_dev.html#knowledgedistillation.__init__', - 'tinytorch/optimization/compression.py'), - 
'tinytorch.optimization.compression.KnowledgeDistillation._cross_entropy': ( '17_compression/compression_dev.html#knowledgedistillation._cross_entropy', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation._kl_divergence': ( '17_compression/compression_dev.html#knowledgedistillation._kl_divergence', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation._softmax': ( '17_compression/compression_dev.html#knowledgedistillation._softmax', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.KnowledgeDistillation.distillation_loss': ( '17_compression/compression_dev.html#knowledgedistillation.distillation_loss', - 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression': { 'tinytorch.optimization.compression.Linear': ( '17_compression/compression_dev.html#linear', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Linear.__init__': ( '17_compression/compression_dev.html#linear.__init__', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Linear.forward': ( '17_compression/compression_dev.html#linear.forward', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Linear.parameters': ( '17_compression/compression_dev.html#linear.parameters', + 'tinytorch/optimization/compression.py'), 'tinytorch.optimization.compression.Sequential': ( '17_compression/compression_dev.html#sequential', 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.Sequential.__call__': ( '17_compression/compression_dev.html#sequential.__call__', - 'tinytorch/optimization/compression.py'), 'tinytorch.optimization.compression.Sequential.__init__': ( '17_compression/compression_dev.html#sequential.__init__', 'tinytorch/optimization/compression.py'), 'tinytorch.optimization.compression.Sequential.forward': ( '17_compression/compression_dev.html#sequential.forward', 'tinytorch/optimization/compression.py'), 'tinytorch.optimization.compression.Sequential.parameters': ( '17_compression/compression_dev.html#sequential.parameters', 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.compress_model': ( '17_compression/compression_dev.html#compress_model', + 'tinytorch.optimization.compression.Tensor': ( '17_compression/compression_dev.html#tensor', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.__add__': ( '17_compression/compression_dev.html#tensor.__add__', 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.magnitude_prune': ( '17_compression/compression_dev.html#magnitude_prune', + 'tinytorch.optimization.compression.Tensor.__init__': ( '17_compression/compression_dev.html#tensor.__init__', 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.measure_sparsity': ( '17_compression/compression_dev.html#measure_sparsity', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.structured_prune': ( '17_compression/compression_dev.html#structured_prune', - 'tinytorch/optimization/compression.py'), - 'tinytorch.optimization.compression.test_unit_knowledge_distillation': ( '17_compression/compression_dev.html#test_unit_knowledge_distillation', - 'tinytorch/optimization/compression.py')}, - 'tinytorch.optimization.quantization': {}, + 'tinytorch.optimization.compression.Tensor.__mul__': ( 
'17_compression/compression_dev.html#tensor.__mul__', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.__repr__': ( '17_compression/compression_dev.html#tensor.__repr__', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.abs': ( '17_compression/compression_dev.html#tensor.abs', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.matmul': ( '17_compression/compression_dev.html#tensor.matmul', + 'tinytorch/optimization/compression.py'), + 'tinytorch.optimization.compression.Tensor.sum': ( '17_compression/compression_dev.html#tensor.sum', + 'tinytorch/optimization/compression.py')}, + 'tinytorch.optimization.quantization': { 'tinytorch.optimization.quantization.QuantizationComplete': ( '16_quantization/quantization_dev.html#quantizationcomplete', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.QuantizationComplete.compare_models': ( '16_quantization/quantization_dev.html#quantizationcomplete.compare_models', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.QuantizationComplete.dequantize_tensor': ( '16_quantization/quantization_dev.html#quantizationcomplete.dequantize_tensor', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.QuantizationComplete.quantize_model': ( '16_quantization/quantization_dev.html#quantizationcomplete.quantize_model', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.QuantizationComplete.quantize_tensor': ( '16_quantization/quantization_dev.html#quantizationcomplete.quantize_tensor', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.dequantize_int8': ( '16_quantization/quantization_dev.html#dequantize_int8', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.quantize_int8': ( '16_quantization/quantization_dev.html#quantize_int8', + 'tinytorch/optimization/quantization.py'), + 'tinytorch.optimization.quantization.quantize_model': ( '16_quantization/quantization_dev.html#quantize_model', + 'tinytorch/optimization/quantization.py')}, 'tinytorch.profiling.profiler': { 'tinytorch.profiling.profiler.Profiler': ( '14_profiling/profiling_dev.html#profiler', 'tinytorch/profiling/profiler.py'), 'tinytorch.profiling.profiler.Profiler.__init__': ( '14_profiling/profiling_dev.html#profiler.__init__', @@ -496,8 +456,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/profiling/profiler.py')}, 'tinytorch.text.embeddings': { 'tinytorch.text.embeddings.Embedding': ( '11_embeddings/embeddings_dev.html#embedding', 'tinytorch/text/embeddings.py'), - 'tinytorch.text.embeddings.Embedding.__call__': ( '11_embeddings/embeddings_dev.html#embedding.__call__', - 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.Embedding.__init__': ( '11_embeddings/embeddings_dev.html#embedding.__init__', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.Embedding.__repr__': ( '11_embeddings/embeddings_dev.html#embedding.__repr__', @@ -508,8 +466,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.EmbeddingLayer': ( '11_embeddings/embeddings_dev.html#embeddinglayer', 'tinytorch/text/embeddings.py'), - 'tinytorch.text.embeddings.EmbeddingLayer.__call__': ( '11_embeddings/embeddings_dev.html#embeddinglayer.__call__', - 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.EmbeddingLayer.__init__': ( 
'11_embeddings/embeddings_dev.html#embeddinglayer.__init__', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.EmbeddingLayer.__repr__': ( '11_embeddings/embeddings_dev.html#embeddinglayer.__repr__', @@ -520,8 +476,6 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.PositionalEncoding': ( '11_embeddings/embeddings_dev.html#positionalencoding', 'tinytorch/text/embeddings.py'), - 'tinytorch.text.embeddings.PositionalEncoding.__call__': ( '11_embeddings/embeddings_dev.html#positionalencoding.__call__', - 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.PositionalEncoding.__init__': ( '11_embeddings/embeddings_dev.html#positionalencoding.__init__', 'tinytorch/text/embeddings.py'), 'tinytorch.text.embeddings.PositionalEncoding.__repr__': ( '11_embeddings/embeddings_dev.html#positionalencoding.__repr__', diff --git a/tinytorch/applications/tinygpt.py b/tinytorch/applications/tinygpt.py index 24ccef7b..80dabc9a 100644 --- a/tinytorch/applications/tinygpt.py +++ b/tinytorch/applications/tinygpt.py @@ -1,679 +1,8 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_tinygpt/tinygpt_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/20_capstone/capstone_dev.ipynb. + # %% auto 0 -__all__ = ['TinyGPT', 'test_unit_tinygpt_init', 'TinyGPTTrainer', 'test_unit_training_pipeline', 'CompleteTinyGPTPipeline', - 'test_unit_complete_pipeline'] +__all__ = [] # %% ../../modules/source/20_capstone/capstone_dev.ipynb 2 #| default_exp applications.tinygpt #| export - -# %% ../../modules/source/20_capstone/capstone_dev.ipynb 7 -class TinyGPT: - """ - Complete GPT implementation integrating all TinyTorch modules. - - This class demonstrates how framework components compose into real applications. - Built using modules 01,02,03,11,12,13 as core architecture. - - Architecture: - - Token Embeddings (Module 11) - - Positional Encoding (Module 11) - - Transformer Blocks (Module 13) - - Output Linear Layer (Module 03) - - Language Modeling Head (Module 04) - """ - - def __init__(self, vocab_size: int, embed_dim: int = 128, num_layers: int = 4, - num_heads: int = 4, max_seq_len: int = 256, dropout: float = 0.1): - """ - Initialize TinyGPT with production-inspired architecture. - - TODO: Build a complete GPT model using TinyTorch components - - APPROACH: - 1. Create token embeddings (vocab_size × embed_dim) - 2. Create positional encoding (max_seq_len × embed_dim) - 3. Build transformer layers using TransformerBlock - 4. Add output projection layer - 5. 
Calculate and report parameter count - - ARCHITECTURE DECISIONS: - - embed_dim=128: Small enough for fast training, large enough for learning - - num_layers=4: Sufficient depth without excessive memory - - num_heads=4: Multi-head attention without head_dim being too small - - max_seq_len=256: Reasonable context length for character-level modeling - - EXAMPLE: - >>> model = TinyGPT(vocab_size=50, embed_dim=128, num_layers=4) - >>> print(f"Parameters: {model.count_parameters():,}") - Parameters: 1,234,567 - - HINTS: - - Use Embedding class for token embeddings - - Use PositionalEncoding for position information - - Stack TransformerBlock instances in a list - - Final Linear layer maps embed_dim → vocab_size - """ - ### BEGIN SOLUTION - self.vocab_size = vocab_size - self.embed_dim = embed_dim - self.num_layers = num_layers - self.num_heads = num_heads - self.max_seq_len = max_seq_len - self.dropout = dropout - - # Token embeddings: convert token IDs to dense vectors - self.token_embedding = Embedding(vocab_size, embed_dim) - - # Positional encoding: add position information - self.positional_encoding = PositionalEncoding(max_seq_len, embed_dim) - - # Transformer layers: core processing - self.transformer_blocks = [] - for _ in range(num_layers): - block = TransformerBlock(embed_dim, num_heads, mlp_ratio=4.0) - self.transformer_blocks.append(block) - - # Output projection: map back to vocabulary - self.output_projection = Linear(embed_dim, vocab_size) - - # Dropout for regularization - self.dropout_layer = Dropout(dropout) - - # Calculate parameter count for systems analysis - self._param_count = self.count_parameters() - print(f"🏗️ TinyGPT initialized: {self._param_count:,} parameters") - print(f"📐 Architecture: {num_layers}L/{num_heads}H/{embed_dim}D") - print(f"💾 Estimated memory: {self._param_count * 4 / 1024 / 1024:.1f}MB") - ### END SOLUTION - -def test_unit_tinygpt_init(): - """🔬 Test TinyGPT initialization and parameter counting.""" - print("🔬 Unit Test: TinyGPT Initialization...") - - # Create a small model for testing - model = TinyGPT(vocab_size=50, embed_dim=64, num_layers=2, num_heads=2, max_seq_len=128) - - # Verify architecture components exist - assert hasattr(model, 'token_embedding') - assert hasattr(model, 'positional_encoding') - assert hasattr(model, 'transformer_blocks') - assert hasattr(model, 'output_projection') - assert len(model.transformer_blocks) == 2 - - # Verify parameter count is reasonable - param_count = model.count_parameters() - assert param_count > 0 - assert param_count < 1000000 # Sanity check for small model - - print(f"✅ Model created with {param_count:,} parameters") - print("✅ TinyGPT initialization works correctly!") - -# Run immediate test -test_unit_tinygpt_init() - -# %% ../../modules/source/20_capstone/capstone_dev.ipynb 10 -class TinyGPTTrainer: - """ - Complete training pipeline integrating optimizers, schedulers, and monitoring. - - Uses modules 05 (autograd), 06 (optimizers), 07 (training) for end-to-end training. - """ - - def __init__(self, model: TinyGPT, tokenizer: CharTokenizer, - learning_rate: float = 3e-4, weight_decay: float = 0.01): - """ - Initialize trainer with model and optimization components. - - TODO: Set up complete training infrastructure - - APPROACH: - 1. Store model and tokenizer references - 2. Initialize AdamW optimizer (standard for transformers) - 3. Initialize loss function (CrossEntropyLoss for language modeling) - 4. Set up learning rate scheduler (cosine schedule) - 5. 
Initialize training metrics tracking - - PRODUCTION CHOICES: - - AdamW: Better generalization than Adam (weight decay) - - learning_rate=3e-4: Standard for small transformers - - Cosine schedule: Smooth learning rate decay - - CrossEntropy: Standard for classification/language modeling - - EXAMPLE: - >>> model = TinyGPT(vocab_size=100) - >>> tokenizer = CharTokenizer(['a', 'b', 'c']) - >>> trainer = TinyGPTTrainer(model, tokenizer) - >>> print("Trainer ready for training") - Trainer ready for training - - HINTS: - - Get all model parameters with model.parameters() - - Use AdamW with weight_decay for better generalization - - CrossEntropyLoss handles the language modeling objective - """ - ### BEGIN SOLUTION - self.model = model - self.tokenizer = tokenizer - - # Collect all trainable parameters - all_params = [] - all_params.extend(model.token_embedding.parameters()) - for block in model.transformer_blocks: - all_params.extend(block.parameters()) - all_params.extend(model.output_projection.parameters()) - - # Initialize optimizer (AdamW for transformers) - self.optimizer = AdamW( - params=all_params, - lr=learning_rate, - weight_decay=weight_decay, - betas=(0.9, 0.95) # Standard for language models - ) - - # Loss function for next token prediction - self.loss_fn = CrossEntropyLoss() - - # Learning rate scheduler - self.scheduler = CosineSchedule( - optimizer=self.optimizer, - max_epochs=100, # Will adjust based on actual training - min_lr=learning_rate * 0.1 - ) - - # Training metrics - self.training_history = { - 'losses': [], - 'perplexities': [], - 'learning_rates': [], - 'epoch': 0 - } - - print(f"🚀 Trainer initialized:") - print(f" Optimizer: AdamW (lr={learning_rate}, wd={weight_decay})") - print(f" Parameters: {len(all_params):,} tensors") - print(f" Loss: CrossEntropyLoss") - ### END SOLUTION - - def prepare_batch(self, text_batch: List[str], max_length: int = 128) -> Tuple[Tensor, Tensor]: - """ - Convert text batch to input/target tensors for language modeling. - - TODO: Implement text-to-tensor conversion with proper targets - - APPROACH: - 1. Tokenize each text in the batch - 2. Pad/truncate to consistent length - 3. Create input_ids (text) and target_ids (text shifted by 1) - 4. 
Convert to Tensor format - - LANGUAGE MODELING OBJECTIVE: - - Input: [token1, token2, token3, token4] - - Target: [token2, token3, token4, token5] - - Model predicts next token at each position - - EXAMPLE: - >>> trainer = TinyGPTTrainer(model, tokenizer) - >>> texts = ["hello world", "ai is fun"] - >>> inputs, targets = trainer.prepare_batch(texts) - >>> print(inputs.shape, targets.shape) - (2, 128) (2, 128) - - HINTS: - - Use tokenizer.encode() for text → token conversion - - Pad shorter sequences with tokenizer pad token - - Target sequence is input sequence shifted right by 1 - """ - ### BEGIN SOLUTION - batch_size = len(text_batch) - - # Tokenize all texts - tokenized_batch = [] - for text in text_batch: - tokens = self.tokenizer.encode(text) - - # Truncate or pad to max_length - if len(tokens) > max_length: - tokens = tokens[:max_length] - else: - # Pad with special token (use 0 as pad) - tokens.extend([0] * (max_length - len(tokens))) - - tokenized_batch.append(tokens) - - # Convert to numpy then Tensor - input_ids = Tensor(np.array(tokenized_batch)) # (batch_size, seq_len) - - # Create targets (shifted input for next token prediction) - target_ids = Tensor(np.roll(input_ids.data, -1, axis=1)) # Shift left by 1 - - return input_ids, target_ids - ### END SOLUTION - - def train_step(self, input_ids: Tensor, target_ids: Tensor) -> float: - """ - Single training step with forward, backward, and optimization. - - TODO: Implement complete training step - - APPROACH: - 1. Zero gradients from previous step - 2. Forward pass to get logits - 3. Compute loss between logits and targets - 4. Backward pass to compute gradients - 5. Optimizer step to update parameters - 6. Return loss value for monitoring - - MEMORY MANAGEMENT: - During training, memory usage = 3× model size: - - 1× for parameters - - 1× for gradients - - 1× for optimizer states (Adam moments) - - EXAMPLE: - >>> loss = trainer.train_step(input_ids, target_ids) - >>> print(f"Training loss: {loss:.4f}") - Training loss: 2.3456 - - HINTS: - - Always zero_grad() before forward pass - - Loss should be computed on flattened logits and targets - - Call backward() on the loss tensor - """ - ### BEGIN SOLUTION - # Zero gradients from previous step - self.optimizer.zero_grad() - - # Forward pass - logits = self.model.forward(input_ids) # (batch, seq_len, vocab_size) - - # Reshape for loss computation - batch_size, seq_len, vocab_size = logits.shape - logits_flat = logits.reshape(batch_size * seq_len, vocab_size) - targets_flat = target_ids.reshape(batch_size * seq_len) - - # Compute loss - loss = self.loss_fn.forward(logits_flat, targets_flat) - - # Backward pass - loss.backward() - - # Optimizer step - self.optimizer.step() - - # Return scalar loss for monitoring - return float(loss.data.item() if hasattr(loss.data, 'item') else loss.data) - ### END SOLUTION - -def test_unit_training_pipeline(): - """🔬 Test training pipeline components.""" - print("🔬 Unit Test: Training Pipeline...") - - # Create small model and trainer - model = TinyGPT(vocab_size=50, embed_dim=32, num_layers=2, num_heads=2) - tokenizer = CharTokenizer(['a', 'b', 'c', 'd', 'e', ' ']) - trainer = TinyGPTTrainer(model, tokenizer, learning_rate=1e-3) - - # Test batch preparation - texts = ["hello", "world"] - input_ids, target_ids = trainer.prepare_batch(texts, max_length=8) - - assert input_ids.shape == (2, 8), f"Expected (2, 8), got {input_ids.shape}" - assert target_ids.shape == (2, 8), f"Expected (2, 8), got {target_ids.shape}" - - # Test training step - initial_loss = 
trainer.train_step(input_ids, target_ids) - assert initial_loss > 0, "Loss should be positive" - - # Second step should work (gradients computed and applied) - second_loss = trainer.train_step(input_ids, target_ids) - assert second_loss > 0, "Second loss should also be positive" - - print(f"✅ Batch preparation shape: {input_ids.shape}") - print(f"✅ Initial loss: {initial_loss:.4f}") - print(f"✅ Second loss: {second_loss:.4f}") - print("✅ Training pipeline works correctly!") - -# Run immediate test -test_unit_training_pipeline() - -# %% ../../modules/source/20_capstone/capstone_dev.ipynb 14 -class CompleteTinyGPTPipeline: - """ - End-to-end ML pipeline demonstrating integration of all 19 modules. - - Pipeline stages: - 1. Data preparation (Module 10: Tokenization) - 2. Model creation (Modules 01-04, 11-13: Architecture) - 3. Training setup (Modules 05-07: Optimization) - 4. Training loop (Module 08: DataLoader) - 5. Optimization (Modules 17-18: Quantization, Pruning) - 6. Evaluation (Module 19: Benchmarking) - 7. Generation (Module 14: KV Caching) - """ - - def __init__(self, vocab_size: int = 100, embed_dim: int = 128, - num_layers: int = 4, num_heads: int = 4): - """ - Initialize complete end-to-end TinyGPT pipeline integrating all 19 modules. - - TODO: Set up a complete ML pipeline with tokenization, model, training, - profiling, and benchmarking components - - APPROACH: - 1. Store model architecture parameters (vocab_size, embed_dim, num_layers, num_heads) - 2. Initialize tokenizer using CharTokenizer from Module 10 with printable ASCII (32-127) - 3. Create TinyGPT model instance with stored parameters and max_seq_len=256 - 4. Setup TinyGPTTrainer for training orchestration with learning_rate=3e-4 - 5. Initialize Profiler (Module 15) and Benchmark (Module 19) for performance analysis - 6. Initialize pipeline state tracking (is_trained flag, training_history list) - 7. Print pipeline initialization summary with parameter count and memory usage - - EXAMPLE: - >>> pipeline = CompleteTinyGPTPipeline(vocab_size=100, embed_dim=128, - ... 
num_layers=4, num_heads=4) - 🏗️ Complete TinyGPT Pipeline Initialized - Model: 419,300 parameters - Memory: 1.6MB - >>> pipeline.model.count_parameters() - 419300 - >>> pipeline.is_trained - False - >>> len(pipeline.training_history) - 0 - - HINTS: - - CharTokenizer needs list of characters: [chr(i) for i in range(32, 127)] - - TinyGPT requires vocab_size, embed_dim, num_layers, num_heads, max_seq_len - - TinyGPTTrainer takes model, tokenizer, and learning_rate as arguments - - Benchmark expects (models_list, datasets_list, metrics_list) format - - Memory calculation: parameters * 4 bytes / 1024 / 1024 for MB - """ - - ### BEGIN SOLUTION - self.vocab_size = vocab_size - self.embed_dim = embed_dim - self.num_layers = num_layers - self.num_heads = num_heads - - # Stage 1: Initialize tokenizer (Module 10) - self.tokenizer = CharTokenizer([chr(i) for i in range(32, 127)]) # Printable ASCII - - # Stage 2: Create model (Modules 01-04, 11-13) - self.model = TinyGPT( - vocab_size=vocab_size, - embed_dim=embed_dim, - num_layers=num_layers, - num_heads=num_heads, - max_seq_len=256 - ) - - # Stage 3: Setup training (Modules 05-07) - self.trainer = TinyGPTTrainer(self.model, self.tokenizer, learning_rate=3e-4) - - # Stage 4: Initialize profiler and benchmark (Modules 15, 19) - self.profiler = Profiler() - self.benchmark = Benchmark([self.model], [], ["perplexity", "latency"]) - - # Pipeline state - self.is_trained = False - self.training_history = [] - - print("🏗️ Complete TinyGPT Pipeline Initialized") - print(f" Model: {self.model.count_parameters():,} parameters") - print(f" Memory: {self.model.count_parameters() * 4 / 1024 / 1024:.1f}MB") - ### END SOLUTION - - def prepare_training_data(self, text_corpus: List[str], batch_size: int = 8) -> DataLoader: - """ - Prepare training data using DataLoader (Module 08). - - TODO: Create DataLoader for training text data - - APPROACH: - 1. Tokenize all texts in corpus - 2. Create input/target pairs for language modeling - 3. Package into TensorDataset - 4. Create DataLoader with batching and shuffling - - EXAMPLE: - >>> pipeline = CompleteTinyGPTPipeline() - >>> corpus = ["hello world", "ai is amazing"] - >>> dataloader = pipeline.prepare_training_data(corpus, batch_size=2) - >>> print(f"Batches: {len(dataloader)}") - Batches: 1 - """ - ### BEGIN SOLUTION - # Tokenize and prepare training pairs - input_sequences = [] - target_sequences = [] - - for text in text_corpus: - tokens = self.tokenizer.encode(text) - if len(tokens) < 2: - continue # Skip very short texts - - # Create sliding window of input/target pairs - for i in range(len(tokens) - 1): - input_seq = tokens[:i+1] - target_seq = tokens[i+1] - - # Pad input to consistent length - max_len = 32 # Reasonable context window - if len(input_seq) > max_len: - input_seq = input_seq[-max_len:] - else: - input_seq = [0] * (max_len - len(input_seq)) + input_seq - - input_sequences.append(input_seq) - target_sequences.append(target_seq) - - # Convert to tensors - inputs = Tensor(np.array(input_sequences)) - targets = Tensor(np.array(target_sequences)) - - # Create dataset and dataloader - dataset = TensorDataset(inputs, targets) - dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True) - - print(f"📚 Training data prepared: {len(dataset)} examples, {len(dataloader)} batches") - return dataloader - ### END SOLUTION - - def train(self, dataloader: DataLoader, epochs: int = 10) -> Dict[str, List[float]]: - """ - Complete training loop with monitoring. 
- - TODO: Implement full training with progress tracking - - APPROACH: - 1. Loop through epochs - 2. For each batch: forward, backward, optimize - 3. Track loss and perplexity - 4. Update learning rate schedule - 5. Return training history - - EXAMPLE: - >>> history = pipeline.train(dataloader, epochs=5) - >>> print(f"Final loss: {history['losses'][-1]:.4f}") - Final loss: 1.2345 - """ - ### BEGIN SOLUTION - history = {'losses': [], 'perplexities': [], 'epochs': []} - - print(f"🚀 Starting training for {epochs} epochs...") - - for epoch in range(epochs): - epoch_losses = [] - - for batch_idx, (inputs, targets) in enumerate(dataloader): - # Training step - loss = self.trainer.train_step(inputs, targets) - epoch_losses.append(loss) - - # Log progress - if batch_idx % 10 == 0: - perplexity = np.exp(loss) - print(f" Epoch {epoch+1}/{epochs}, Batch {batch_idx}: " - f"Loss={loss:.4f}, PPL={perplexity:.2f}") - - # Epoch summary - avg_loss = np.mean(epoch_losses) - avg_perplexity = np.exp(avg_loss) - - history['losses'].append(avg_loss) - history['perplexities'].append(avg_perplexity) - history['epochs'].append(epoch + 1) - - # Update learning rate - self.trainer.scheduler.step() - - print(f"✅ Epoch {epoch+1} complete: Loss={avg_loss:.4f}, PPL={avg_perplexity:.2f}") - - self.is_trained = True - self.training_history = history - print(f"🎉 Training complete! Final perplexity: {history['perplexities'][-1]:.2f}") - - return history - ### END SOLUTION - - def optimize_model(self, quantize: bool = True, prune_sparsity: float = 0.0): - """ - Apply optimization techniques (Modules 17-18). - - TODO: Apply quantization and pruning optimizations - - APPROACH: - 1. Optionally apply quantization to reduce precision - 2. Optionally apply pruning to remove weights - 3. Measure size reduction - 4. Validate model still works - - EXAMPLE: - >>> pipeline.optimize_model(quantize=True, prune_sparsity=0.5) - Model optimized: 75% size reduction - """ - ### BEGIN SOLUTION - original_params = self.model.count_parameters() - original_memory = original_params * 4 / (1024 * 1024) - - optimizations_applied = [] - - if quantize: - # Apply quantization (simulated) - # In real implementation, would use quantize_model() - quantized_memory = original_memory / 4 # INT8 vs FP32 - optimizations_applied.append(f"INT8 quantization (4× memory reduction)") - print(" Applied INT8 quantization") - - if prune_sparsity > 0: - # Apply pruning (simulated) - # In real implementation, would use magnitude_prune() - remaining_weights = 1 - prune_sparsity - optimizations_applied.append(f"{prune_sparsity:.0%} pruning ({remaining_weights:.0%} weights remain)") - print(f" Applied {prune_sparsity:.0%} magnitude pruning") - - # Calculate final size - size_reduction = 1.0 - if quantize: - size_reduction *= 0.25 # 4× smaller - if prune_sparsity > 0: - size_reduction *= (1 - prune_sparsity) - - final_memory = original_memory * size_reduction - reduction_factor = original_memory / final_memory - - print(f"🔧 Model optimization complete:") - print(f" Original: {original_memory:.1f}MB") - print(f" Optimized: {final_memory:.1f}MB") - print(f" Reduction: {reduction_factor:.1f}× smaller") - print(f" Applied: {', '.join(optimizations_applied)}") - ### END SOLUTION - - def generate_text(self, prompt: str, max_tokens: int = 50) -> str: - """ - Generate text using the trained model. - - TODO: Implement text generation with proper encoding/decoding - - APPROACH: - 1. Encode prompt to token IDs - 2. Use model.generate() for autoregressive generation - 3. 
Decode generated tokens back to text - 4. Return generated text - - EXAMPLE: - >>> text = pipeline.generate_text("Hello", max_tokens=10) - >>> print(f"Generated: {text}") - Generated: Hello world this is AI - """ - ### BEGIN SOLUTION - if not self.is_trained: - print("⚠️ Model not trained yet. Generating with random weights.") - - # Encode prompt - prompt_tokens = self.tokenizer.encode(prompt) - prompt_tensor = Tensor([prompt_tokens]) - - # Generate tokens - generated_tokens = self.model.generate( - prompt_tensor, - max_new_tokens=max_tokens, - temperature=0.8, - use_cache=True - ) - - # Decode to text - all_tokens = generated_tokens.data[0].tolist() - generated_text = self.tokenizer.decode(all_tokens) - - return generated_text - ### END SOLUTION - -def test_unit_complete_pipeline(): - """🔬 Test complete pipeline integration.""" - print("🔬 Unit Test: Complete Pipeline Integration...") - - # Create pipeline - pipeline = CompleteTinyGPTPipeline(vocab_size=50, embed_dim=32, num_layers=2) - - # Test data preparation - corpus = ["hello world", "ai is fun", "machine learning"] - dataloader = pipeline.prepare_training_data(corpus, batch_size=2) - assert len(dataloader) > 0, "DataLoader should have batches" - - # Test training (minimal) - history = pipeline.train(dataloader, epochs=1) - assert 'losses' in history, "History should contain losses" - assert len(history['losses']) == 1, "Should have one epoch of losses" - - # Test optimization - pipeline.optimize_model(quantize=True, prune_sparsity=0.5) - - # Test generation - generated = pipeline.generate_text("hello", max_tokens=5) - assert isinstance(generated, str), "Generated output should be string" - assert len(generated) > 0, "Generated text should not be empty" - - print(f"✅ Pipeline stages completed successfully") - print(f"✅ Training history: {len(history['losses'])} epochs") - print(f"✅ Generated text: '{generated[:20]}...'") - print("✅ Complete pipeline integration works!") - -# Run immediate test -test_unit_complete_pipeline() diff --git a/tinytorch/benchmarking/benchmark.py b/tinytorch/benchmarking/benchmark.py index f6572c55..83b81eac 100644 --- a/tinytorch/benchmarking/benchmark.py +++ b/tinytorch/benchmarking/benchmark.py @@ -1,22 +1,8 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_benchmark/benchmark_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/19_benchmarking/benchmarking_dev.ipynb. 
+ # %% auto 0 __all__ = ['OlympicEvent', 'Benchmark', 'test_unit_benchmark', 'BenchmarkSuite', 'test_unit_benchmark_suite', 'TinyMLPerf', - 'test_unit_tinymlperf'] + 'test_unit_tinymlperf', 'calculate_normalized_scores'] # %% ../../modules/source/19_benchmarking/benchmarking_dev.ipynb 0 #| default_exp benchmarking.benchmark @@ -72,7 +58,7 @@ class Benchmark: self.measurement_runs = measurement_runs self.results = {} - # Use Profiler from Module 14 for measurements + # Use Profiler from Module 15 for measurements self.profiler = Profiler() # System information for metadata @@ -1024,3 +1010,53 @@ def test_unit_tinymlperf(): print("✅ TinyMLPerf works correctly!") test_unit_tinymlperf() + +# %% ../../modules/source/19_benchmarking/benchmarking_dev.ipynb 24 +def calculate_normalized_scores(baseline_results: dict, + optimized_results: dict) -> dict: + """ + Calculate normalized performance metrics for fair competition comparison. + + This function converts absolute measurements into relative improvements, + enabling fair comparison across different hardware platforms. + + Args: + baseline_results: Dict with keys: 'latency', 'memory', 'accuracy' + optimized_results: Dict with same keys as baseline_results + + Returns: + Dict with normalized metrics: + - speedup: Relative latency improvement (higher is better) + - compression_ratio: Relative memory reduction (higher is better) + - accuracy_delta: Absolute accuracy change (closer to 0 is better) + - efficiency_score: Combined metric balancing all factors + + Example: + >>> baseline = {'latency': 100.0, 'memory': 12.0, 'accuracy': 0.89} + >>> optimized = {'latency': 40.0, 'memory': 3.0, 'accuracy': 0.87} + >>> scores = calculate_normalized_scores(baseline, optimized) + >>> print(f"Speedup: {scores['speedup']:.2f}x") + Speedup: 2.50x + """ + # Calculate speedup (higher is better) + speedup = baseline_results['latency'] / optimized_results['latency'] + + # Calculate compression ratio (higher is better) + compression_ratio = baseline_results['memory'] / optimized_results['memory'] + + # Calculate accuracy delta (closer to 0 is better, negative means degradation) + accuracy_delta = optimized_results['accuracy'] - baseline_results['accuracy'] + + # Calculate efficiency score (combined metric) + # Penalize accuracy loss: the more accuracy you lose, the lower your score + accuracy_penalty = max(1.0, 1.0 - accuracy_delta) if accuracy_delta < 0 else 1.0 + efficiency_score = (speedup * compression_ratio) / accuracy_penalty + + return { + 'speedup': speedup, + 'compression_ratio': compression_ratio, + 'accuracy_delta': accuracy_delta, + 'efficiency_score': efficiency_score, + 'baseline': baseline_results.copy(), + 'optimized': optimized_results.copy() + } diff --git a/tinytorch/competition/submit.py b/tinytorch/competition/submit.py index da8585d6..a1a9d6d7 100644 --- a/tinytorch/competition/submit.py +++ b/tinytorch/competition/submit.py @@ -1,22 +1,8 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_submit/submit_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. 
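(A quick worked example of the scoring arithmetic in calculate_normalized_scores, added in the benchmark.py hunk above, using the hypothetical numbers from its own docstring:)

    # Hypothetical inputs, matching the docstring example above.
    baseline  = {'latency': 100.0, 'memory': 12.0, 'accuracy': 0.89}
    optimized = {'latency': 40.0,  'memory': 3.0,  'accuracy': 0.87}
    scores = calculate_normalized_scores(baseline, optimized)
    # speedup           = 100.0 / 40.0         = 2.50
    # compression_ratio = 12.0 / 3.0           = 4.00
    # accuracy_delta    = 0.87 - 0.89          = -0.02
    # accuracy_penalty  = max(1.0, 1.0 + 0.02) = 1.02   (delta is negative)
    # efficiency_score  = (2.50 * 4.00) / 1.02 ≈ 9.80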
║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/20_competition/competition_dev.ipynb. + # %% auto 0 __all__ = ['validate_installation', 'load_baseline_model', 'generate_baseline', 'worked_example_optimization', - 'optimize_for_competition', 'generate_submission'] + 'optimize_for_competition', 'validate_submission', 'generate_submission'] # %% ../../modules/source/20_competition/competition_dev.ipynb 4 import numpy as np @@ -24,6 +10,8 @@ import json import time from pathlib import Path from typing import Dict, List, Tuple, Any, Optional +from ..benchmarking.benchmark import Benchmark, calculate_normalized_scores +from ..profiling.profiler import Profiler def validate_installation() -> Dict[str, bool]: """ @@ -362,31 +350,24 @@ def worked_example_optimization(): return submission # %% ../../modules/source/20_competition/competition_dev.ipynb 10 -def optimize_for_competition(baseline_model, event: str = "all_around"): +def optimize_for_competition(baseline_model, event: str = "all_around", division: str = "closed"): """ 🏅 YOUR COMPETITION ENTRY - IMPLEMENT YOUR STRATEGY HERE! - This is where you apply optimization techniques from Modules 14-18. - - Available techniques: - - Module 14: KV Caching (for transformers) - enable_kv_cache() - - Module 16: Acceleration (vectorization, fusion) - - Module 17: Quantization (INT8, INT4) - quantize_model() - - Module 18: Compression (pruning) - magnitude_prune() - Args: - baseline_model: The unoptimized model - event: Which Olympic event you're competing in + baseline_model: Starting model (use for Closed, optional for Open) + event: Category you're competing in - "latency_sprint": Minimize latency - "memory_challenge": Minimize memory - "accuracy_contest": Maximize accuracy - "all_around": Best balance - "extreme_push": Most aggressive + division: "closed" or "open" - which track you chose Returns: Your optimized model - Example: + 🔒 CLOSED DIVISION Example: from tinytorch.optimization.quantization import quantize_model from tinytorch.optimization.compression import magnitude_prune @@ -394,6 +375,15 @@ def optimize_for_competition(baseline_model, event: str = "all_around"): optimized = quantize_model(optimized, bits=8) optimized = magnitude_prune(optimized, sparsity=0.7) return optimized + + 🔓 OPEN DIVISION Example: + # Build your own model OR + # Use your improved implementations from earlier modules + # (after you've modified and re-exported them) + + from tinytorch.models import YourCustomArchitecture + optimized = YourCustomArchitecture() + return optimized """ print(f"🏅 YOUR OPTIMIZATION STRATEGY FOR: {event}") @@ -438,74 +428,201 @@ def optimize_for_competition(baseline_model, event: str = "all_around"): return optimized_model +#| export +def validate_submission(submission: Dict[str, Any]) -> Dict[str, Any]: + """ + Validate competition submission with sanity checks. + + This catches honest mistakes like unrealistic speedups or accidental training. + Honor code system - we trust but verify basic reasonableness. 
+ + Args: + submission: Submission dictionary to validate + + Returns: + Dict with validation results and warnings + """ + checks = [] + warnings = [] + errors = [] + + # Extract metrics + normalized = submission.get("normalized_scores", {}) + speedup = normalized.get("speedup", 1.0) + compression = normalized.get("compression_ratio", 1.0) + accuracy_delta = normalized.get("accuracy_delta", 0.0) + + # Check 1: Speedup is reasonable (not claiming impossible gains) + if speedup > 50: + errors.append(f"❌ Speedup {speedup:.1f}x seems unrealistic (>50x)") + elif speedup > 20: + warnings.append(f"⚠️ Speedup {speedup:.1f}x is very high - please verify measurements") + else: + checks.append(f"✅ Speedup {speedup:.2f}x is reasonable") + + # Check 2: Compression is reasonable + if compression > 32: + errors.append(f"❌ Compression {compression:.1f}x seems unrealistic (>32x)") + elif compression > 16: + warnings.append(f"⚠️ Compression {compression:.1f}x is very high - please verify") + else: + checks.append(f"✅ Compression {compression:.2f}x is reasonable") + + # Check 3: Accuracy didn't improve (Closed Division rule - no training allowed!) + division = submission.get("division", "closed") + if division == "closed" and accuracy_delta > 1.0: + errors.append(f"❌ Accuracy improved by {accuracy_delta:.1f}pp - did you accidentally train the model?") + elif accuracy_delta > 0.5: + warnings.append(f"⚠️ Accuracy improved by {accuracy_delta:.1f}pp - verify no training occurred") + else: + checks.append(f"✅ Accuracy change {accuracy_delta:+.2f}pp is reasonable") + + # Check 4: GitHub repo provided + github_repo = submission.get("github_repo", "") + if not github_repo or github_repo == "": + warnings.append("⚠️ No GitHub repo provided - required for verification") + else: + checks.append(f"✅ GitHub repo provided: {github_repo}") + + # Check 5: Required fields present + required_fields = ["division", "event", "athlete_name", "baseline", "optimized", "normalized_scores"] + missing = [f for f in required_fields if f not in submission] + if missing: + errors.append(f"❌ Missing required fields: {', '.join(missing)}") + else: + checks.append("✅ All required fields present") + + # Check 6: Techniques documented + techniques = submission.get("techniques_applied", []) + if not techniques or "TODO" in str(techniques): + warnings.append("⚠️ No optimization techniques listed") + else: + checks.append(f"✅ Techniques documented: {', '.join(techniques[:3])}...") + + return { + "valid": len(errors) == 0, + "checks": checks, + "warnings": warnings, + "errors": errors + } + +#| export def generate_submission(baseline_model, optimized_model, + division: str = "closed", event: str = "all_around", athlete_name: str = "YourName", + github_repo: str = "", techniques: List[str] = None) -> Dict[str, Any]: """ - Generate standardized competition submission. + Generate standardized TinyMLPerf competition submission with normalized scoring. Args: baseline_model: Original unoptimized model optimized_model: Your optimized model - event: Olympic event name - athlete_name: Your name for leaderboard - techniques: List of techniques applied + division: "closed" or "open" + event: Competition category (latency_sprint, memory_challenge, all_around, etc.) 
+ athlete_name: Your name for submission + github_repo: GitHub repository URL for code verification + techniques: List of optimization techniques applied Returns: Submission dictionary (will be saved as JSON) """ - print("📤 Generating Competition Submission...") + print("📤 Generating TinyMLPerf Competition Submission...") print("=" * 70) # Get baseline metrics baseline_metrics = generate_baseline(quick=True) - # For demonstration, estimate optimized metrics - # In real competition, this would benchmark the actual optimized model + # Benchmark optimized model print("🔬 Benchmarking optimized model...") - # Placeholder: Students' actual optimizations would be measured here + # Use Profiler and Benchmark from Module 19 + profiler = Profiler() + + # For demonstration, we'll use placeholder metrics + # In real competition, students would measure their actual optimized model optimized_metrics = { - "model": "Your_Optimized_Model", - "accuracy": 84.0, # Measured - "latency_ms": 28.0, # Measured - "memory_mb": 4.0, # Measured - "parameters": 2000000, # Measured + "model": getattr(optimized_model, 'name', 'Optimized_Model'), + "accuracy": 84.0, # Would be measured with actual test set + "latency_ms": 28.0, # Would be measured with profiler + "memory_mb": 4.0, # Would be measured with profiler + "parameters": 2000000, # Would be counted } - # Calculate improvements - improvements = { - "accuracy_change": optimized_metrics["accuracy"] - baseline_metrics["accuracy"], - "latency_speedup": baseline_metrics["latency_ms"] / optimized_metrics["latency_ms"], - "memory_reduction": baseline_metrics["memory_mb"] / optimized_metrics["memory_mb"], + # Calculate normalized scores using Module 19's function + baseline_for_norm = { + "latency": baseline_metrics["latency_ms"], + "memory": baseline_metrics["memory_mb"], + "accuracy": baseline_metrics["accuracy"] } - # Create submission + optimized_for_norm = { + "latency": optimized_metrics["latency_ms"], + "memory": optimized_metrics["memory_mb"], + "accuracy": optimized_metrics["accuracy"] + } + + normalized_scores = calculate_normalized_scores(baseline_for_norm, optimized_for_norm) + + # Create submission with all required fields submission = { + "division": division, "event": event, "athlete_name": athlete_name, + "github_repo": github_repo, "baseline": baseline_metrics, "optimized": optimized_metrics, - "improvements": improvements, - "techniques_applied": techniques or ["TODO: List your techniques"], + "normalized_scores": { + "speedup": normalized_scores["speedup"], + "compression_ratio": normalized_scores["compression_ratio"], + "accuracy_delta": normalized_scores["accuracy_delta"], + "efficiency_score": normalized_scores["efficiency_score"] + }, + "techniques_applied": techniques or ["TODO: Document your optimization techniques"], "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), + "tinytorch_version": "0.1.0", + "honor_code": False # Must be explicitly set to True after validation } + # Validate submission + print("\n🔍 Validating submission...") + validation = validate_submission(submission) + + # Display validation results + print("\n📋 Validation Results:") + for check in validation["checks"]: + print(f" {check}") + for warning in validation["warnings"]: + print(f" {warning}") + for error in validation["errors"]: + print(f" {error}") + + if not validation["valid"]: + print("\n❌ Submission has errors - please fix before submitting") + return submission + # Save to JSON output_file = Path("submission.json") with open(output_file, "w") as f: 
json.dump(submission, f, indent=2) - print(f"✅ Submission saved to: {output_file}") + print(f"\n✅ Submission saved to: {output_file}") print() - print("📊 Your Results:") - print(f" Event: {event}") - print(f" Accuracy: {optimized_metrics['accuracy']:.1f}% (Δ {improvements['accuracy_change']:+.1f}pp)") - print(f" Latency: {optimized_metrics['latency_ms']:.1f}ms ({improvements['latency_speedup']:.2f}x faster)") - print(f" Memory: {optimized_metrics['memory_mb']:.2f}MB ({improvements['memory_reduction']:.2f}x smaller)") + print("📊 Your Normalized Scores (MLPerf-style):") + print(f" Division: {division.upper()}") + print(f" Event: {event.replace('_', ' ').title()}") + print(f" Speedup: {normalized_scores['speedup']:.2f}x faster ⚡") + print(f" Compression: {normalized_scores['compression_ratio']:.2f}x smaller 💾") + print(f" Accuracy: {optimized_metrics['accuracy']:.1f}% (Δ {normalized_scores['accuracy_delta']:+.2f}pp)") + print(f" Efficiency: {normalized_scores['efficiency_score']:.2f}") + print() + print("📤 Next Steps:") + print(" 1. Verify all metrics are correct") + print(" 2. Push your code to GitHub (if not done)") + print(" 3. Run: tito submit submission.json") + print(" (This will validate and prepare final submission)") print() - print("📤 Upload submission.json to TorchPerf Olympics platform!") print("=" * 70) return submission diff --git a/tinytorch/core/activations.py b/tinytorch/core/activations.py index 849ad752..c7fcb702 100644 --- a/tinytorch/core/activations.py +++ b/tinytorch/core/activations.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/03_activations/activations_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb. + # %% auto 0 __all__ = ['Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax'] diff --git a/tinytorch/core/attention.py b/tinytorch/core/attention.py index 14743a7b..fd17103a 100644 --- a/tinytorch/core/attention.py +++ b/tinytorch/core/attention.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/07_attention/attention_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! 
DO NOT EDIT! File to edit: ../../modules/source/12_attention/attention_dev.ipynb. + # %% auto 0 __all__ = ['scaled_dot_product_attention', 'MultiHeadAttention'] @@ -293,6 +279,10 @@ class MultiHeadAttention: return output ### END SOLUTION + def __call__(self, x: Tensor, mask: Optional[Tensor] = None) -> Tensor: + """Allows the attention layer to be called like a function.""" + return self.forward(x, mask) + def parameters(self) -> List[Tensor]: """ Return all trainable parameters. diff --git a/tinytorch/core/autograd.py b/tinytorch/core/autograd.py index 1a71c287..ce0be66f 100644 --- a/tinytorch/core/autograd.py +++ b/tinytorch/core/autograd.py @@ -1,23 +1,8 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/09_autograd/autograd_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/05_autograd/autograd_dev.ipynb. + # %% auto 0 -__all__ = ['Function', 'AddBackward', 'MulBackward', 'SubBackward', 'DivBackward', 'MatmulBackward', 'TransposeBackward', - 'PermuteBackward', 'EmbeddingBackward', 'ReshapeBackward', 'SumBackward', 'ReLUBackward', 'SigmoidBackward', - 'SoftmaxBackward', 'GELUBackward', 'MSEBackward', 'BCEBackward', 'CrossEntropyBackward', 'enable_autograd'] +__all__ = ['Function', 'AddBackward', 'MulBackward', 'MatmulBackward', 'SumBackward', 'ReLUBackward', 'SigmoidBackward', + 'MSEBackward', 'BCEBackward', 'CrossEntropyBackward', 'enable_autograd'] # %% ../../modules/source/05_autograd/autograd_dev.ipynb 1 import numpy as np @@ -164,66 +149,7 @@ class MulBackward(Function): return grad_a, grad_b -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 13 -class SubBackward(Function): - """ - Gradient computation for tensor subtraction. - - **Mathematical Rule:** If z = a - b, then ∂z/∂a = 1 and ∂z/∂b = -1 - """ - - def apply(self, grad_output): - """ - Compute gradients for subtraction. - - Returns: - Tuple of (grad_a, grad_b) where grad_b is negated - """ - a, b = self.saved_tensors - grad_a = grad_b = None - - if isinstance(a, Tensor) and a.requires_grad: - grad_a = grad_output # ∂(a-b)/∂a = 1 - - if isinstance(b, Tensor) and b.requires_grad: - grad_b = -grad_output # ∂(a-b)/∂b = -1 (note the negative!) - - return grad_a, grad_b - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 15 -class DivBackward(Function): - """ - Gradient computation for tensor division. - - **Mathematical Rule:** If z = a / b, then: - - ∂z/∂a = 1/b - - ∂z/∂b = -a/b² - """ - - def apply(self, grad_output): - """ - Compute gradients for division using quotient rule. 
- - Returns: - Tuple of (grad_a, grad_b) - """ - a, b = self.saved_tensors - grad_a = grad_b = None - - if isinstance(a, Tensor) and a.requires_grad: - # ∂(a/b)/∂a = 1/b - if isinstance(b, Tensor): - grad_a = grad_output / b.data - else: - grad_a = grad_output / b - - if isinstance(b, Tensor) and b.requires_grad: - # ∂(a/b)/∂b = -a/b² - grad_b = -grad_output * a.data / (b.data ** 2) - - return grad_a, grad_b - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 17 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 14 class MatmulBackward(Function): """ Gradient computation for matrix multiplication. @@ -252,242 +178,21 @@ class MatmulBackward(Function): **Mathematical Foundation:** - ∂(A@B)/∂A = grad_output @ B.T - ∂(A@B)/∂B = A.T @ grad_output - - **Batched Operation:** For 3D+ tensors, we transpose only the last two - dimensions using np.swapaxes, preserving batch dimensions. """ a, b = self.saved_tensors grad_a = grad_b = None # Gradient for first input: grad_output @ b.T if isinstance(a, Tensor) and a.requires_grad: - # For batched tensors, transpose only last two dims - if b.data.ndim >= 2: - b_T = np.swapaxes(b.data, -2, -1) - else: - b_T = b.data.T - grad_a = np.matmul(grad_output, b_T) + grad_a = np.dot(grad_output, b.data.T) # Gradient for second input: a.T @ grad_output if isinstance(b, Tensor) and b.requires_grad: - # For batched tensors, transpose only last two dims - if a.data.ndim >= 2: - a_T = np.swapaxes(a.data, -2, -1) - else: - a_T = a.data.T - grad_b = np.matmul(a_T, grad_output) + grad_b = np.dot(a.data.T, grad_output) return grad_a, grad_b -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 18 -class TransposeBackward(Function): - """ - Gradient computation for transpose operation. - - **Mathematical Rule:** If Y = X.T, then: - - ∂Y/∂X = grad_Y.T - - **Key Insight:** The gradient of transpose is just transpose the gradient! - This is because transpose is a linear operation that just rearranges elements. - - **Applications:** Used in attention (K.T for scores), weight gradients (W.T), - and any operation that needs to swap matrix dimensions. - """ - - def __init__(self, tensor, dim0, dim1): - """ - Args: - tensor: Input tensor - dim0: First dimension to swap (None for default) - dim1: Second dimension to swap (None for default) - """ - super().__init__(tensor) - self.dim0 = dim0 - self.dim1 = dim1 - - def apply(self, grad_output): - """ - Compute gradient for transpose. - - Args: - grad_output: Gradient flowing backward from output - - Returns: - Tuple with single gradient for input tensor - - **Mathematical Foundation:** - - ∂(X.T)/∂X = grad_output.T - - Just transpose the gradient back! - """ - x, = self.saved_tensors - grad_x = None - - if isinstance(x, Tensor) and x.requires_grad: - # Transpose gradient using the same dims - if self.dim0 is None and self.dim1 is None: - # Default: transpose last two dimensions - if grad_output.ndim < 2: - grad_x = grad_output.copy() - else: - axes = list(range(grad_output.ndim)) - axes[-2], axes[-1] = axes[-1], axes[-2] - grad_x = np.transpose(grad_output, axes) - else: - # Specific dimensions: swap them back - axes = list(range(grad_output.ndim)) - axes[self.dim0], axes[self.dim1] = axes[self.dim1], axes[self.dim0] - grad_x = np.transpose(grad_output, axes) - - return (grad_x,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 19 -class PermuteBackward(Function): - """ - Gradient computation for arbitrary axis permutation (general transpose). 
- - **Mathematical Rule:** If Y = X.permute(axes), then: - - ∂Y/∂X = grad_Y.permute(inverse_axes) - - **Example:** If axes = (0, 2, 1, 3), the inverse is (0, 2, 1, 3) (self-inverse). - More generally, if axes = (2, 0, 1), the inverse is (1, 2, 0). - - **Key Insight:** To reverse a permutation, we need to know where each axis went. - If axis i went to position axes[i], then in the inverse, position axes[i] should go to i. - - **Applications:** Multi-head attention uses (0, 2, 1, 3) to rearrange heads. - """ - - def __init__(self, tensor, axes): - """ - Args: - tensor: Input tensor - axes: Tuple of axis indices defining the permutation - """ - super().__init__(tensor) - self.axes = axes - # Compute inverse permutation: if axes[i] = j, then inverse_axes[j] = i - self.inverse_axes = tuple(np.argsort(axes)) - - def apply(self, grad_output): - """ - Compute gradient for permutation. - - The gradient is permuted back using the inverse permutation. - - **Mathematical Foundation:** - - ∂(X.permute(axes))/∂X = grad_output.permute(inverse_axes) - """ - x, = self.saved_tensors - grad_x = None - - if isinstance(x, Tensor) and x.requires_grad: - # Permute gradient back to original axis order - grad_x = np.transpose(grad_output, self.inverse_axes) - - return (grad_x,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 20 -class EmbeddingBackward(Function): - """ - Gradient computation for embedding lookup operation. - - **Mathematical Rule:** If Y = Embedding[indices], then: - - ∂Loss/∂Embedding[i] = sum of all gradients where index==i - - **Key Insight:** Embedding lookup is a gather operation. The backward - is a scatter operation that accumulates gradients to the embedding weights. - - **Applications:** Word embeddings, positional embeddings, token embeddings - in transformers. - """ - - def __init__(self, weight, indices): - """ - Args: - weight: Embedding weight matrix - indices: Indices used for lookup - """ - super().__init__(weight) - self.indices = indices - - def apply(self, grad_output): - """ - Compute gradient for embedding lookup. - - Args: - grad_output: Gradient flowing backward from output - - Returns: - Tuple with single gradient for weight tensor - - **Mathematical Foundation:** - - ∂(Embedding[indices])/∂Embedding = scatter gradients to selected rows - - Multiple indices can point to same embedding → gradients accumulate - """ - weight, = self.saved_tensors - grad_weight = None - - if isinstance(weight, Tensor) and weight.requires_grad: - # Initialize gradient with zeros - grad_weight = np.zeros_like(weight.data) - - # Scatter gradients back to embedding weights - # np.add.at accumulates gradients for repeated indices - indices_flat = self.indices.data.astype(int).flatten() - grad_output_reshaped = grad_output.reshape(-1, grad_output.shape[-1]) - - np.add.at(grad_weight, indices_flat, grad_output_reshaped) - - return (grad_weight,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 21 -class ReshapeBackward(Function): - """ - Gradient computation for reshape operation. - - **Mathematical Rule:** If Y = X.reshape(new_shape), then: - - ∂Y/∂X = grad_Y.reshape(X.shape) - - **Key Insight:** Reshape just rearranges the same elements. - The gradient is simply reshaped back to the original shape! - - **Applications:** Flattening tensors for linear layers, reshaping - between convolutional and dense layers. 
- """ - - def __init__(self, tensor, original_shape): - """ - Args: - tensor: Input tensor - original_shape: Shape before reshape - """ - super().__init__(tensor) - self.original_shape = original_shape - - def apply(self, grad_output): - """ - Compute gradient for reshape. - - Args: - grad_output: Gradient flowing backward from output - - Returns: - Tuple with single gradient for input tensor - - **Mathematical Foundation:** - - ∂(X.reshape(...))/∂X = grad_output.reshape(X.shape) - - Just reshape the gradient back! - """ - x, = self.saved_tensors - grad_x = None - - if isinstance(x, Tensor) and x.requires_grad: - # Reshape gradient back to original shape - grad_x = grad_output.reshape(self.original_shape) - - return (grad_x,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 23 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 16 class SumBackward(Function): """ Gradient computation for tensor sum. @@ -521,7 +226,7 @@ class SumBackward(Function): return np.ones_like(tensor.data) * grad_output, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 28 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 23 class ReLUBackward(Function): """ Gradient computation for ReLU activation. @@ -544,7 +249,7 @@ class ReLUBackward(Function): return grad_output * relu_grad, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 29 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 25 class SigmoidBackward(Function): """ Gradient computation for sigmoid activation. @@ -574,101 +279,7 @@ class SigmoidBackward(Function): return grad_output * sigmoid_grad, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 30 -class SoftmaxBackward(Function): - """ - Gradient computation for softmax activation. - - Softmax: softmax(x)[i] = exp(x[i]) / sum(exp(x)) - Derivative: ∂softmax/∂x[i] = softmax[i] * (δ[i,j] - softmax[j]) - - For gradient computation: - grad_x[i] = softmax[i] * (grad_y[i] - sum(grad_y * softmax)) - - **Key Insight:** The gradient depends on all elements of softmax due to - the normalization, not just the element being differentiated. - """ - - def __init__(self, input_tensor, output_tensor, dim=-1): - """ - Initialize with input, output, and dimension. - - Args: - input_tensor: Original input to softmax - output_tensor: Output of softmax (needed for gradient) - dim: Dimension along which softmax was applied - """ - super().__init__(input_tensor) - self.output_data = output_tensor.data - self.dim = dim - - def apply(self, grad_output): - """ - Compute gradient for softmax. - - Mathematical formula: - ∂L/∂x[i] = softmax[i] * (∂L/∂y[i] - sum_j(∂L/∂y[j] * softmax[j])) - - This can be vectorized as: - grad_x = softmax * (grad_y - sum(grad_y * softmax, keepdims=True)) - """ - tensor, = self.saved_tensors - - if isinstance(tensor, Tensor) and tensor.requires_grad: - # Compute sum(grad_output * softmax) along the softmax dimension - sum_term = np.sum(grad_output * self.output_data, axis=self.dim, keepdims=True) - - # Softmax gradient: softmax * (grad_output - sum_term) - grad_x = self.output_data * (grad_output - sum_term) - - return (grad_x,) - return (None,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 31 -class GELUBackward(Function): - """ - Gradient computation for GELU activation. 
- - GELU: f(x) = x * Φ(x) where Φ is the CDF of standard normal - Approximation: gelu(x) ≈ 0.5 * x * (1 + tanh(√(2/π) * (x + 0.044715 * x³))) - - **Key Insight:** GELU is smoother than ReLU, providing non-zero gradients - for negative values, which helps training deep networks. - """ - - def __init__(self, input_tensor): - """Initialize with input tensor.""" - super().__init__(input_tensor) - - def apply(self, grad_output): - """ - Compute gradient for GELU. - - Mathematical formula (using approximation): - ∂gelu/∂x ≈ 0.5 * (1 + tanh(...)) + 0.5 * x * sech²(...) * (...) - - Simplified: We compute the derivative numerically or use the formula. - """ - tensor, = self.saved_tensors - - if isinstance(tensor, Tensor) and tensor.requires_grad: - x = tensor.data - # GELU derivative approximation - # Using the tanh approximation: gelu(x) ≈ 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))) - sqrt_2_over_pi = np.sqrt(2.0 / np.pi) - x_cubed = x ** 3 - tanh_arg = sqrt_2_over_pi * (x + 0.044715 * x_cubed) - tanh_out = np.tanh(tanh_arg) - sech_squared = 1 - tanh_out ** 2 - - # Derivative: 0.5 * (1 + tanh(...)) + 0.5 * x * sech²(...) * d(tanh_arg)/dx - d_tanh_arg = sqrt_2_over_pi * (1 + 0.134145 * x ** 2) - gelu_grad = 0.5 * (1 + tanh_out) + 0.5 * x * sech_squared * d_tanh_arg - - return (grad_output * gelu_grad,) - return (None,) - -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 32 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 26 class MSEBackward(Function): """ Gradient computation for Mean Squared Error Loss. @@ -694,7 +305,7 @@ class MSEBackward(Function): return grad * grad_output, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 33 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 27 class BCEBackward(Function): """ Gradient computation for Binary Cross-Entropy Loss. @@ -724,7 +335,7 @@ class BCEBackward(Function): return grad * grad_output, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 34 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 28 class CrossEntropyBackward(Function): """ Gradient computation for Cross-Entropy Loss. @@ -769,7 +380,7 @@ class CrossEntropyBackward(Function): return grad * grad_output, return None, -# %% ../../modules/source/05_autograd/autograd_dev.ipynb 35 +# %% ../../modules/source/05_autograd/autograd_dev.ipynb 29 def enable_autograd(): """ Enable gradient tracking for all Tensor operations. @@ -806,12 +417,8 @@ def enable_autograd(): # Store original operations _original_add = Tensor.__add__ - _original_sub = Tensor.__sub__ _original_mul = Tensor.__mul__ - _original_div = Tensor.__truediv__ _original_matmul = Tensor.matmul if hasattr(Tensor, 'matmul') else None - _original_transpose = Tensor.transpose if hasattr(Tensor, 'transpose') else None - _original_reshape = Tensor.reshape if hasattr(Tensor, 'reshape') else None # Enhanced operations that track gradients def tracked_add(self, other): @@ -878,98 +485,6 @@ def enable_autograd(): return result - def tracked_transpose(self, dim0=None, dim1=None): - """ - Transpose with gradient tracking. - - Enhances the original transpose method to build computation graphs - when requires_grad=True for the input. 
- """ - if _original_transpose: - result = _original_transpose(self, dim0, dim1) - else: - # Fallback if transpose doesn't exist - if dim0 is None and dim1 is None: - axes = list(range(len(self.shape))) - if len(axes) >= 2: - axes[-2], axes[-1] = axes[-1], axes[-2] - result = Tensor(np.transpose(self.data, axes)) - else: - axes = list(range(len(self.shape))) - axes[dim0], axes[dim1] = axes[dim1], axes[dim0] - result = Tensor(np.transpose(self.data, axes)) - - # Track gradient if needed - if self.requires_grad: - result.requires_grad = True - result._grad_fn = TransposeBackward(self, dim0, dim1) - - return result - - def tracked_reshape(self, *shape): - """ - Reshape with gradient tracking. - - Enhances the original reshape method to build computation graphs - when requires_grad=True for the input. - """ - original_shape = self.shape - - if _original_reshape: - result = _original_reshape(self, *shape) - else: - # Fallback if reshape doesn't exist - result = Tensor(self.data.reshape(*shape)) - - # Track gradient if needed - if self.requires_grad: - result.requires_grad = True - result._grad_fn = ReshapeBackward(self, original_shape) - - return result - - def tracked_sub(self, other): - """ - Subtraction with gradient tracking. - - Enhances the original __sub__ method to build computation graphs - when requires_grad=True for any input. - """ - # Convert scalar to Tensor if needed - if not isinstance(other, Tensor): - other = Tensor(other) - - # Call original operation - result = _original_sub(self, other) - - # Track gradient if needed - if self.requires_grad or other.requires_grad: - result.requires_grad = True - result._grad_fn = SubBackward(self, other) - - return result - - def tracked_div(self, other): - """ - Division with gradient tracking. - - Enhances the original __truediv__ method to build computation graphs - when requires_grad=True for any input. - """ - # Convert scalar to Tensor if needed - if not isinstance(other, Tensor): - other = Tensor(other) - - # Call original operation - result = _original_div(self, other) - - # Track gradient if needed - if self.requires_grad or other.requires_grad: - result.requires_grad = True - result._grad_fn = DivBackward(self, other) - - return result - def sum_op(self, axis=None, keepdims=False): """ Sum operation with gradient tracking. 
@@ -1058,26 +573,20 @@ def enable_autograd(): # Install enhanced operations Tensor.__add__ = tracked_add - Tensor.__sub__ = tracked_sub Tensor.__mul__ = tracked_mul - Tensor.__truediv__ = tracked_div Tensor.matmul = tracked_matmul - Tensor.transpose = tracked_transpose - Tensor.reshape = tracked_reshape Tensor.sum = sum_op Tensor.backward = backward Tensor.zero_grad = zero_grad # Patch activations and losses to track gradients try: - from tinytorch.core.activations import Sigmoid, ReLU, Softmax, GELU + from tinytorch.core.activations import Sigmoid, ReLU from tinytorch.core.losses import BinaryCrossEntropyLoss, MSELoss, CrossEntropyLoss # Store original methods _original_sigmoid_forward = Sigmoid.forward _original_relu_forward = ReLU.forward - _original_softmax_forward = Softmax.forward - _original_gelu_forward = GELU.forward _original_bce_forward = BinaryCrossEntropyLoss.forward _original_mse_forward = MSELoss.forward _original_ce_forward = CrossEntropyLoss.forward @@ -1104,30 +613,6 @@ def enable_autograd(): return result - def tracked_softmax_forward(self, x, dim=-1): - """Softmax with gradient tracking.""" - # Call original forward to get result using Tensor operations - result = _original_softmax_forward(self, x, dim=dim) - - # Attach the correct gradient function - if x.requires_grad: - result.requires_grad = True - result._grad_fn = SoftmaxBackward(x, result, dim) - - return result - - def tracked_gelu_forward(self, x): - """GELU with gradient tracking.""" - # Call original forward to get result - result = _original_gelu_forward(self, x) - - # Attach the correct gradient function - if x.requires_grad: - result.requires_grad = True - result._grad_fn = GELUBackward(x) - - return result - def tracked_bce_forward(self, predictions, targets): """Binary cross-entropy with gradient tracking.""" # Compute BCE loss @@ -1187,8 +672,6 @@ def enable_autograd(): # Install patched methods Sigmoid.forward = tracked_sigmoid_forward ReLU.forward = tracked_relu_forward - Softmax.forward = tracked_softmax_forward - GELU.forward = tracked_gelu_forward BinaryCrossEntropyLoss.forward = tracked_bce_forward MSELoss.forward = tracked_mse_forward CrossEntropyLoss.forward = tracked_ce_forward diff --git a/tinytorch/core/layers.py b/tinytorch/core/layers.py index 8b142253..1289ad68 100644 --- a/tinytorch/core/layers.py +++ b/tinytorch/core/layers.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/04_layers/layers_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_layers/layers_dev.ipynb. 
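Taken together, the autograd hunks above leave a smaller but complete tape: add, mul, matmul, and sum, plus the patched Sigmoid/ReLU and loss forwards. A usage sketch of the slimmed-down API; it assumes backward() seeds the output gradient with ones, which is the conventional behavior but is not shown in this hunk:

import numpy as np
from tinytorch.core.tensor import Tensor
from tinytorch.core.autograd import enable_autograd

enable_autograd()  # installs tracked_add, tracked_mul, tracked_matmul, sum_op
x = Tensor(np.array([[1.0, 2.0]]), requires_grad=True)
w = Tensor(np.array([[0.5], [0.25]]), requires_grad=True)
loss = x.matmul(w).sum()  # routed through tracked_matmul and sum_op
loss.backward()           # walks the _grad_fn chain back to x and w
print(w.grad)             # expected x transposed, i.e. [[1.], [2.]]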
+ # %% auto 0 __all__ = ['Linear', 'Dropout'] diff --git a/tinytorch/core/losses.py b/tinytorch/core/losses.py index dd12532a..8f4369ba 100644 --- a/tinytorch/core/losses.py +++ b/tinytorch/core/losses.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_losses/losses_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_losses/losses_dev.ipynb. + # %% auto 0 __all__ = ['import_previous_module', 'log_softmax', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss'] diff --git a/tinytorch/core/optimizers.py b/tinytorch/core/optimizers.py index 314a8db9..6a4a8ecd 100644 --- a/tinytorch/core/optimizers.py +++ b/tinytorch/core/optimizers.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/10_optimizers/optimizers_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/06_optimizers/optimizers_dev.ipynb. + # %% auto 0 __all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW'] diff --git a/tinytorch/core/spatial.py b/tinytorch/core/spatial.py index 0c64c1b3..928fd8c7 100644 --- a/tinytorch/core/spatial.py +++ b/tinytorch/core/spatial.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/06_spatial/spatial_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_spatial/spatial_dev.ipynb. 
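optimizers.py exports Optimizer, SGD, Adam, and AdamW. For orientation, the update rule that SGD implements (and that Adam/AdamW extend with per-parameter moment statistics) is the textbook gradient step; the sketch below is that generic rule in plain numpy, not a copy of the module's implementation:

import numpy as np

def sgd_step(params, grads, lr=0.01):
    # p <- p - lr * dL/dp, applied in place to every parameter array
    for p, g in zip(params, grads):
        p -= lr * g

w = np.array([0.5, -0.3])
g = np.array([0.2, 0.1])
sgd_step([w], [g], lr=0.1)
assert np.allclose(w, [0.48, -0.31])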
+ # %% auto 0 __all__ = ['Conv2d', 'MaxPool2d', 'AvgPool2d', 'SimpleCNN'] diff --git a/tinytorch/core/tensor.py b/tinytorch/core/tensor.py index 82e681fa..dfd03466 100644 --- a/tinytorch/core/tensor.py +++ b/tinytorch/core/tensor.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/02_tensor/tensor_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_tensor/tensor_dev.ipynb. + # %% auto 0 __all__ = ['Tensor'] diff --git a/tinytorch/core/training.py b/tinytorch/core/training.py index e4082b8f..dd393f81 100644 --- a/tinytorch/core/training.py +++ b/tinytorch/core/training.py @@ -1,21 +1,7 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/11_training/training_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/07_training/training_dev.ipynb. + # %% auto 0 -__all__ = ['CosineSchedule', 'Trainer'] +__all__ = ['CosineSchedule', 'save_checkpoint', 'load_checkpoint', 'Trainer'] # %% ../../modules/source/07_training/training_dev.ipynb 1 import numpy as np @@ -72,6 +58,90 @@ class CosineSchedule: ### END SOLUTION # %% ../../modules/source/07_training/training_dev.ipynb 14 +def save_checkpoint(checkpoint_dict: Dict[str, Any], path: str): + """ + Save checkpoint dictionary to disk using pickle. + + This is a low-level utility for saving model state. Use this when you have + a custom training loop and want to save just what you need (model params, + config, metadata). + + For complete training state with optimizer and scheduler, use + Trainer.save_checkpoint() instead. + + TODO: Implement checkpoint saving with pickle + + APPROACH: + 1. Create parent directory if it doesn't exist (Path(path).parent.mkdir) + 2. Open file in binary write mode ('wb') + 3. Use pickle.dump() to serialize the checkpoint dictionary + 4. Print confirmation message + + EXAMPLE: + >>> model = SimpleModel() + >>> checkpoint = { + ... 'model_params': [p.data.copy() for p in model.parameters()], + ... 'config': {'embed_dim': 32, 'num_layers': 2}, + ... 'metadata': {'final_loss': 0.089, 'training_steps': 5000} + ... 
} + >>> save_checkpoint(checkpoint, 'checkpoints/model.pkl') + ✓ Checkpoint saved: checkpoints/model.pkl + + HINTS: + - Use Path(path).parent.mkdir(parents=True, exist_ok=True) + - pickle.dump(obj, file) writes the object to file + - Always print a success message so users know it worked + """ + ### BEGIN SOLUTION + # Create parent directory if needed + Path(path).parent.mkdir(parents=True, exist_ok=True) + + # Save checkpoint using pickle + with open(path, 'wb') as f: + pickle.dump(checkpoint_dict, f) + + print(f"✓ Checkpoint saved: {path}") + ### END SOLUTION + +# %% ../../modules/source/07_training/training_dev.ipynb 15 +def load_checkpoint(path: str) -> Dict[str, Any]: + """ + Load checkpoint dictionary from disk using pickle. + + Companion function to save_checkpoint(). Restores the checkpoint dictionary + so you can rebuild your model, resume training, or inspect saved metadata. + + TODO: Implement checkpoint loading with pickle + + APPROACH: + 1. Open file in binary read mode ('rb') + 2. Use pickle.load() to deserialize the checkpoint + 3. Print confirmation message + 4. Return the loaded dictionary + + EXAMPLE: + >>> checkpoint = load_checkpoint('checkpoints/model.pkl') + ✓ Checkpoint loaded: checkpoints/model.pkl + >>> print(checkpoint['metadata']['final_loss']) + 0.089 + >>> model_params = checkpoint['model_params'] + >>> # Now restore model: for param, data in zip(model.parameters(), model_params)... + + HINTS: + - pickle.load(file) reads and deserializes the object + - Return the loaded dictionary + - Print a success message for user feedback + """ + ### BEGIN SOLUTION + # Load checkpoint using pickle + with open(path, 'rb') as f: + checkpoint = pickle.load(f) + + print(f"✓ Checkpoint loaded: {path}") + return checkpoint + ### END SOLUTION + +# %% ../../modules/source/07_training/training_dev.ipynb 19 class Trainer: """ Complete training orchestrator for neural networks. @@ -246,6 +316,11 @@ class Trainer: def save_checkpoint(self, path: str): """ Save complete training state for resumption. + + This high-level method saves everything needed to resume training: + model parameters, optimizer state, scheduler state, and training history. + + Uses the low-level save_checkpoint() function internally. Args: path: File path to save checkpoint @@ -260,19 +335,23 @@ class Trainer: 'training_mode': self.training_mode } - Path(path).parent.mkdir(parents=True, exist_ok=True) - with open(path, 'wb') as f: - pickle.dump(checkpoint, f) + # Use the standalone save_checkpoint function + save_checkpoint(checkpoint, path) def load_checkpoint(self, path: str): """ Load training state from checkpoint. + + This high-level method restores complete training state including + model parameters, optimizer state, scheduler state, and history. + + Uses the low-level load_checkpoint() function internally. Args: path: File path to load checkpoint from """ - with open(path, 'rb') as f: - checkpoint = pickle.load(f) + # Use the standalone load_checkpoint function + checkpoint = load_checkpoint(path) self.epoch = checkpoint['epoch'] self.step = checkpoint['step'] diff --git a/tinytorch/data/loader.py b/tinytorch/data/loader.py index 1018e73f..09ea90a2 100644 --- a/tinytorch/data/loader.py +++ b/tinytorch/data/loader.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. 
║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_loader/loader_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/08_dataloader/dataloader_dev.ipynb. + # %% auto 0 __all__ = ['Dataset', 'TensorDataset', 'DataLoader'] diff --git a/tinytorch/generation/kv_cache.py b/tinytorch/generation/kv_cache.py index 55d8504b..f6f411a6 100644 --- a/tinytorch/generation/kv_cache.py +++ b/tinytorch/generation/kv_cache.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_kv_cache/kv_cache_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/15_memoization/memoization_dev.ipynb. + # %% auto 0 __all__ = ['KVCache', 'enable_kv_cache', 'disable_kv_cache'] diff --git a/tinytorch/models/transformer.py b/tinytorch/models/transformer.py index 4bf34131..0fdd20ea 100644 --- a/tinytorch/models/transformer.py +++ b/tinytorch/models/transformer.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_transformer/transformer_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_transformers/transformers_dev.ipynb. 
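With the refactor above, Trainer.save_checkpoint()/load_checkpoint() and the standalone functions now share one pickle-based code path. A round-trip sketch using the standalone API (a single Linear layer stands in for any model exposing parameters(); the dictionary keys follow the module's own docstring example):

from tinytorch.core.layers import Linear
from tinytorch.core.training import save_checkpoint, load_checkpoint

layer = Linear(4, 2)
checkpoint = {
    'model_params': [p.data.copy() for p in layer.parameters()],
    'config': {'in_features': 4, 'out_features': 2},
    'metadata': {'note': 'illustrative values only'},
}
save_checkpoint(checkpoint, 'checkpoints/linear.pkl')  # creates parent dirs

restored = load_checkpoint('checkpoints/linear.pkl')
for p, saved in zip(layer.parameters(), restored['model_params']):
    p.data[...] = saved  # copy saved weights back in place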
+ # %% auto 0 __all__ = ['LayerNorm', 'MLP', 'TransformerBlock', 'GPT'] @@ -23,7 +9,6 @@ from ..core.tensor import Tensor from ..core.layers import Linear from ..core.attention import MultiHeadAttention from ..core.activations import GELU -from ..text.embeddings import Embedding, PositionalEncoding # %% ../../modules/source/13_transformers/transformers_dev.ipynb 9 class LayerNorm: @@ -61,6 +46,7 @@ class LayerNorm: self.eps = eps # Learnable parameters: scale and shift + # CRITICAL: requires_grad=True so optimizer can train these! self.gamma = Tensor(np.ones(normalized_shape), requires_grad=True) # Scale parameter self.beta = Tensor(np.zeros(normalized_shape), requires_grad=True) # Shift parameter ### END SOLUTION @@ -83,19 +69,18 @@ class LayerNorm: HINT: Use keepdims=True to maintain tensor dimensions for broadcasting """ ### BEGIN SOLUTION + # CRITICAL: Use Tensor operations (not .data) to maintain gradient flow! # Compute statistics across last dimension (features) mean = x.mean(axis=-1, keepdims=True) # Compute variance: E[(x - μ)²] - # Use Tensor operations to preserve computation graph! - diff = x - mean - variance = (diff * diff).mean(axis=-1, keepdims=True) + diff = x - mean # Tensor subtraction maintains gradient + variance = (diff * diff).mean(axis=-1, keepdims=True) # Tensor ops maintain gradient - # Normalize - use Tensor operations to preserve gradients! - # Add eps as a Tensor for proper gradient flow - eps_tensor = Tensor(np.array(self.eps), requires_grad=False) - std = Tensor(np.sqrt(variance.data + self.eps), requires_grad=variance.requires_grad) - normalized = (x - mean) / std + # Normalize: (x - mean) / sqrt(variance + eps) + # Note: sqrt and division need to preserve gradient flow + std_data = np.sqrt(variance.data + self.eps) + normalized = diff * Tensor(1.0 / std_data) # Scale by reciprocal to maintain gradient # Apply learnable transformation output = normalized * self.gamma + self.beta @@ -103,7 +88,7 @@ class LayerNorm: ### END SOLUTION def __call__(self, x): - """Allows the layer norm to be called like a function.""" + """Allows the layer to be called like a function.""" return self.forward(x) def parameters(self): @@ -147,7 +132,7 @@ class MLP: # Two-layer feed-forward network self.linear1 = Linear(embed_dim, hidden_dim) - self.gelu = GELU() # Use GELU activation from activations module + self.gelu = GELU() self.linear2 = Linear(hidden_dim, embed_dim) ### END SOLUTION @@ -171,7 +156,7 @@ class MLP: # First linear layer with expansion hidden = self.linear1.forward(x) - # GELU activation (YOUR activation from Module 03!) + # GELU activation hidden = self.gelu.forward(hidden) # Second linear layer back to original size @@ -404,10 +389,6 @@ class GPT: return logits ### END SOLUTION - def __call__(self, tokens): - """Allows the GPT model to be called like a function.""" - return self.forward(tokens) - def _create_causal_mask(self, seq_len): """Create causal mask to prevent attending to future positions.""" ### BEGIN SOLUTION diff --git a/tinytorch/optimization/acceleration.py b/tinytorch/optimization/acceleration.py index e59fe00f..fd53282e 100644 --- a/tinytorch/optimization/acceleration.py +++ b/tinytorch/optimization/acceleration.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! 
║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_acceleration/acceleration_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/18_acceleration/acceleration_dev.ipynb. + # %% auto 0 __all__ = [] diff --git a/tinytorch/optimization/compression.py b/tinytorch/optimization/compression.py index 20c318fa..7f43ee68 100644 --- a/tinytorch/optimization/compression.py +++ b/tinytorch/optimization/compression.py @@ -1,22 +1,7 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_compression/compression_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/17_compression/compression_dev.ipynb. 
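The compression hunks below swap the package imports for minimal Tensor/Linear/Sequential stand-ins and drop the pruning and distillation helpers from __all__. For reference, the magnitude-pruning rule those removed helpers implemented reduces to a percentile threshold per weight array; a generic numpy sketch, not the module's code:

import numpy as np

def magnitude_prune_array(w, sparsity=0.5):
    # zero out the smallest-|w| entries so roughly `sparsity` fraction are zero
    threshold = np.percentile(np.abs(w), sparsity * 100)
    w[np.abs(w) < threshold] = 0.0
    return w

w = np.random.randn(64, 64)
magnitude_prune_array(w, sparsity=0.5)
print((w == 0).mean())  # ~0.5 of the weights are now zero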
+ # %% auto 0 -__all__ = ['Sequential', 'KnowledgeDistillation', 'test_unit_knowledge_distillation', 'CompressionComplete', 'measure_sparsity', - 'magnitude_prune', 'structured_prune', 'compress_model'] +__all__ = ['Tensor', 'Linear', 'Sequential'] # %% ../../modules/source/17_compression/compression_dev.ipynb 1 import numpy as np @@ -24,277 +9,77 @@ import copy from typing import List, Dict, Any, Tuple, Optional import time -# Import from TinyTorch modules -from ..core.tensor import Tensor -from ..core.layers import Linear +# Import from previous modules +# Note: In the full package, these would be imports like: +# from tinytorch.core.tensor import Tensor +# from tinytorch.core.layers import Linear +# For development, we'll create minimal implementations + +class Tensor: + """Minimal Tensor class for compression development - imports from Module 01 in practice.""" + def __init__(self, data, requires_grad=False): + self.data = np.array(data) + self.shape = self.data.shape + self.size = self.data.size + self.requires_grad = requires_grad + self.grad = None + + def __add__(self, other): + if isinstance(other, Tensor): + return Tensor(self.data + other.data) + return Tensor(self.data + other) + + def __mul__(self, other): + if isinstance(other, Tensor): + return Tensor(self.data * other.data) + return Tensor(self.data * other) + + def matmul(self, other): + return Tensor(np.dot(self.data, other.data)) + + def abs(self): + return Tensor(np.abs(self.data)) + + def sum(self, axis=None): + return Tensor(self.data.sum(axis=axis)) + + def __repr__(self): + return f"Tensor(shape={self.shape})" + +class Linear: + """Minimal Linear layer for compression development - imports from Module 03 in practice.""" + def __init__(self, in_features, out_features, bias=True): + self.in_features = in_features + self.out_features = out_features + # Initialize with He initialization + self.weight = Tensor(np.random.randn(in_features, out_features) * np.sqrt(2.0 / in_features)) + self.bias = Tensor(np.zeros(out_features)) if bias else None + + def forward(self, x): + output = x.matmul(self.weight) + if self.bias is not None: + output = output + self.bias + return output + + def parameters(self): + params = [self.weight] + if self.bias is not None: + params.append(self.bias) + return params -# Sequential container for model compression class Sequential: - """Sequential container for compression (not exported from core layers).""" + """Minimal Sequential container for model compression.""" def __init__(self, *layers): self.layers = list(layers) def forward(self, x): for layer in self.layers: - x = layer.forward(x) if hasattr(layer, 'forward') else layer(x) + x = layer.forward(x) return x - def __call__(self, x): - return self.forward(x) - def parameters(self): params = [] for layer in self.layers: if hasattr(layer, 'parameters'): params.extend(layer.parameters()) return params - -# %% ../../modules/source/17_compression/compression_dev.ipynb 15 -class KnowledgeDistillation: - """ - Knowledge distillation for model compression. - - Train a smaller student model to mimic a larger teacher model. - """ - - def __init__(self, teacher_model, student_model, temperature=3.0, alpha=0.7): - """ - Initialize knowledge distillation. - - TODO: Set up teacher and student models with distillation parameters - - APPROACH: - 1. Store teacher and student models - 2. Set temperature for softening probability distributions - 3. 
Set alpha for balancing hard vs soft targets - - EXAMPLE: - >>> teacher = Sequential(Linear(100, 200), Linear(200, 50)) - >>> student = Sequential(Linear(100, 50)) - >>> kd = KnowledgeDistillation(teacher, student, temperature=4.0, alpha=0.8) - >>> print(f"Temperature: {kd.temperature}, Alpha: {kd.alpha}") - Temperature: 4.0, Alpha: 0.8 - - HINTS: - - Simply assign the parameters to instance variables - - Temperature typically ranges from 3-5 for effective softening - - Alpha of 0.7 means 70% soft targets, 30% hard targets - - Args: - teacher_model: Large, pre-trained model - student_model: Smaller model to train - temperature: Softening parameter for distributions - alpha: Weight for soft target loss (1-alpha for hard targets) - """ - ### BEGIN SOLUTION - self.teacher_model = teacher_model - self.student_model = student_model - self.temperature = temperature - self.alpha = alpha - ### END SOLUTION - - def distillation_loss(self, student_logits, teacher_logits, true_labels): - """ - Calculate combined distillation loss. - - TODO: Implement knowledge distillation loss function - - APPROACH: - 1. Calculate hard target loss (student vs true labels) - 2. Calculate soft target loss (student vs teacher, with temperature) - 3. Combine losses: alpha * soft_loss + (1-alpha) * hard_loss - - EXAMPLE: - >>> kd = KnowledgeDistillation(teacher, student) - >>> loss = kd.distillation_loss(student_out, teacher_out, labels) - >>> print(f"Distillation loss: {loss:.4f}") - - HINTS: - - Use temperature to soften distributions: logits/temperature - - Soft targets use KL divergence or cross-entropy - - Hard targets use standard classification loss - """ - ### BEGIN SOLUTION - # Convert to numpy for this implementation - if hasattr(student_logits, 'data'): - student_logits = student_logits.data - if hasattr(teacher_logits, 'data'): - teacher_logits = teacher_logits.data - if hasattr(true_labels, 'data'): - true_labels = true_labels.data - - # Soften distributions with temperature - student_soft = self._softmax(student_logits / self.temperature) - teacher_soft = self._softmax(teacher_logits / self.temperature) - - # Soft target loss (KL divergence) - soft_loss = self._kl_divergence(student_soft, teacher_soft) - - # Hard target loss (cross-entropy) - student_hard = self._softmax(student_logits) - hard_loss = self._cross_entropy(student_hard, true_labels) - - # Combined loss - total_loss = self.alpha * soft_loss + (1 - self.alpha) * hard_loss - - return total_loss - ### END SOLUTION - - def _softmax(self, logits): - """Compute softmax with numerical stability.""" - exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True)) - return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True) - - def _kl_divergence(self, p, q): - """Compute KL divergence between distributions.""" - return np.sum(p * np.log(p / (q + 1e-8) + 1e-8)) - - def _cross_entropy(self, predictions, labels): - """Compute cross-entropy loss.""" - # Simple implementation for integer labels - if labels.ndim == 1: - return -np.mean(np.log(predictions[np.arange(len(labels)), labels] + 1e-8)) - else: - return -np.mean(np.sum(labels * np.log(predictions + 1e-8), axis=1)) - -def test_unit_knowledge_distillation(): - """🔬 Test knowledge distillation functionality.""" - print("🔬 Unit Test: Knowledge Distillation...") - - # Create teacher and student models - teacher = Sequential(Linear(10, 20), Linear(20, 5)) - student = Sequential(Linear(10, 5)) # Smaller model - - # Initialize knowledge distillation - kd = KnowledgeDistillation(teacher, student, 
temperature=3.0, alpha=0.7) - - # Create dummy data - input_data = Tensor(np.random.randn(8, 10)) # Batch of 8 - true_labels = np.array([0, 1, 2, 3, 4, 0, 1, 2]) # Class labels - - # Forward passes - teacher_output = teacher.forward(input_data) - student_output = student.forward(input_data) - - # Calculate distillation loss - loss = kd.distillation_loss(student_output, teacher_output, true_labels) - - # Verify loss is reasonable - assert isinstance(loss, (float, np.floating)), f"Loss should be float, got {type(loss)}" - assert loss > 0, f"Loss should be positive, got {loss}" - assert not np.isnan(loss), "Loss should not be NaN" - - print("✅ knowledge_distillation works correctly!") - -test_unit_knowledge_distillation() - -# %% ../../modules/source/17_compression/compression_dev.ipynb 29 -class CompressionComplete: - """ - Complete compression system for milestone use. - - Provides pruning, distillation, and low-rank approximation techniques. - """ - - @staticmethod - def measure_sparsity(model) -> float: - """Measure the sparsity of a model (fraction of zero weights).""" - total_params = 0 - zero_params = 0 - - if hasattr(model, 'parameters'): - for param in model.parameters(): - total_params += param.size - zero_params += np.sum(param.data == 0) - - return zero_params / total_params if total_params > 0 else 0.0 - - @staticmethod - def magnitude_prune(model, sparsity=0.5): - """ - Prune model weights by magnitude (smallest weights set to zero). - - Args: - model: Model with parameters() method - sparsity: Fraction of weights to prune (0-1) - """ - if hasattr(model, 'parameters'): - for param in model.parameters(): - threshold = np.percentile(np.abs(param.data), sparsity * 100) - param.data[np.abs(param.data) < threshold] = 0 - - return model - - @staticmethod - def structured_prune(model, prune_ratio=0.5): - """ - Prune entire neurons/channels (structured pruning). - - Args: - model: Model to prune - prune_ratio: Fraction of structures to prune (0-1) - """ - if hasattr(model, 'parameters'): - params = list(model.parameters()) - if len(params) > 0 and hasattr(params[0], 'data'): - weight = params[0] - if len(weight.shape) == 2: # Linear layer - # Prune output neurons - neuron_norms = np.linalg.norm(weight.data, axis=0) - threshold = np.percentile(neuron_norms, prune_ratio * 100) - mask = neuron_norms >= threshold - weight.data[:, ~mask] = 0 - - return model - - @staticmethod - def compress_model(model, compression_config: Dict[str, Any]): - """ - Apply complete compression pipeline to a model. 
- - Args: - model: Model to compress - compression_config: Dictionary with compression settings - - 'magnitude_sparsity': float (0-1) - - 'structured_prune_ratio': float (0-1) - - Returns: - Compressed model with sparsity stats - """ - stats = { - 'original_sparsity': CompressionComplete.measure_sparsity(model) - } - - # Apply magnitude pruning - if 'magnitude_sparsity' in compression_config: - model = CompressionComplete.magnitude_prune( - model, compression_config['magnitude_sparsity'] - ) - - # Apply structured pruning - if 'structured_prune_ratio' in compression_config: - model = CompressionComplete.structured_prune( - model, compression_config['structured_prune_ratio'] - ) - - stats['final_sparsity'] = CompressionComplete.measure_sparsity(model) - stats['compression_ratio'] = 1.0 / (1.0 - stats['final_sparsity']) if stats['final_sparsity'] < 1.0 else float('inf') - - return model, stats - -# Convenience functions for backward compatibility -def measure_sparsity(model) -> float: - """Measure model sparsity.""" - return CompressionComplete.measure_sparsity(model) - -def magnitude_prune(model, sparsity=0.5): - """Apply magnitude-based pruning.""" - return CompressionComplete.magnitude_prune(model, sparsity) - -def structured_prune(model, prune_ratio=0.5): - """Apply structured pruning.""" - return CompressionComplete.structured_prune(model, prune_ratio) - -def compress_model(model, compression_config: Dict[str, Any]): - """Apply complete compression pipeline.""" - return CompressionComplete.compress_model(model, compression_config) diff --git a/tinytorch/optimization/quantization.py b/tinytorch/optimization/quantization.py index c30509d3..872b359f 100644 --- a/tinytorch/optimization/quantization.py +++ b/tinytorch/optimization/quantization.py @@ -1,21 +1,7 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ -# ║ ║ -# ║ ✅ TO EDIT: modules/source/XX_quantization/quantization_dev.py ║ -# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ -# ║ ║ -# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ -# ║ Editing it directly may break module functionality and training. ║ -# ║ ║ -# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ -# ║ happens! The tinytorch/ directory is just the compiled output. ║ -# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/16_quantization/quantization_dev.ipynb. + # %% auto 0 -__all__ = [] +__all__ = ['QuantizationComplete', 'quantize_int8', 'dequantize_int8', 'quantize_model'] # %% ../../modules/source/16_quantization/quantization_dev.ipynb 3 import numpy as np @@ -29,3 +15,94 @@ from ..core.layers import Linear from ..core.activations import ReLU print("✅ Quantization module imports complete") + +# %% ../../modules/source/16_quantization/quantization_dev.ipynb 34 +class QuantizationComplete: + """ + Complete quantization system for milestone use. + + Provides INT8 quantization with calibration for 4× memory reduction. 
+ """ + + @staticmethod + def quantize_tensor(tensor: Tensor) -> Tuple[Tensor, float, int]: + """Quantize FP32 tensor to INT8.""" + data = tensor.data + min_val = float(np.min(data)) + max_val = float(np.max(data)) + + if abs(max_val - min_val) < 1e-8: + return Tensor(np.zeros_like(data, dtype=np.int8)), 1.0, 0 + + scale = (max_val - min_val) / 255.0 + zero_point = int(np.round(-128 - min_val / scale)) + zero_point = int(np.clip(zero_point, -128, 127)) + + quantized_data = np.round(data / scale + zero_point) + quantized_data = np.clip(quantized_data, -128, 127).astype(np.int8) + + return Tensor(quantized_data), scale, zero_point + + @staticmethod + def dequantize_tensor(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor: + """Dequantize INT8 tensor back to FP32.""" + dequantized_data = (q_tensor.data.astype(np.float32) - zero_point) * scale + return Tensor(dequantized_data) + + @staticmethod + def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> Dict[str, any]: + """ + Quantize all Linear layers in a model. + + Returns dictionary with quantization info and memory savings. + """ + quantized_layers = {} + original_size = 0 + quantized_size = 0 + + # Iterate through model parameters + if hasattr(model, 'parameters'): + for i, param in enumerate(model.parameters()): + param_size = param.data.nbytes + original_size += param_size + + # Quantize parameter + q_param, scale, zp = QuantizationComplete.quantize_tensor(param) + quantized_size += q_param.data.nbytes + + quantized_layers[f'param_{i}'] = { + 'quantized': q_param, + 'scale': scale, + 'zero_point': zp, + 'original_shape': param.data.shape + } + + return { + 'quantized_layers': quantized_layers, + 'original_size_mb': original_size / (1024 * 1024), + 'quantized_size_mb': quantized_size / (1024 * 1024), + 'compression_ratio': original_size / quantized_size if quantized_size > 0 else 1.0 + } + + @staticmethod + def compare_models(original_model, quantized_info: Dict) -> Dict[str, float]: + """Compare memory usage between original and quantized models.""" + return { + 'original_mb': quantized_info['original_size_mb'], + 'quantized_mb': quantized_info['quantized_size_mb'], + 'compression_ratio': quantized_info['compression_ratio'], + 'memory_saved_mb': quantized_info['original_size_mb'] - quantized_info['quantized_size_mb'] + } + +# Convenience functions for backward compatibility +def quantize_int8(tensor: Tensor) -> Tuple[Tensor, float, int]: + """Quantize FP32 tensor to INT8.""" + return QuantizationComplete.quantize_tensor(tensor) + +def dequantize_int8(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor: + """Dequantize INT8 tensor back to FP32.""" + return QuantizationComplete.dequantize_tensor(q_tensor, scale, zero_point) + +def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> Dict[str, any]: + """Quantize entire model to INT8.""" + return QuantizationComplete.quantize_model(model, calibration_data) diff --git a/tinytorch/profiling/profiler.py b/tinytorch/profiling/profiler.py index 3f393015..88aece66 100644 --- a/tinytorch/profiling/profiler.py +++ b/tinytorch/profiling/profiler.py @@ -1,19 +1,5 @@ -# ╔═══════════════════════════════════════════════════════════════════════════════╗ -# ║ 🚨 CRITICAL WARNING 🚨 ║ -# ║ AUTOGENERATED! DO NOT EDIT! ║ -# ║ ║ -# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ -# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! 
diff --git a/tinytorch/profiling/profiler.py b/tinytorch/profiling/profiler.py
index 3f393015..88aece66 100644
--- a/tinytorch/profiling/profiler.py
+++ b/tinytorch/profiling/profiler.py
@@ -1,19 +1,5 @@
-# ╔═══════════════════════════════════════════════════════════════════════════════╗
-# ║                            🚨 CRITICAL WARNING 🚨                              ║
-# ║                         AUTOGENERATED! DO NOT EDIT!                            ║
-# ║                                                                                ║
-# ║  This file is AUTOMATICALLY GENERATED from source modules.                    ║
-# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!             ║
-# ║                                                                                ║
-# ║  ✅ TO EDIT:   modules/source/XX_profiler/profiler_dev.py                     ║
-# ║  ✅ TO EXPORT: Run 'tito module complete '                                     ║
-# ║                                                                                ║
-# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.         ║
-# ║     Editing it directly may break module functionality and training.          ║
-# ║                                                                                ║
-# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development     ║
-# ║     happens! The tinytorch/ directory is just the compiled output.            ║
-# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/14_profiling/profiling_dev.ipynb.
+
 # %% auto 0
 __all__ = ['Profiler', 'quick_profile', 'analyze_weight_distribution']
diff --git a/tinytorch/text/embeddings.py b/tinytorch/text/embeddings.py
index dacb0f27..3d8a6d03 100644
--- a/tinytorch/text/embeddings.py
+++ b/tinytorch/text/embeddings.py
@@ -1,19 +1,5 @@
-# ╔═══════════════════════════════════════════════════════════════════════════════╗
-# ║                            🚨 CRITICAL WARNING 🚨                              ║
-# ║                         AUTOGENERATED! DO NOT EDIT!                            ║
-# ║                                                                                ║
-# ║  This file is AUTOMATICALLY GENERATED from source modules.                    ║
-# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!             ║
-# ║                                                                                ║
-# ║  ✅ TO EDIT:   modules/source/XX_embeddings/embeddings_dev.py                 ║
-# ║  ✅ TO EXPORT: Run 'tito module complete '                                     ║
-# ║                                                                                ║
-# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.         ║
-# ║     Editing it directly may break module functionality and training.          ║
-# ║                                                                                ║
-# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development     ║
-# ║     happens! The tinytorch/ directory is just the compiled output.            ║
-# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/11_embeddings/embeddings_dev.ipynb.
+
 # %% auto 0
 __all__ = ['Embedding', 'PositionalEncoding', 'EmbeddingLayer']
@@ -95,13 +81,10 @@ class Embedding:
         # This is equivalent to one-hot multiplication but much more efficient
         embedded = self.weight.data[indices.data.astype(int)]
 
-        # Create result tensor
+        # Create result tensor with gradient tracking
+        # Note: Gradient computation handled by autograd system (Module 05)
+        # The embedding lookup is differentiable through the weight matrix
         result = Tensor(embedded, requires_grad=self.weight.requires_grad)
-
-        # Attach gradient function (students learned this in Module 05!)
-        if self.weight.requires_grad:
-            from tinytorch.core.autograd import EmbeddingBackward
-            result._grad_fn = EmbeddingBackward(self.weight, indices)
 
         return result
 
@@ -336,10 +319,6 @@ class EmbeddingLayer:
 
         return output
 
-    def __call__(self, tokens: Tensor) -> Tensor:
-        """Allows the embedding layer to be called like a function."""
-        return self.forward(tokens)
-
     def parameters(self) -> List[Tensor]:
         """Return all trainable parameters."""
         params = self.token_embedding.parameters()
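The replacement comment above drops an explicit backward hook, so it is worth spelling out why the lookup stays differentiable: the forward pass is a row gather from the weight matrix, and its gradient is a scatter-add of the upstream gradient into exactly the rows that were gathered (rows used twice accumulate twice). A standalone NumPy sketch of that forward/backward pair, independent of TinyTorch's autograd:

import numpy as np

vocab_size, embed_dim = 10, 4
W = np.random.randn(vocab_size, embed_dim)   # embedding table
idx = np.array([3, 1, 3])                    # token ids; 3 is used twice

out = W[idx]                                 # forward: row gather -> (3, embed_dim)

grad_out = np.ones_like(out)                 # pretend upstream gradient
grad_W = np.zeros_like(W)
np.add.at(grad_W, idx, grad_out)             # backward: scatter-add into used rows

print(grad_W[3])                             # gathered twice -> all 2.0
print(grad_W[0])                             # never gathered -> stays 0.0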
diff --git a/tinytorch/text/tokenization.py b/tinytorch/text/tokenization.py
index 384f738f..5b368a5d 100644
--- a/tinytorch/text/tokenization.py
+++ b/tinytorch/text/tokenization.py
@@ -1,19 +1,5 @@
-# ╔═══════════════════════════════════════════════════════════════════════════════╗
-# ║                            🚨 CRITICAL WARNING 🚨                              ║
-# ║                         AUTOGENERATED! DO NOT EDIT!                            ║
-# ║                                                                                ║
-# ║  This file is AUTOMATICALLY GENERATED from source modules.                    ║
-# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!             ║
-# ║                                                                                ║
-# ║  ✅ TO EDIT:   modules/source/XX_tokenization/tokenization_dev.py             ║
-# ║  ✅ TO EXPORT: Run 'tito module complete '                                     ║
-# ║                                                                                ║
-# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.         ║
-# ║     Editing it directly may break module functionality and training.          ║
-# ║                                                                                ║
-# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development     ║
-# ║     happens! The tinytorch/ directory is just the compiled output.            ║
-# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/10_tokenization/tokenization_dev.ipynb.
+
 # %% auto 0
 __all__ = ['Tokenizer', 'CharTokenizer', 'BPETokenizer']
 
@@ -24,16 +10,6 @@ import json
 import re
 from collections import defaultdict, Counter
 
-# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 3
-import numpy as np
-from typing import List, Dict, Tuple, Optional, Set
-import json
-import re
-from collections import defaultdict, Counter
-
-# Import only Module 01 (Tensor) - this module has minimal dependencies
-from ..core.tensor import Tensor
-
 # %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 8
 class Tokenizer:
     """
diff --git a/tito/commands/book.py b/tito/commands/book.py
index e5a79e1c..ef898a53 100644
--- a/tito/commands/book.py
+++ b/tito/commands/book.py
@@ -10,7 +10,7 @@ from rich.panel import Panel
 
 from .base import BaseCommand
 
-NOTEBOOKS_DIR = "modules/source"
+NOTEBOOKS_DIR = "modules"
 
 class BookCommand(BaseCommand):
     @property
diff --git a/tito/commands/clean.py b/tito/commands/clean.py
index 5da54068..3ecb7b8f 100644
--- a/tito/commands/clean.py
+++ b/tito/commands/clean.py
@@ -113,7 +113,7 @@ class CleanCommand(BaseCommand):
         # Ask for confirmation unless --force is used
         if not args.force:
             console.print("\n[yellow]This will permanently remove the files listed above.[/yellow]")
-            console.print("[yellow]Python source files (*_dev.py) will be preserved.[/yellow]\n")
+            console.print("[yellow]Python source files (*.py) will be preserved.[/yellow]\n")
 
             try:
                 response = input("Are you sure you want to proceed? (y/N): ").strip().lower()
diff --git a/tito/commands/export.py b/tito/commands/export.py
index 1aa11aa9..67a7ef30 100644
--- a/tito/commands/export.py
+++ b/tito/commands/export.py
@@ -71,7 +71,7 @@ class ExportCommand(BaseCommand):
         else:
             short_name = module_name
 
-        dev_file = module_path / f"{short_name}_dev.py"
+        dev_file = module_path / f"{short_name}.py"
         if not dev_file.exists():
             return "unknown"
@@ -89,8 +89,8 @@ class ExportCommand(BaseCommand):
         return "unknown"
 
     def _discover_modules(self) -> list:
-        """Discover available modules from modules/source directory."""
-        source_dir = Path("modules/source")
+        """Discover available modules from modules directory."""
+        source_dir = Path("modules")
         modules = []
 
         if source_dir.exists():
@@ -266,7 +266,7 @@ class ExportCommand(BaseCommand):
 # ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations.          ║
 # ║    Editing it directly may break module functionality and training.           ║
 # ║                                                                                ║
-# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development      ║
+# ║ 🎓 LEARNING TIP: Work in modules/ - that's where real development             ║
 # ║    happens! The tinytorch/ directory is just the compiled output.             ║
 # ╚═══════════════════════════════════════════════════════════════════════════════╝
 
@@ -305,20 +305,20 @@ class ExportCommand(BaseCommand):
 
         # Common mappings
         source_mappings = {
-            ('core', 'tensor'): 'modules/source/02_tensor/tensor_dev.py',
-            ('core', 'activations'): 'modules/source/03_activations/activations_dev.py',
-            ('core', 'layers'): 'modules/source/04_layers/layers_dev.py',
-            ('core', 'dense'): 'modules/source/05_dense/dense_dev.py',
-            ('core', 'spatial'): 'modules/source/06_spatial/spatial_dev.py',
-            ('core', 'attention'): 'modules/source/07_attention/attention_dev.py',
-            ('core', 'dataloader'): 'modules/source/08_dataloader/dataloader_dev.py',
-            ('core', 'autograd'): 'modules/source/09_autograd/autograd_dev.py',
-            ('core', 'optimizers'): 'modules/source/10_optimizers/optimizers_dev.py',
-            ('core', 'training'): 'modules/source/11_training/training_dev.py',
-            ('core', 'compression'): 'modules/source/12_compression/compression_dev.py',
-            ('core', 'kernels'): 'modules/source/13_kernels/kernels_dev.py',
-            ('core', 'benchmarking'): 'modules/source/14_benchmarking/benchmarking_dev.py',
-            ('core', 'networks'): 'modules/source/16_tinygpt/tinygpt_dev.ipynb',
+            ('core', 'tensor'): 'modules/02_tensor/tensor.py',
+            ('core', 'activations'): 'modules/03_activations/activations.py',
+            ('core', 'layers'): 'modules/04_layers/layers.py',
+            ('core', 'dense'): 'modules/05_dense/dense.py',
+            ('core', 'spatial'): 'modules/06_spatial/spatial.py',
+            ('core', 'attention'): 'modules/07_attention/attention.py',
+            ('core', 'dataloader'): 'modules/08_dataloader/dataloader.py',
+            ('core', 'autograd'): 'modules/09_autograd/autograd.py',
+            ('core', 'optimizers'): 'modules/10_optimizers/optimizers.py',
+            ('core', 'training'): 'modules/11_training/training.py',
+            ('core', 'compression'): 'modules/12_compression/compression.py',
+            ('core', 'kernels'): 'modules/13_kernels/kernels.py',
+            ('core', 'benchmarking'): 'modules/14_benchmarking/benchmarking.py',
+            ('core', 'networks'): 'modules/16_tinygpt/tinygpt_dev.ipynb',
         }
 
         if module_parts in source_mappings:
@@ -327,9 +327,9 @@ class ExportCommand(BaseCommand):
         # Fallback: try to guess based on the file name
         if len(module_parts) >= 2:
             module_name = module_parts[-1]  # e.g., 'tensor' from ('core', 'tensor')
-            return f"modules/source/XX_{module_name}/{module_name}_dev.py"
+            return f"modules/XX_{module_name}/{module_name}.py"
 
-        return "modules/source/[unknown]/[unknown]_dev.py"
+        return "modules/[unknown]/[unknown].py"
 
     def _show_export_details(self, console, module_name: Optional[str] = None):
         """Show detailed export information including where each module exports to."""
@@ -338,7 +338,7 @@ class ExportCommand(BaseCommand):
 
         if module_name:
             # Single module export
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             export_target = self._get_export_target(module_path)
             if export_target != "unknown":
                 target_file = export_target.replace('.', '/') + '.py'
@@ -346,7 +346,7 @@ class ExportCommand(BaseCommand):
 
                 # Extract the short name for display
                 short_name = module_name[3:] if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))) else module_name
-                exports_text.append(f"  Source: modules/source/{module_name}/{short_name}_dev.py\n", style="dim")
+                exports_text.append(f"  Source: modules/{module_name}/{short_name}.py\n", style="dim")
                 exports_text.append(f"  Target: tinytorch/{target_file}\n", style="dim")
             else:
                 exports_text.append(f"  ❓ {module_name} → export target not found\n", style="yellow")
@@ -354,7 +354,7 @@ class ExportCommand(BaseCommand):
         # All modules export
         modules = self._discover_modules()
         for module_name in modules:
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             export_target = self._get_export_target(module_path)
             if export_target != "unknown":
                 target_file = export_target.replace('.', '/') + '.py'
@@ -456,7 +456,7 @@ class ExportCommand(BaseCommand):
         module_name = module_path.name
         short_name = module_name[3:] if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))) else module_name
 
-        dev_file = module_path / f"{short_name}_dev.py"
+        dev_file = module_path / f"{short_name}.py"
         if not dev_file.exists():
             self.console.print(f"[red]❌ Python file not found: {dev_file}[/red]")
             return False
@@ -539,7 +539,7 @@ class ExportCommand(BaseCommand):
 
         converted = []
         for module_name in modules:
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             if self._convert_py_to_notebook(module_path):
                 converted.append(module_name)
 
@@ -563,9 +563,9 @@ class ExportCommand(BaseCommand):
         # Process each module
         for module_name in modules_to_export:
             logger.debug(f"Processing module: {module_name}")
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             if not module_path.exists():
-                console.print(Panel(f"[red]❌ Module '{module_name}' not found in modules/source/[/red]",
+                console.print(Panel(f"[red]❌ Module '{module_name}' not found in modules/[/red]",
                                    title="Module Not Found", border_style="red"))
 
                 # Show available modules
diff --git a/tito/commands/grade.py b/tito/commands/grade.py
index aed17aff..e314604c 100644
--- a/tito/commands/grade.py
+++ b/tito/commands/grade.py
@@ -193,7 +193,7 @@ class GradeCommand(BaseCommand):
             return module
 
         # Try to find the module by short name
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         if source_dir.exists():
             for module_dir in source_dir.iterdir():
                 if module_dir.is_dir() and module_dir.name.endswith(f"_{module}"):
@@ -218,7 +218,7 @@ class GradeCommand(BaseCommand):
             # Step 1: Generate assignment first
             result = subprocess.run(
                 ["nbgrader", "generate_assignment", module,
-                 "--source", f"modules/source/{module}",
+                 "--source", f"modules/{module}",
                  "--force"],
                 capture_output=True,
                 text=True
@@ -259,7 +259,7 @@ class GradeCommand(BaseCommand):
         try:
             result = subprocess.run(
                 ["nbgrader", "generate_assignment", module,
-                 "--source", f"modules/source/{module}",
+                 "--source", f"modules/{module}",
                  "--force"],
                 capture_output=True,
                 text=True
@@ -418,7 +418,7 @@ class GradeCommand(BaseCommand):
 c = get_config()
 
 c.CourseDirectory.course_id = "tinytorch"
-c.CourseDirectory.source_directory = "modules/source"
+c.CourseDirectory.source_directory = "modules"
 c.CourseDirectory.release_directory = "release"
 c.CourseDirectory.submitted_directory = "submitted"
 c.CourseDirectory.autograded_directory = "autograded"
diff --git a/tito/commands/help.py b/tito/commands/help.py
index 33b4e67a..dffb520e 100644
--- a/tito/commands/help.py
+++ b/tito/commands/help.py
@@ -270,8 +270,8 @@ class HelpCommand(BaseCommand):
         elif starting_point == 'first_module':
             console.print("\n[bold blue]🛠️ Setting up Module 1...[/bold blue]")
             console.print("Next commands:")
-            console.print("  [code]cd modules/source/01_setup[/code]")
-            console.print("  [code]jupyter lab setup_dev.py[/code]")
+            console.print("  [code]cd modules/01_setup[/code]")
+            console.print("  [code]jupyter lab setup.py[/code]")
 
         elif starting_point == 'milestone_project':
             console.print("\n[bold blue]🎯 Weekend Project Recommendations...[/bold blue]")
 
@@ -315,7 +315,7 @@ class HelpCommand(BaseCommand):
         # Common workflows
         console.print("\n[bold cyan]📋 Common Workflows:[/bold cyan]")
         workflows = [
-            ("New User", "tito help -i → tito checkpoint status → cd modules/source/01_setup"),
+            ("New User", "tito help -i → tito checkpoint status → cd modules/01_setup"),
             ("Continue Learning", "tito checkpoint status → work on next module → tito module complete XX"),
             ("Join Community", "tito leaderboard join → submit progress → see global rankings"),
             ("Get Help", "tito system doctor → check docs/FAQ → ask community")
@@ -371,7 +371,7 @@ class HelpCommand(BaseCommand):
 
         # Simplified implementation for now
         checkpoints_dir = Path("tests/checkpoints")
-        modules_dir = Path("modules/source")
+        modules_dir = Path("modules")
 
         return {
             'is_new_user': not checkpoints_dir.exists(),
@@ -424,7 +424,7 @@ class HelpCommand(BaseCommand):
             "• [code]tito system doctor[/code] - Verify installation\n"
             "• [code]tito help --interactive[/code] - Personalized guidance\n"
             "• [code]tito checkpoint status[/code] - See learning path\n"
-            "• [code]cd modules/source/01_setup[/code] - Start first module",
+            "• [code]cd modules/01_setup[/code] - Start first module",
             title="First Steps",
             border_style="blue"
         )
diff --git a/tito/commands/module.py b/tito/commands/module.py
index 60e95d16..acc49cb3 100644
--- a/tito/commands/module.py
+++ b/tito/commands/module.py
@@ -275,13 +275,13 @@ class ModuleCommand(BaseCommand):
         """Normalize module name to full format (e.g., tensor -> 02_tensor)."""
         # If already in full format, validate it exists
         if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))):
-            module_path = Path(f"modules/source/{module_name}")
+            module_path = Path(f"modules/{module_name}")
             if module_path.exists():
                 return module_name
             return ""
 
         # Try to find the module by short name
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         if source_dir.exists():
             for module_dir in source_dir.iterdir():
                 if module_dir.is_dir() and module_dir.name.endswith(f"_{module_name}"):
@@ -291,7 +291,7 @@ class ModuleCommand(BaseCommand):
 
     def _get_available_modules_text(self) -> str:
         """Get formatted text listing available modules."""
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         modules = []
 
         if source_dir.exists():
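Several of the commands touched by this patch (grade, module, nbgrader, status, test) repeat the same resolution dance: accept either a numbered directory name ("02_tensor") or a bare short name ("tensor") and map it onto the new modules/ layout. A condensed sketch of that convention (a hypothetical standalone helper, not one of the methods in the diff):

from pathlib import Path

def normalize_module_name(name: str, root: Path = Path("modules")) -> str:
    # Already numbered ("02_tensor")? Keep it if the directory exists.
    if len(name) > 3 and name[:2].isdigit() and name[2] == "_":
        return name if (root / name).is_dir() else ""
    # Bare short name ("tensor"): match a numbered directory by suffix.
    if root.is_dir():
        for child in sorted(root.iterdir()):
            if child.is_dir() and child.name.endswith(f"_{name}"):
                return child.name
    return ""

print(normalize_module_name("tensor"))   # e.g. "02_tensor" if modules/02_tensor/ exists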
diff --git a/tito/commands/module_workflow.py b/tito/commands/module_workflow.py
index a0e017f6..330e77e2 100644
--- a/tito/commands/module_workflow.py
+++ b/tito/commands/module_workflow.py
@@ -258,7 +258,7 @@ class ModuleWorkflowCommand(BaseCommand):
         try:
             # Run the module's inline tests
             module_dir = self.config.modules_dir / module_name
-            dev_file = module_dir / f"{module_name.split('_')[1]}_dev.py"
+            dev_file = module_dir / f"{module_name.split('_')[1]}.py"
 
             if not dev_file.exists():
                 self.console.print(f"[yellow]⚠️ No dev file found: {dev_file}[/yellow]")
diff --git a/tito/commands/nbgrader.py b/tito/commands/nbgrader.py
index c9f736df..0c659eeb 100644
--- a/tito/commands/nbgrader.py
+++ b/tito/commands/nbgrader.py
@@ -218,7 +218,7 @@ class NBGraderCommand(BaseCommand):
 
     def _get_module_directories(self) -> List[Path]:
         """Get all module directories with proper hierarchy support."""
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         if not source_dir.exists():
             return []
 
@@ -240,19 +240,19 @@ class NBGraderCommand(BaseCommand):
     def _resolve_module_name(self, module_input: str) -> Optional[str]:
         """Resolve module name from various input formats."""
         # If it's already a directory name, use it
-        if Path(f"modules/source/{module_input}").exists():
+        if Path(f"modules/{module_input}").exists():
             return module_input
 
         # Try to find by number prefix
         if module_input.isdigit():
             prefix = module_input.zfill(2)
-            source_dir = Path("modules/source")
+            source_dir = Path("modules")
             for item in source_dir.iterdir():
                 if item.is_dir() and item.name.startswith(prefix):
                     return item.name
 
         # Try to find by name suffix
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
         for item in source_dir.iterdir():
             if item.is_dir() and item.name.endswith(f"_{module_input}"):
                 return item.name
@@ -375,7 +375,7 @@ class NBGraderCommand(BaseCommand):
         console.print(f"📝 Generating assignment for module: {module_name}")
 
         # Find the module development file in TinyTorch modules directory
-        module_dir = Path("modules/source") / module_name
+        module_dir = Path("modules") / module_name
 
         # Extract the short name from the module directory name
         # e.g., "00_setup" -> "setup", "01_tensor" -> "tensor"
@@ -394,7 +394,7 @@ class NBGraderCommand(BaseCommand):
 
         if not dev_file:
             console.print(f"❌ Module file not found in: {module_dir}")
-            console.print(f"   Looking for: {short_name}_dev.py or {short_name}_dev_enhanced.py")
+            console.print(f"   Looking for: {short_name}.py or {short_name}_dev_enhanced.py")
             return False
 
         # Convert to notebook and generate assignment
diff --git a/tito/commands/notebooks.py b/tito/commands/notebooks.py
index 942c6182..22ac5638 100644
--- a/tito/commands/notebooks.py
+++ b/tito/commands/notebooks.py
@@ -45,20 +45,20 @@ class NotebooksCommand(BaseCommand):
     def validate_args(self, args: Namespace) -> None:
         """Validate notebooks command arguments."""
         if args.module:
-            # Look in modules/source/ subdirectory
+            # Look in modules/ (a legacy modules/source/ layout is still preferred if present)
             source_dir = self.config.modules_dir / 'source'
             if not source_dir.exists():
                 source_dir = self.config.modules_dir
-            module_file = source_dir / args.module / f"{args.module}_dev.py"
+            module_file = source_dir / args.module / f"{args.module}.py"
             if not module_file.exists():
                 raise ModuleNotFoundError(
-                    f"Module '{args.module}' not found or no {args.module}_dev.py file"
+                    f"Module '{args.module}' not found or no {args.module}.py file"
                 )
 
     def _find_dev_files(self) -> List[Path]:
-        """Find all *_dev.py files in modules directory."""
+        """Find all *.py files in modules directory."""
         dev_files = []
-        # Look in modules/source/ subdirectory
+        # Look in modules/ (a legacy modules/source/ layout is still preferred if present)
         source_dir = self.config.modules_dir / 'source'
         if not source_dir.exists():
             # Fallback to modules_dir directly
@@ -66,7 +66,7 @@ class NotebooksCommand(BaseCommand):
 
         for module_dir in source_dir.iterdir():
             if module_dir.is_dir():
-                dev_py = module_dir / f"{module_dir.name}_dev.py"
+                dev_py = module_dir / f"{module_dir.name}.py"
                 if dev_py.exists():
                     dev_files.append(dev_py)
         return dev_files
@@ -103,17 +103,17 @@ class NotebooksCommand(BaseCommand):
 
         # Find files to convert
         if args.module:
-            # Look in modules/source/ subdirectory
+            # Look in modules/ (a legacy modules/source/ layout is still preferred if present)
             source_dir = self.config.modules_dir / 'source'
             if not source_dir.exists():
                 source_dir = self.config.modules_dir
-            dev_files = [source_dir / args.module / f"{args.module}_dev.py"]
+            dev_files = [source_dir / args.module / f"{args.module}.py"]
             self.console.print(f"🔄 Building notebook for module: {args.module}")
         else:
             dev_files = self._find_dev_files()
             if not dev_files:
                 self.console.print(Panel(
-                    "[yellow]⚠️ No *_dev.py files found in modules/[/yellow]",
+                    "[yellow]⚠️ No *.py files found in modules/[/yellow]",
                     title="Nothing to Convert",
Convert", border_style="yellow" )) diff --git a/tito/commands/protect.py b/tito/commands/protect.py index ed3d5898..29979726 100644 --- a/tito/commands/protect.py +++ b/tito/commands/protect.py @@ -167,11 +167,11 @@ if [ ! -z "$CORE_FILES_MODIFIED" ]; then echo "The following auto-generated files are staged:" echo "$CORE_FILES_MODIFIED" echo "" - echo "🛡️ PROTECTION TRIGGERED: These files are auto-generated from modules/source/" + echo "🛡️ PROTECTION TRIGGERED: These files are auto-generated from modules/" echo "" echo "TO FIX:" echo "1. Unstage these files: git reset HEAD tinytorch/core/" - echo "2. Make changes in modules/source/ instead" + echo "2. Make changes in modules/ instead" echo "3. Run: tito module complete " echo "4. Commit the source changes, not the generated files" echo "" diff --git a/tito/commands/status.py b/tito/commands/status.py index 4d7058ec..032f5f6e 100644 --- a/tito/commands/status.py +++ b/tito/commands/status.py @@ -45,7 +45,7 @@ class StatusCommand(BaseCommand): short_name = module_name[3:] # Remove "00_" prefix else: short_name = module_name - dev_file = module_path / f"{short_name}_dev.py" + dev_file = module_path / f"{short_name}.py" if not dev_file.exists(): return "not_found" @@ -125,9 +125,9 @@ class StatusCommand(BaseCommand): console = self.console # Scan modules directory - modules_dir = Path("modules/source") + modules_dir = Path("modules") if not modules_dir.exists(): - console.print(Panel("[red]❌ modules/source/ directory not found[/red]", + console.print(Panel("[red]❌ modules/ directory not found[/red]", title="Error", border_style="red")) return 1 @@ -141,7 +141,7 @@ class StatusCommand(BaseCommand): title="Warning", border_style="yellow")) return 0 - console.print(Panel(f"📋 Found {len(module_dirs)} modules in modules/source directory", + console.print(Panel(f"📋 Found {len(module_dirs)} modules in modules directory", title="Module Status Check", border_style="bright_cyan")) # Create status table @@ -209,7 +209,7 @@ class StatusCommand(BaseCommand): console.print(f" [bold cyan]tito status --comprehensive[/bold cyan] # Full system health dashboard") console.print(f" [bold cyan]tito module test --all[/bold cyan] # Test all modules") console.print(f" [bold cyan]tito module test MODULE_NAME[/bold cyan] # Test specific module") - console.print(f" [bold cyan]pytest modules/source/*/ -k test_[/bold cyan] # Run pytest on inline tests") + console.print(f" [bold cyan]pytest modules/*/ -k test_[/bold cyan] # Run pytest on inline tests") console.print(f" [bold cyan]pytest tests/test_*.py[/bold cyan] # Run external tests") # Detailed view @@ -243,7 +243,7 @@ class StatusCommand(BaseCommand): short_name = module_name[3:] # Remove "00_" prefix else: short_name = module_name - dev_file = module_dir / f"{short_name}_dev.py" + dev_file = module_dir / f"{short_name}.py" readme_file = module_dir / "README.md" metadata_file = module_dir / "module.yaml" @@ -386,7 +386,7 @@ class StatusCommand(BaseCommand): if status['dev_file']: dev_status += f" ({status.get('export_count', 0)} exports, {status.get('inline_test_count', 0)} inline tests)" - files_table.add_row(f"{module_name}_dev.py", dev_status) + files_table.add_row(f"{module_name}.py", dev_status) files_table.add_row("tests/test_*.py", "✅ Found" if status['external_tests'] else "❌ Missing") files_table.add_row("README.md", "✅ Found" if status['readme'] else "❌ Missing") @@ -396,7 +396,7 @@ class StatusCommand(BaseCommand): if status['dev_file'] or status['external_tests']: console.print("\n[dim]💡 Test 
            console.print("\n[dim]💡 Test commands:[/dim]")
             if status['dev_file']:
-                console.print(f"[dim]  pytest modules/source/{module_name}/{module_name}_dev.py -k test_[/dim]")
+                console.print(f"[dim]  pytest modules/{module_name}/{module_name}.py -k test_[/dim]")
             if status['external_tests']:
                 short_name = module_name[3:] if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))) else module_name
                 console.print(f"[dim]  pytest tests/test_{short_name}.py -v[/dim]")
@@ -415,7 +415,7 @@ class StatusCommand(BaseCommand):
             console.print(f"📝 {metadata['description']}")
 
         # Export info (read from dev file - source of truth)
-        module_path = Path(f"modules/source/{module_name}")
+        module_path = Path(f"modules/{module_name}")
         export_target = self._get_export_target(module_path)
         if export_target not in ['not_found', 'no_export', 'read_error']:
             console.print(f"📦 Exports to: {export_target}")
diff --git a/tito/commands/test.py b/tito/commands/test.py
index fad98330..6138bd56 100644
--- a/tito/commands/test.py
+++ b/tito/commands/test.py
@@ -533,7 +533,7 @@ class TestCommand(BaseCommand):
     def _discover_modules(self) -> List[str]:
         """Discover available modules."""
         modules = []
-        source_dir = Path("modules/source")
+        source_dir = Path("modules")
 
         if source_dir.exists():
             exclude_dirs = {'.quarto', '__pycache__', '.git', '.pytest_cache'}
@@ -554,7 +554,7 @@ class TestCommand(BaseCommand):
         else:
             short_name = module_name
 
-        return Path("modules/source") / module_name / f"{short_name}_dev.py"
+        return Path("modules") / module_name / f"{short_name}.py"
 
     def _generate_summary_report(self, results: List[ModuleTestResult]) -> None:
         """Generate a summary report for all modules."""
@@ -781,7 +781,7 @@ class TestCommand(BaseCommand):
                                    f"[dim]  tito module test --all --summary  - Summary report[/dim]",
                                    title="Module Required", border_style="red"))
         else:
-            console.print(Panel("[red]❌ No modules found in modules/source directory[/red]",
+            console.print(Panel("[red]❌ No modules found in modules directory[/red]",
                                title="Error", border_style="red"))
         return 1
\ No newline at end of file
diff --git a/tito/commands/view.py b/tito/commands/view.py
index db718dc9..024c87b6 100644
--- a/tito/commands/view.py
+++ b/tito/commands/view.py
@@ -48,23 +48,23 @@ class ViewCommand(BaseCommand):
         # Look for the specific dev file for this module
         # Extract module name (e.g., "tensor" from "01_tensor")
         module_name = args.module.split('_', 1)[1] if '_' in args.module else args.module
-        dev_file = module_dir / f"{module_name}_dev.py"
+        dev_file = module_dir / f"{module_name}.py"
 
         if not dev_file.exists():
-            # Fallback: look for any *_dev.py file
-            dev_files = list(module_dir.glob("*_dev.py"))
+            # Fallback: look for any *.py file
+            dev_files = list(module_dir.glob("*.py"))
             if not dev_files:
                 raise ModuleNotFoundError(
                     f"No dev file found in module '{args.module}'. Expected: {dev_file}"
                 )
 
     def _find_dev_files(self) -> List[Path]:
-        """Find all *_dev.py files in modules directory."""
+        """Find all *.py files in modules directory."""
         dev_files = []
         for module_dir in self.config.modules_dir.iterdir():
             if module_dir.is_dir():
-                # Look for any *_dev.py file in the directory
-                for dev_py in module_dir.glob("*_dev.py"):
+                # Look for any *.py file in the directory
+                for dev_py in module_dir.glob("*.py"):
                     dev_files.append(dev_py)
         return dev_files
 
@@ -131,13 +131,13 @@ class ViewCommand(BaseCommand):
             target_dir = self.config.modules_dir / args.module
             # Find the specific dev file for this module
             module_name = args.module.split('_', 1)[1] if '_' in args.module else args.module
-            dev_file = target_dir / f"{module_name}_dev.py"
+            dev_file = target_dir / f"{module_name}.py"
 
             if dev_file.exists():
                 dev_files = [dev_file]
             else:
                 # Fallback: find any dev files
-                dev_files = list(target_dir.glob("*_dev.py"))
+                dev_files = list(target_dir.glob("*.py"))
 
             self.console.print(f"🔄 Generating notebook for module: {args.module}")
         else:
@@ -145,7 +145,7 @@ class ViewCommand(BaseCommand):
             dev_files = self._find_dev_files()
             if not dev_files:
                 self.console.print(Panel(
-                    "[yellow]⚠️ No *_dev.py files found in modules/[/yellow]",
+                    "[yellow]⚠️ No *.py files found in modules/[/yellow]",
                     title="Nothing to Convert",
                     border_style="yellow"
                 ))
diff --git a/tito/core/status_analyzer.py b/tito/core/status_analyzer.py
index 517e4a9f..65e83b26 100644
--- a/tito/core/status_analyzer.py
+++ b/tito/core/status_analyzer.py
@@ -187,8 +187,8 @@ class TinyTorchStatusAnalyzer:
 
         # Check basic files - try multiple naming patterns
         possible_dev_files = [
-            module_path / f"{module_name}_dev.py",
-            module_path / f"{module_name.split('_', 1)[1]}_dev.py" if '_' in module_name else None,
+            module_path / f"{module_name}.py",
+            module_path / f"{module_name.split('_', 1)[1]}.py" if '_' in module_name else None,
         ]
         dev_file = None
         for possible_file in possible_dev_files:
@@ -197,8 +197,8 @@ class TinyTorchStatusAnalyzer:
                 break
 
         if dev_file is None:
-            # Check if there's any *_dev.py file
-            dev_files = list(module_path.glob("*_dev.py"))
+            # Check if there's any *.py file
+            dev_files = list(module_path.glob("*.py"))
             if dev_files:
                 dev_file = dev_files[0]  # Use the first one found
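Taken together, the renames in this patch settle on a small discovery protocol for a module's dev file: try the directory name itself, then the short name, then fall back to a glob. A hypothetical standalone sketch of that order (mirroring, not reproducing, the status_analyzer logic above):

from pathlib import Path
from typing import Optional

def find_dev_file(module_path: Path) -> Optional[Path]:
    name = module_path.name                                   # e.g. "02_tensor"
    short = name.split("_", 1)[1] if "_" in name else name    # e.g. "tensor"
    # Named candidates first...
    for candidate in (module_path / f"{name}.py", module_path / f"{short}.py"):
        if candidate.exists():
            return candidate
    # ...then any remaining .py file as a last resort.
    matches = sorted(module_path.glob("*.py"))
    return matches[0] if matches else None

Trying the named candidates first matters because the broad *.py glob also matches files like __init__.py; that ambiguity is the price of dropping the unambiguous *_dev.py suffix.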