From f8fd2e000c41a14d12edbfe4326d29f4e0daca20 Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Fri, 26 Sep 2025 11:51:54 -0400 Subject: [PATCH] STANDARDIZE: Consistent Linear terminology across all modules Remove backward compatibility aliases and enforce PyTorch-consistent naming: - Remove Dense = Linear alias in Module 04 (layers) - Update all Dense references to Linear in Modules 02, 08, 09, 18, 21 - Remove MaxPool2d = MaxPool2D alias in Module 17 (quantization) - Standardize fc/dense_weights to linear_weights in Module 18 (compression) Benefits: - Eliminates naming confusion between Dense/Linear terminology - Aligns with PyTorch production patterns (nn.Linear) - Reduces cognitive load with single consistent naming convention - Improves student transfer to real ML frameworks All modules tested and functionality preserved. --- modules/02_tensor/tensor_dev.py | 2 +- modules/04_layers/layers_dev.py | 45 ++++++------ modules/08_training/training_dev.py | 10 +-- modules/09_spatial/spatial_dev.py | 65 +++++++++-------- modules/17_quantization/quantization_dev.py | 2 - modules/18_compression/compression_dev.py | 78 ++++++++++----------- modules/21_mlops/mlops_dev.py | 2 +- 7 files changed, 102 insertions(+), 102 deletions(-) diff --git a/modules/02_tensor/tensor_dev.py b/modules/02_tensor/tensor_dev.py index 2e4154c8..ef3ce961 100644 --- a/modules/02_tensor/tensor_dev.py +++ b/modules/02_tensor/tensor_dev.py @@ -58,7 +58,7 @@ print("Ready to build tensors!") # # Final package structure: # from tinytorch.core.tensor import Tensor # The foundation of everything! 
# from tinytorch.core.activations import ReLU, Sigmoid, Tanh -# from tinytorch.core.layers import Dense, Conv2D +# from tinytorch.core.layers import Linear, Conv2D # ``` # # **Why this matters:** diff --git a/modules/04_layers/layers_dev.py b/modules/04_layers/layers_dev.py index 0fb924ab..71c12a75 100644 --- a/modules/04_layers/layers_dev.py +++ b/modules/04_layers/layers_dev.py @@ -110,8 +110,8 @@ class Module: class MLP(Module): def __init__(self): super().__init__() - self.layer1 = Dense(784, 128) # Auto-registered! - self.layer2 = Dense(128, 10) # Auto-registered! + self.layer1 = Linear(784, 128) # Auto-registered! + self.layer2 = Linear(128, 10) # Auto-registered! def forward(self, x): x = self.layer1(x) @@ -520,9 +520,6 @@ class Linear(Module): return Tensor(output_data) ### END SOLUTION -# Backward compatibility alias -#| export -Dense = Linear # %% [markdown] """ @@ -538,7 +535,7 @@ def test_dense_layer(): print("๐Ÿงช Testing Dense Layer...") # Test case 1: Basic functionality - layer = Dense(input_size=3, output_size=2) + layer = Linear(input_size=3, output_size=2) input_tensor = Tensor([[1.0, 2.0, 3.0]]) # Shape: (1, 3) output = layer.forward(input_tensor) @@ -547,13 +544,13 @@ def test_dense_layer(): print("โœ… Output shape correct") # Test case 2: No bias - layer_no_bias = Dense(input_size=2, output_size=3, use_bias=False) + layer_no_bias = Linear(input_size=2, output_size=3, use_bias=False) assert layer_no_bias.bias is None, "Bias should be None when use_bias=False" print("โœ… No bias option works") # Test case 3: Multiple samples (batch processing) batch_input = Tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) # Shape: (3, 2) - layer_batch = Dense(input_size=2, output_size=2) + layer_batch = Linear(input_size=2, output_size=2) batch_output = layer_batch.forward(batch_input) assert batch_output.shape == (3, 2), f"Expected shape (3, 2), got {batch_output.shape}" @@ -565,7 +562,7 @@ def test_dense_layer(): print("โœ… Callable interface works") # Test case 
5: Parameter initialization - layer_init = Dense(input_size=10, output_size=5) + layer_init = Linear(input_size=10, output_size=5) assert layer_init.weights.shape == (10, 5), f"Expected weights shape (10, 5), got {layer_init.weights.shape}" assert layer_init.bias.shape == (5,), f"Expected bias shape (5,), got {layer_init.bias.shape}" @@ -590,7 +587,7 @@ def test_dense_parameter_management(): print("๐Ÿงช Testing Dense Layer Parameter Management...") # Test case 1: Parameter registration - layer = Dense(input_size=3, output_size=2) + layer = Linear(input_size=3, output_size=2) params = layer.parameters() assert len(params) == 2, f"Expected 2 parameters (weights + bias), got {len(params)}" @@ -602,8 +599,8 @@ def test_dense_parameter_management(): class SimpleNetwork(Module): def __init__(self): super().__init__() - self.layer1 = Dense(4, 3) - self.layer2 = Dense(3, 2) + self.layer1 = Linear(4, 3) + self.layer2 = Linear(3, 2) def forward(self, x): x = self.layer1(x) @@ -624,13 +621,13 @@ def test_dense_parameter_management(): print("โœ… Network forward pass works") # Test case 4: Parameter shapes - layer = Dense(input_size=10, output_size=5) + layer = Linear(input_size=10, output_size=5) assert layer.weights.shape == (10, 5), f"Expected weights shape (10, 5), got {layer.weights.shape}" assert layer.bias.shape == (5,), f"Expected bias shape (5,), got {layer.bias.shape}" print("โœ… Parameter shapes correct") # Test case 5: No bias option - layer_no_bias = Dense(input_size=3, output_size=2, use_bias=False) + layer_no_bias = Linear(input_size=3, output_size=2, use_bias=False) params_no_bias = layer_no_bias.parameters() assert len(params_no_bias) == 1, f"Expected 1 parameter (weights only), got {len(params_no_bias)}" @@ -742,7 +739,7 @@ def test_sequential_network(): print("โœ… Empty Sequential network creation") # Test case 2: Create network with layers - layers = [Dense(3, 4), Dense(4, 2)] + layers = [Linear(3, 4), Linear(4, 2)] network = Sequential(layers) assert 
len(network.layers) == 2, "Network should have 2 layers" print("โœ… Sequential network with layers") @@ -760,7 +757,7 @@ def test_sequential_network(): print("โœ… Parameter collection from all layers") # Test case 5: Adding layers dynamically - network.add(Dense(2, 1)) + network.add(Linear(2, 1)) assert len(network.layers) == 3, "Network should have 3 layers after adding one" # Test forward pass after adding layer @@ -920,7 +917,7 @@ def test_flatten_operations(): # Test case 5: Integration with Sequential network = Sequential([ - Dense(8, 4), + Linear(8, 4), Flatten() ]) test_input = Tensor(np.random.randn(2, 8)) @@ -1193,8 +1190,8 @@ def run_comprehensive_tests(): print("\n2. Dense Layer Composition:") # Create a simple 2-layer network - layer1 = Dense(4, 3) - layer2 = Dense(3, 2) + layer1 = Linear(4, 3) + layer2 = Linear(3, 2) # Test data flow input_data = Tensor([[1, 2, 3, 4]]) @@ -1218,7 +1215,7 @@ def run_comprehensive_tests(): # Test 4: Parameter access and modification print("\n4. Parameter Management:") - layer = Dense(5, 3) + layer = Linear(5, 3) original_weights = layer.weights.data.copy() # Simulate parameter update @@ -1246,8 +1243,8 @@ def demonstrate_layer_composition(): print("=" * 50) print("\n1. 
Creating individual layers:") - layer1 = Dense(input_size=4, output_size=3) - layer2 = Dense(input_size=3, output_size=2) + layer1 = Linear(input_size=4, output_size=3) + layer2 = Linear(input_size=3, output_size=2) print(f" Layer 1: {layer1.input_size} โ†’ {layer1.output_size}") print(f" Layer 2: {layer2.input_size} โ†’ {layer2.output_size}") @@ -1268,8 +1265,8 @@ def demonstrate_layer_composition(): class TwoLayerNetwork(Module): def __init__(self, input_size, hidden_size, output_size): super().__init__() - self.layer1 = Dense(input_size, hidden_size) - self.layer2 = Dense(hidden_size, output_size) + self.layer1 = Linear(input_size, hidden_size) + self.layer2 = Linear(hidden_size, output_size) def forward(self, x): x = self.layer1(x) diff --git a/modules/08_training/training_dev.py b/modules/08_training/training_dev.py index 3a65c6dc..f8489589 100644 --- a/modules/08_training/training_dev.py +++ b/modules/08_training/training_dev.py @@ -69,7 +69,7 @@ sys.path.append(os.path.abspath('modules/source/09_dataloader')) # Import all the building blocks we need from tinytorch.core.tensor import Tensor from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax -from tinytorch.core.layers import Dense +from tinytorch.core.layers import Linear from tinytorch.core.networks import Sequential, create_mlp from tinytorch.core.spatial import Conv2D, flatten from tinytorch.utils.data import Dataset, DataLoader @@ -918,7 +918,7 @@ class Trainer: 4. 
Prepare for training and validation loops EXAMPLE: - model = Sequential([Dense(10, 5), ReLU(), Dense(5, 2)]) + model = Sequential([Linear(10, 5), ReLU(), Linear(5, 2)]) optimizer = Adam(model.parameters, learning_rate=0.001) loss_fn = CrossEntropyLoss() metrics = [Accuracy()] @@ -1260,7 +1260,7 @@ def test_unit_trainer(): print("๐Ÿ”ฌ Unit Test: Trainer Class...") # Create simple model and components - model = Sequential([Dense(2, 3), ReLU(), Dense(3, 2)]) # Simple model + model = Sequential([Linear(2, 3), ReLU(), Linear(3, 2)]) # Simple model optimizer = SGD([], learning_rate=0.01) # Empty parameters list for testing loss_fn = MeanSquaredError() metrics = [Accuracy()] @@ -1608,7 +1608,7 @@ def test_training_pipeline_profiler(): profiler = TrainingPipelineProfiler(warning_threshold_seconds=1.0) # Create test components - model = Sequential([Dense(10, 5), ReLU(), Dense(5, 2)]) + model = Sequential([Linear(10, 5), ReLU(), Linear(5, 2)]) optimizer = SGD([], learning_rate=0.01) loss_fn = MeanSquaredError() @@ -1839,7 +1839,7 @@ def test_production_training_optimizer(): optimizer_tool = ProductionTrainingOptimizer() # Create test components - model = Sequential([Dense(10, 5), ReLU(), Dense(5, 2)]) + model = Sequential([Linear(10, 5), ReLU(), Linear(5, 2)]) optimizer = SGD([], learning_rate=0.01) loss_fn = MeanSquaredError() diff --git a/modules/09_spatial/spatial_dev.py b/modules/09_spatial/spatial_dev.py index b668eb0a..ea10ce47 100644 --- a/modules/09_spatial/spatial_dev.py +++ b/modules/09_spatial/spatial_dev.py @@ -80,8 +80,8 @@ print("Ready to build convolutional neural networks!") ```python # Final package structure: -from tinytorch.core.cnn import Conv2D, conv2d_naive, flatten # CNN operations! -from tinytorch.core.layers import Dense # Fully connected layers +from tinytorch.core.spatial import Conv2D, MaxPool2D, flatten # CNN operations! 
+from tinytorch.core.layers import Linear # Fully connected layers from tinytorch.core.activations import ReLU # Nonlinearity from tinytorch.core.tensor import Tensor # Foundation ``` @@ -142,6 +142,10 @@ def flatten(x, start_dim=1): # Flatten 2D to (1, total_elements) - treat as single sample total_size = int(np.prod(data.shape)) new_shape = (1, total_size) + elif start_dim == 0: + # Special case: flatten everything but maintain 2D for Linear layers + total_size = int(np.prod(data.shape)) + new_shape = (1, total_size) else: # Calculate new shape - preserve dimensions before start_dim, flatten rest batch_dims = data.shape[:start_dim] @@ -1187,20 +1191,20 @@ print("๐Ÿ“ˆ Progress: Single-channel โœ“, Multi-channel โœ“, Pooling โœ“") # %% [markdown] """ -## Step 5: Flattening for Dense Layers +## Step 5: Flattening for Linear Layers ### What is Flattening? **Flattening** converts multi-dimensional tensors to 1D vectors, enabling connection between convolutional and dense layers. ### Why Flattening is Needed -- **Interface compatibility**: Conv2D outputs 2D/3D, Dense expects 1D +- **Interface compatibility**: Conv2D outputs 2D/3D, Linear expects 1D - **Network composition**: Connect spatial features to classification - **Standard practice**: Almost all CNNs use this pattern - **Dimension management**: Preserve information while changing shape ### The Pattern ``` -Conv2D โ†’ ReLU โ†’ MaxPool2D โ†’ Flatten โ†’ Dense โ†’ Output +Conv2D โ†’ ReLU โ†’ MaxPool2D โ†’ Flatten โ†’ Linear โ†’ Output ``` ### Real-World Usage @@ -1215,7 +1219,7 @@ Conv2D โ†’ ReLU โ†’ MaxPool2D โ†’ Flatten โ†’ Dense โ†’ Output # We use that single implementation throughout this module for consistency and clarity. 
print("โœ… Flatten function is available from the Spatial Helper Functions section") -print("๐Ÿ” The flatten() function handles tensor flattening for CNN-to-Dense transitions") +print("๐Ÿ” The flatten() function handles tensor flattening for CNN-to-Linear transitions") # %% [markdown] """ @@ -1281,7 +1285,7 @@ except Exception as e: print("๐ŸŽฏ Flatten behavior:") print(" Converts 2D tensor to 1D") print(" Preserves batch dimension") -print(" Enables connection to Dense layers") +print(" Enables connection to Linear layers") print("๐Ÿ“ˆ Progress: Convolution operation โœ“, Conv2D layer โœ“, Flatten โœ“") # %% [markdown] @@ -1294,13 +1298,13 @@ Let us test our complete CNN system with realistic multi-channel scenarios: #### **CIFAR-10 Style CNN** ```python # RGB images to classification -RGB Input โ†’ Multi-Channel Conv2D โ†’ ReLU โ†’ MaxPool2D โ†’ Flatten โ†’ Dense โ†’ Output +RGB Input โ†’ Multi-Channel Conv2D โ†’ ReLU โ†’ MaxPool2D โ†’ Flatten โ†’ Linear โ†’ Output ``` #### **Deep Multi-Channel CNN** ```python # Progressive feature extraction -RGB โ†’ Conv2D(3โ†’32) โ†’ ReLU โ†’ Pool โ†’ Conv2D(32โ†’64) โ†’ ReLU โ†’ Pool โ†’ Flatten โ†’ Dense +RGB โ†’ Conv2D(3โ†’32) โ†’ ReLU โ†’ Pool โ†’ Conv2D(32โ†’64) โ†’ ReLU โ†’ Pool โ†’ Flatten โ†’ Linear ``` #### **Production CNN Pattern** @@ -1320,11 +1324,11 @@ try: # Test 1: CIFAR-10 Style RGB CNN Pipeline print("\n1. 
CIFAR-10 Style RGB CNN Pipeline:") - # Create pipeline: RGB โ†’ Conv2D(3โ†’16) โ†’ ReLU โ†’ MaxPool2D โ†’ Flatten โ†’ Dense + # Create pipeline: RGB โ†’ Conv2D(3โ†’16) โ†’ ReLU โ†’ MaxPool2D โ†’ Flatten โ†’ Linear rgb_conv = Conv2D(in_channels=3, out_channels=16, kernel_size=(3, 3)) relu = ReLU() pool = MaxPool2D(pool_size=(2, 2)) - dense = Dense(input_size=16 * 3 * 3, output_size=10) # 16 channels, 3x3 spatial = 144 features + dense = Linear(input_size=16 * 3 * 3, output_size=10) # 16 channels, 3x3 spatial = 144 features # Simulated CIFAR-10 image (3 channels, 8x8 for testing) rgb_image = Tensor(np.random.randn(3, 8, 8)) # RGB 8x8 image @@ -1334,7 +1338,7 @@ try: conv_features = rgb_conv(rgb_image) # (3,8,8) โ†’ (16,6,6) activated = relu(conv_features) # (16,6,6) โ†’ (16,6,6) pooled = pool(activated) # (16,6,6) โ†’ (16,3,3) - flattened = flatten(pooled) # (16,3,3) โ†’ (1,144) + flattened = flatten(pooled, start_dim=0) # (16,3,3) โ†’ (1,144) predictions = dense(flattened) # (1,144) โ†’ (1,10) assert conv_features.shape == (16, 6, 6), f"Conv features wrong: {conv_features.shape}" @@ -1348,14 +1352,14 @@ try: # Test 2: Deep Multi-Channel CNN print("\n2. 
Deep Multi-Channel CNN:") - # Create deeper pipeline: RGB โ†’ Conv1(3โ†’32) โ†’ ReLU โ†’ Pool โ†’ Conv2(32โ†’64) โ†’ ReLU โ†’ Pool โ†’ Dense + # Create deeper pipeline: RGB โ†’ Conv1(3โ†’32) โ†’ ReLU โ†’ Pool โ†’ Conv2(32โ†’64) โ†’ ReLU โ†’ Pool โ†’ Linear conv1_deep = Conv2D(in_channels=3, out_channels=32, kernel_size=(3, 3)) relu1 = ReLU() pool1 = MaxPool2D(pool_size=(2, 2)) conv2_deep = Conv2D(in_channels=32, out_channels=64, kernel_size=(3, 3)) relu2 = ReLU() pool2 = MaxPool2D(pool_size=(2, 2)) - classifier_deep = Dense(input_size=64 * 1 * 1, output_size=5) # 64 channels, 1x1 spatial + classifier_deep = Linear(input_size=64 * 1 * 1, output_size=5) # 64 channels, 1x1 spatial # Larger RGB input for deep processing large_rgb = Tensor(np.random.randn(3, 12, 12)) # RGB 12x12 image @@ -1368,7 +1372,7 @@ try: h4 = conv2_deep(h3) # (32,5,5) โ†’ (64,3,3) h5 = relu2(h4) # (64,3,3) โ†’ (64,3,3) h6 = pool2(h5) # (64,3,3) โ†’ (64,1,1) - h7 = flatten(h6) # (64,1,1) โ†’ (1,64) + h7 = flatten(h6, start_dim=0) # (64,1,1) โ†’ (1,64) output_deep = classifier_deep(h7) # (1,64) โ†’ (1,5) assert h1.shape == (32, 10, 10), f"Conv1 output wrong: {h1.shape}" @@ -1398,7 +1402,7 @@ try: # Create classifier with correct input size feature_size = batch_flat.shape[1] # 32 features - batch_classifier = Dense(input_size=feature_size, output_size=3) + batch_classifier = Linear(input_size=feature_size, output_size=3) batch_pred = batch_classifier(batch_flat) # (4,32) โ†’ (4,3) assert batch_conv_out.shape == (4, 8, 4, 4), f"Batch conv wrong: {batch_conv_out.shape}" @@ -1424,10 +1428,10 @@ try: # Analyze different configurations configs = [ - (Conv2D(1, 8, (3, 3)), "1โ†’8 channels"), - (Conv2D(3, 16, (3, 3)), "3โ†’16 channels (RGB)"), - (Conv2D(16, 32, (3, 3)), "16โ†’32 channels"), - (Conv2D(32, 64, (3, 3)), "32โ†’64 channels"), + (Conv2D(in_channels=1, out_channels=8, kernel_size=(3, 3)), "1โ†’8 channels"), + (Conv2D(in_channels=3, out_channels=16, kernel_size=(3, 3)), "3โ†’16 channels (RGB)"), + 
(Conv2D(in_channels=16, out_channels=32, kernel_size=(3, 3)), "16โ†’32 channels"), + (Conv2D(in_channels=32, out_channels=64, kernel_size=(3, 3)), "32โ†’64 channels"), ] for conv_layer, desc in configs: @@ -1443,7 +1447,7 @@ try: print(" โ€ข Batch processing with multiple channels") print(" โ€ข Backward compatibility with single-channel") print(" โ€ข Production-ready parameter scaling") - print(" โ€ข Complete Conv โ†’ Pool โ†’ Dense pipelines") + print(" โ€ข Complete Conv โ†’ Pool โ†’ Linear pipelines") print("๐Ÿ“ˆ Progress: Production-ready multi-channel CNN system!") except Exception as e: @@ -1559,20 +1563,21 @@ def test_module_conv2d_tensor_compatibility(): # 1. Define a Conv2D layer # Kernel of size 3x3 - conv_layer = Conv2D((3, 3)) + conv_layer = Conv2D(in_channels=1, out_channels=1, kernel_size=(3, 3)) # 2. Create a batch of 5 grayscale images (10x10) - # Shape: (batch_size, height, width) - input_images = np.random.randn(5, 10, 10) + # Shape: (batch_size, channels, height, width) + input_images = np.random.randn(5, 1, 10, 10) input_tensor = Tensor(input_images) # 3. Perform a forward pass output_tensor = conv_layer(input_tensor) # 4. Assert the output shape is correct + # Output: (batch_size, out_channels, height, width) # Output height = 10 - 3 + 1 = 8 # Output width = 10 - 3 + 1 = 8 - expected_shape = (5, 8, 8) + expected_shape = (5, 1, 8, 8) assert isinstance(output_tensor, Tensor), "Conv2D output must be a Tensor" assert output_tensor.shape == expected_shape, f"Expected output shape {expected_shape}, but got {output_tensor.shape}" print("โœ… Integration Test Passed: Conv2D layer correctly transformed image tensor.") @@ -2020,7 +2025,7 @@ Congratulations! 
You have successfully implemented a complete multi-channel CNN - **Parameter scaling**: How memory requirements grow with channels and kernel sizes - **Spatial downsampling**: MaxPooling for translation invariance and efficiency - **Feature hierarchy**: Progressive extraction from RGB โ†’ edges โ†’ objects โ†’ concepts -- **Production architectures**: Conv โ†’ ReLU โ†’ Pool โ†’ Conv โ†’ ReLU โ†’ Pool โ†’ Dense patterns +- **Production architectures**: Conv โ†’ ReLU โ†’ Pool โ†’ Conv โ†’ ReLU โ†’ Pool โ†’ Linear patterns - **He initialization**: Proper weight initialization for stable multi-layer training ### Mathematical Foundations @@ -2044,7 +2049,7 @@ Congratulations! You have successfully implemented a complete multi-channel CNN - **Computer vision**: Face recognition, document analysis, quality inspection ### CNN Architecture Patterns -- **Basic CNN**: RGB โ†’ Conv(3โ†’32) โ†’ ReLU โ†’ Pool โ†’ Conv(32โ†’64) โ†’ ReLU โ†’ Pool โ†’ Dense +- **Basic CNN**: RGB โ†’ Conv(3โ†’32) โ†’ ReLU โ†’ Pool โ†’ Conv(32โ†’64) โ†’ ReLU โ†’ Pool โ†’ Linear - **Parameter efficiency**: 32ร—3ร—3ร—3 = 864 parameters vs 32ร—32ร—32 = 32,768 for dense layer - **Spatial hierarchy**: Early layers detect edges, later layers detect objects - **Translation invariance**: Same features detected regardless of position in image @@ -2058,7 +2063,7 @@ Congratulations! 
You have successfully implemented a complete multi-channel CNN ### Production-Ready Features ```python from tinytorch.core.spatial import Conv2D, MaxPool2D, flatten -from tinytorch.core.layers import Dense +from tinytorch.core.layers import Linear from tinytorch.core.activations import ReLU # CIFAR-10 CNN architecture @@ -2066,13 +2071,13 @@ conv1 = Conv2D(in_channels=3, out_channels=32, kernel_size=(3, 3)) pool1 = MaxPool2D(pool_size=(2, 2)) conv2 = Conv2D(in_channels=32, out_channels=64, kernel_size=(3, 3)) pool2 = MaxPool2D(pool_size=(2, 2)) -classifier = Dense(input_size=64*6*6, output_size=10) +classifier = Linear(input_size=64*6*6, output_size=10) # Process RGB image rgb_image = Tensor(np.random.randn(3, 32, 32)) # CIFAR-10 format features1 = pool1(ReLU()(conv1(rgb_image))) # (3,32,32) โ†’ (32,15,15) features2 = pool2(ReLU()(conv2(features1))) # (32,15,15) โ†’ (64,6,6) -predictions = classifier(flatten(features2)) # (64,6,6) โ†’ (1,10) +predictions = classifier(flatten(features2, start_dim=0)) # (64,6,6) โ†’ (1,10) ``` ### Next Steps diff --git a/modules/17_quantization/quantization_dev.py b/modules/17_quantization/quantization_dev.py index 7641e886..da7304df 100644 --- a/modules/17_quantization/quantization_dev.py +++ b/modules/17_quantization/quantization_dev.py @@ -63,7 +63,6 @@ from typing import Union, List, Optional, Tuple, Dict, Any try: from tinytorch.core.tensor import Tensor from tinytorch.core.spatial import Conv2d, MaxPool2D - MaxPool2d = MaxPool2D # Alias for consistent naming except ImportError: # For development, import from local modules sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_tensor')) @@ -71,7 +70,6 @@ except ImportError: try: from tensor_dev import Tensor from spatial_dev import Conv2d, MaxPool2D - MaxPool2d = MaxPool2D # Alias for consistent naming except ImportError: # Create minimal mock classes if not available class Tensor: diff --git a/modules/18_compression/compression_dev.py 
b/modules/18_compression/compression_dev.py index 93f0e38a..a4b02374 100644 --- a/modules/18_compression/compression_dev.py +++ b/modules/18_compression/compression_dev.py @@ -87,8 +87,8 @@ def _determine_layer_type_and_sparsity(shape: tuple) -> Tuple[str, float]: if len(shape) == 4: # Convolution: (filters, channels, height, width) layer_type = "Conv2D" recommended_sparsity = DEFAULT_CONV_SPARSITY # Conservative - conv layers extract spatial features - elif len(shape) == 2: # Dense/Linear: (output_neurons, input_neurons) - layer_type = "Dense" + elif len(shape) == 2: # Linear: (output_neurons, input_neurons) + layer_type = "Linear" recommended_sparsity = DEFAULT_DENSE_SPARSITY # Aggressive - dense layers have high redundancy else: layer_type = "Other" @@ -175,17 +175,17 @@ def test_redundancy_analysis(): # Create realistic CNN weights with natural sparsity np.random.seed(42) conv_weights = np.random.normal(0, 0.02, (64, 32, 3, 3)) # Conv layer - fc_weights = np.random.normal(0, 0.01, (1000, 512)) # FC layer + linear_weights = np.random.normal(0, 0.01, (1000, 512)) # Linear layer # Analyze both layer types conv_stats = analyze_weight_redundancy(conv_weights, "Conv2D Layer Weights") - fc_stats = analyze_weight_redundancy(fc_weights, "Dense Layer Weights") + linear_stats = analyze_weight_redundancy(linear_weights, "Linear Layer Weights") # Verify analysis produces reasonable results assert conv_stats['total_params'] == 64*32*3*3, "Conv param count mismatch" - assert fc_stats['total_params'] == 1000*512, "FC param count mismatch" + assert linear_stats['total_params'] == 1000*512, "Linear param count mismatch" assert conv_stats['natural_sparsity'] > 0, "Should detect some natural sparsity" - assert fc_stats['natural_sparsity'] > 0, "Should detect some natural sparsity" + assert linear_stats['natural_sparsity'] > 0, "Should detect some natural sparsity" print("✅ Weight redundancy analysis test passed!") @@ -594,7 +594,7 @@ class SparseLinear: out_features:
Number of output features Attributes: - dense_weights: Original dense weight matrix (out_features, in_features) + linear_weights: Original dense weight matrix (out_features, in_features) sparse_weights: Pruned weight matrix with zeros mask: Binary mask indicating kept weights (1=keep, 0=prune) sparsity: Fraction of weights that are zero @@ -605,8 +605,8 @@ class SparseLinear: self.in_features = in_features self.out_features = out_features - # Dense weights (will be pruned) - self.dense_weights = None + # Linear weights (will be pruned) + self.linear_weights = None self.bias = None # Sparse representation @@ -619,23 +619,23 @@ class SparseLinear: self.sparse_ops = 0 # END SOLUTION - def load_dense_weights(self, weights: np.ndarray, bias: Optional[np.ndarray] = None): + def load_linear_weights(self, weights: np.ndarray, bias: Optional[np.ndarray] = None): """Load dense weights before pruning.""" # BEGIN SOLUTION assert weights.shape == (self.out_features, self.in_features), f"Weight shape mismatch" - self.dense_weights = weights.copy() + self.linear_weights = weights.copy() self.bias = bias.copy() if bias is not None else np.zeros(self.out_features) # END SOLUTION def prune_weights(self, sparsity: float = DEFAULT_SPARSITY): """Prune weights using magnitude-based pruning.""" # BEGIN SOLUTION - if self.dense_weights is None: + if self.linear_weights is None: raise ValueError("Must load dense weights before pruning") # Use magnitude pruner pruner = MagnitudePruner() - self.sparse_weights, self.mask, stats = pruner.prune(self.dense_weights, sparsity) + self.sparse_weights, self.mask, stats = pruner.prune(self.linear_weights, sparsity) self.sparsity = stats['actual_sparsity'] print(f"โœ‚๏ธ Pruned {self.sparsity:.1%} of weights") @@ -645,14 +645,14 @@ class SparseLinear: def forward_dense(self, x: np.ndarray) -> np.ndarray: """Forward pass using dense weights (reference).""" # BEGIN SOLUTION - if self.dense_weights is None: - raise ValueError("Dense weights not loaded") + 
if self.linear_weights is None: + raise ValueError("Linear weights not loaded") # Count operations self.dense_ops = self.in_features * self.out_features # Standard matrix multiply: y = x @ W^T + b - output = np.dot(x, self.dense_weights.T) + self.bias + output = np.dot(x, self.linear_weights.T) + self.bias return output # END SOLUTION @@ -759,7 +759,7 @@ def test_sparse_neural_network(): np.random.seed(42) weights = np.random.normal(0, 0.1, (128, 256)) bias = np.random.normal(0, 0.01, 128) - sparse_layer.load_dense_weights(weights, bias) + sparse_layer.load_linear_weights(weights, bias) # Prune weights sparse_layer.prune_weights(sparsity=0.8) # 80% sparsity @@ -773,13 +773,13 @@ def test_sparse_neural_network(): output_sparse_opt = sparse_layer.forward_sparse_optimized(x) print(f"Output shapes:") - print(f" Dense: {output_dense.shape}") + print(f" Linear: {output_dense.shape}") print(f" Sparse naive: {output_sparse_naive.shape}") print(f" Sparse optimized: {output_sparse_opt.shape}") # Verify outputs have correct shape expected_shape = (4, 128) - assert output_dense.shape == expected_shape, "Dense output shape incorrect" + assert output_dense.shape == expected_shape, "Linear output shape incorrect" assert output_sparse_naive.shape == expected_shape, "Sparse naive output shape incorrect" assert output_sparse_opt.shape == expected_shape, "Sparse optimized output shape incorrect" @@ -801,7 +801,7 @@ def test_sparse_neural_network(): print(f"\nPerformance Benchmark:") print(f" Sparsity: {benchmark['sparsity']:.1%}") - print(f" Dense ops: {benchmark['dense_ops']:,}") + print(f" Linear ops: {benchmark['dense_ops']:,}") print(f" Sparse ops: {benchmark['sparse_ops']:,}") print(f" Theoretical speedup: {benchmark['theoretical_speedup']:.1f}x") print(f" Actual speedup: {benchmark['actual_speedup']:.1f}x") @@ -809,7 +809,7 @@ def test_sparse_neural_network(): # Verify operation counting expected_dense_ops = 256 * 128 - assert benchmark['dense_ops'] == expected_dense_ops, 
"Dense op count incorrect" + assert benchmark['dense_ops'] == expected_dense_ops, "Linear op count incorrect" assert benchmark['sparse_ops'] < benchmark['dense_ops'], "Sparse should use fewer ops" print("โœ… Sparse neural network test passed!") @@ -841,13 +841,13 @@ def _determine_layer_type_and_sparsity(shape: tuple) -> Tuple[str, float]: shape: Weight tensor shape Returns: - layer_type: Type of layer (Conv2D, Dense, Other) + layer_type: Type of layer (Conv2D, Linear, Other) recommended_sparsity: Recommended sparsity level for this layer type """ if len(shape) == CONV2D_NDIM: # Conv layer: (out, in, H, W) return "Conv2D", DEFAULT_CONV_SPARSITY - elif len(shape) == DENSE_NDIM: # Dense layer: (out, in) - return "Dense", DEFAULT_DENSE_SPARSITY + elif len(shape) == DENSE_NDIM: # Linear layer: (out, in) + return "Linear", DEFAULT_DENSE_SPARSITY else: return "Other", DEFAULT_OTHER_SPARSITY @@ -980,7 +980,7 @@ class ModelCompressor: ) analysis['total_params'] += weights.size - if layer_type in ['Conv2D', 'Dense']: + if layer_type in ['Conv2D', 'Linear']: analysis['compressible_params'] += weights.size _print_layer_analysis_row(layer_name, layer_type, weights.size, @@ -1155,8 +1155,8 @@ def test_compression_pipeline(): model_weights = { 'conv1': np.random.normal(0, 0.02, (32, 3, 3, 3)), # Conv: 32 filters, 3 input channels 'conv2': np.random.normal(0, 0.02, (64, 32, 3, 3)), # Conv: 64 filters, 32 input channels - 'fc1': np.random.normal(0, 0.01, (512, 1024)), # Dense: 512 โ†’ 1024 - 'fc2': np.random.normal(0, 0.01, (10, 512)), # Dense: 10 โ†’ 512 (output layer) + 'linear1': np.random.normal(0, 0.01, (512, 1024)), # Linear: 512 โ†’ 1024 + 'linear2': np.random.normal(0, 0.01, (10, 512)), # Linear: 10 โ†’ 512 (output layer) } # Create compressor @@ -1168,18 +1168,18 @@ def test_compression_pipeline(): assert analysis['total_params'] > 0, "Should count total parameters" assert len(analysis['layers']) == 4, "Should analyze all 4 layers" assert 'conv1' in analysis['layers'], 
"Should analyze conv1" - assert 'fc1' in analysis['layers'], "Should analyze fc1" + assert 'linear1' in analysis['layers'], "Should analyze linear1" # Verify layer type detection assert analysis['layers']['conv1']['type'] == 'Conv2D', "Should detect conv layers" - assert analysis['layers']['fc1']['type'] == 'Dense', "Should detect dense layers" + assert analysis['layers']['linear1']['type'] == 'Linear', "Should detect linear layers" # Step 2: Compress model with custom sparsities custom_sparsities = { 'conv1': 0.5, # Conservative for first conv layer 'conv2': 0.6, # Moderate for second conv layer - 'fc1': 0.8, # Aggressive for large dense layer - 'fc2': 0.3 # Conservative for output layer + 'linear1': 0.8, # Aggressive for large dense layer + 'linear2': 0.3 # Conservative for output layer } compressed_model = compressor.compress_model(model_weights, custom_sparsities) @@ -1262,8 +1262,8 @@ def profile_compression_memory(): model_weights = { 'conv1': np.random.normal(0, 0.02, (128, 64, 3, 3)), # ~0.3M parameters 'conv2': np.random.normal(0, 0.02, (256, 128, 3, 3)), # ~1.2M parameters - 'fc1': np.random.normal(0, 0.01, (1024, 4096)), # ~4.2M parameters - 'fc2': np.random.normal(0, 0.01, (10, 1024)), # ~10K parameters + 'linear1': np.random.normal(0, 0.01, (1024, 4096)), # ~4.2M parameters + 'linear2': np.random.normal(0, 0.01, (10, 1024)), # ~10K parameters } snapshot1 = tracemalloc.take_snapshot() @@ -1351,13 +1351,13 @@ def analyze_deployment_scenarios(): # Model sizes at different compression levels model_configs = [ - {'name': 'Dense Model', 'size_mb': 200, 'gflops': 50, 'accuracy': 95.0}, + {'name': 'Linear Model', 'size_mb': 200, 'gflops': 50, 'accuracy': 95.0}, {'name': '50% Sparse', 'size_mb': 100, 'gflops': 25, 'accuracy': 94.5}, {'name': '70% Sparse', 'size_mb': 60, 'gflops': 15, 'accuracy': 93.8}, {'name': '90% Sparse', 'size_mb': 20, 'gflops': 5, 'accuracy': 91.2}, ] - print("Scenario | Memory | Compute | Dense | 50% | 70% | 90% | Best Option") + 
print("Scenario | Memory | Compute | Linear | 50% | 70% | 90% | Best Option") print("-" * 80) for scenario in scenarios: @@ -1435,7 +1435,7 @@ def benchmark_sparse_inference_speedup(): # Load and prune weights weights = np.random.normal(0, 0.1, (size[1], size[0])) - sparse_layer.load_dense_weights(weights) + sparse_layer.load_linear_weights(weights) sparse_layer.prune_weights(sparsity) # Benchmark @@ -1711,11 +1711,11 @@ def run_all_tests(): np.random.seed(42) demo_model = { 'backbone_conv': np.random.normal(0, 0.02, (128, 64, 3, 3)), - 'classifier_fc': np.random.normal(0, 0.01, (10, 2048)), + 'classifier_linear': np.random.normal(0, 0.01, (10, 2048)), } compressor = ModelCompressor() - compressed = compressor.compress_model(demo_model, {'backbone_conv': 0.7, 'classifier_fc': 0.8}) + compressed = compressor.compress_model(demo_model, {'backbone_conv': 0.7, 'classifier_linear': 0.8}) original_params = sum(w.size for w in demo_model.values()) compressed_params = sum(np.sum(info['weights'] != 0) for info in compressed.values()) @@ -1773,7 +1773,7 @@ b) The structured vs unstructured tradeoff: - Inference speed: structured pruning provides actual speedup, unstructured often theoretical only c) Layer-specific sparsity tolerance: -- Dense layers: High redundancy, many parameters, more overparametrized โ†’ tolerate 80% sparsity +- Linear layers: High redundancy, many parameters, more overparametrized โ†’ tolerate 80% sparsity - Conv layers: Fewer parameters, each filter captures important spatial features โ†’ more sensitive - First layers: Extract low-level features (edges, textures) โ†’ very sensitive to pruning - Later layers: More abstract features with redundancy โ†’ can handle moderate pruning diff --git a/modules/21_mlops/mlops_dev.py b/modules/21_mlops/mlops_dev.py index ace87e63..8a53916d 100644 --- a/modules/21_mlops/mlops_dev.py +++ b/modules/21_mlops/mlops_dev.py @@ -67,7 +67,7 @@ from collections import defaultdict try: from tinytorch.core.tensor import Tensor 
from tinytorch.core.training import Trainer - from tinytorch.core.layers import Dense + from tinytorch.core.layers import Linear except ImportError: # For development, fallback gracefully print("⚠️ Some TinyTorch modules not available - MLOps will use mock implementations")