diff --git a/modules/networks/networks_dev.py b/modules/networks/networks_dev.py index 8a485705..b23e38a9 100644 --- a/modules/networks/networks_dev.py +++ b/modules/networks/networks_dev.py @@ -30,13 +30,27 @@ Welcome to the Networks module! This is where we compose layers into complete ne This module builds on previous modules: - **tensor** → **activations** → **layers** → **networks** - Clean composition: math functions → building blocks → complete systems +""" -## Module → Package Structure -**🎓 Teaching vs. 🔧 Building**: -- **Learning side**: Work in `modules/networks/networks_dev.py` -- **Building side**: Exports to `tinytorch/core/networks.py` +# %% [markdown] +""" +## 📦 Where This Code Lives in the Final Package -This module teaches how to compose layers into complete neural network architectures. +**Learning Side:** You work in `modules/networks/networks_dev.py` +**Building Side:** Code exports to `tinytorch.core.networks` + +```python +# Final package structure: +from tinytorch.core.networks import Sequential, MLP +from tinytorch.core.layers import Dense, Conv2D +from tinytorch.core.activations import ReLU, Sigmoid, Tanh +from tinytorch.core.tensor import Tensor +``` + +**Why this matters:** +- **Learning:** Focused modules for deep understanding +- **Production:** Proper organization like PyTorch's `torch.nn` +- **Consistency:** All network architectures live together in `core.networks` """ # %% @@ -87,22 +101,46 @@ def _should_show_plots(): """ ## Step 1: What is a Network? -A **network** is a composition of layers that transforms input data into output predictions. Think of it as: +### Definition +A **network** is a composition of layers that transforms input data into output predictions. Think of it as a pipeline of transformations: ``` Input → Layer1 → Layer2 → Layer3 → Output ``` -**The fundamental insight**: Neural networks are just function composition! 
+### Why Networks Matter +- **Function composition**: Complex behavior from simple building blocks +- **Learnable parameters**: Each layer has weights that can be learned +- **Architecture design**: Different layouts solve different problems +- **Real-world applications**: Classification, regression, generation, etc. + +### The Fundamental Insight +**Neural networks are just function composition!** - Each layer is a function: `f_i(x)` - The network is: `f(x) = f_n(...f_2(f_1(x)))` - Complex behavior emerges from simple building blocks -**Why networks matter**: -- They solve real problems (classification, regression, etc.) -- Architecture determines what problems you can solve -- Understanding networks = understanding deep learning -- They're the foundation for all modern AI +### Real-World Examples +- **MLP (Multi-Layer Perceptron)**: Classic feedforward network +- **CNN (Convolutional Neural Network)**: For image processing +- **RNN (Recurrent Neural Network)**: For sequential data +- **Transformer**: For attention-based processing + +### Visual Intuition +``` +Input: [1, 2, 3] (3 features) +Layer1: [1.4, 2.8] (linear transformation) +Layer2: [1.4, 2.8] (nonlinearity) +Layer3: [0.7] (final prediction) +``` + +### The Math Behind It +For a network with layers `f_1, f_2, ..., f_n`: +``` +f(x) = f_n(f_{n-1}(...f_2(f_1(x)))) +``` + +Each layer transforms the data, and the final output is the composition of all these transformations. Let's start by building the most fundamental network: **Sequential**. """ @@ -120,6 +158,27 @@ class Sequential: layers: List of layers to compose TODO: Implement the Sequential network with forward pass. + + APPROACH: + 1. Store the list of layers as an instance variable + 2. Implement forward pass that applies each layer in sequence + 3. 
Make the network callable for easy use + + EXAMPLE: + network = Sequential([ + Dense(3, 4), + ReLU(), + Dense(4, 2), + Sigmoid() + ]) + x = Tensor([[1, 2, 3]]) + y = network(x) # Forward pass through all layers + + HINTS: + - Store layers in self.layers + - Use a for loop to apply each layer in order + - Each layer's output becomes the next layer's input + - Return the final output """ def __init__(self, layers: List): @@ -130,6 +189,14 @@ class Sequential: layers: List of layers to compose in order TODO: Store the layers and implement forward pass + + STEP-BY-STEP: + 1. Store the layers list as self.layers + 2. This creates the network architecture + + EXAMPLE: + Sequential([Dense(3,4), ReLU(), Dense(4,2)]) + creates a 3-layer network: Dense → ReLU → Dense """ raise NotImplementedError("Student implementation required") @@ -144,6 +211,25 @@ class Sequential: Output tensor after passing through all layers TODO: Implement sequential forward pass through all layers + + STEP-BY-STEP: + 1. Start with the input tensor: current = x + 2. Loop through each layer in self.layers + 3. Apply each layer: current = layer(current) + 4. 
Return the final output + + EXAMPLE: + Input: Tensor([[1, 2, 3]]) + Layer1 (Dense): Tensor([[1.4, 2.8]]) + Layer2 (ReLU): Tensor([[1.4, 2.8]]) + Layer3 (Dense): Tensor([[0.7]]) + Output: Tensor([[0.7]]) + + HINTS: + - Use a for loop: for layer in self.layers: + - Apply each layer: current = layer(current) + - The output of one layer becomes input to the next + - Return the final result """ raise NotImplementedError("Student implementation required") @@ -180,292 +266,80 @@ class Sequential: # %% [markdown] """ ### 🧪 Test Your Sequential Network - -Once you implement the Sequential network above, run this cell to test it: """ # %% # Test the Sequential network +print("Testing Sequential network...") + try: - print("=== Testing Sequential Network ===") - # Create a simple 2-layer network: 3 → 4 → 2 network = Sequential([ - Dense(3, 4), + Dense(input_size=3, output_size=4), ReLU(), - Dense(4, 2), + Dense(input_size=4, output_size=2), Sigmoid() ]) + print(f"✅ Network created with {len(network.layers)} layers") + # Test with sample data - x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) - print(f"Input shape: {x.shape}") - print(f"Input data: {x.data}") + x = Tensor([[1.0, 2.0, 3.0]]) + print(f"✅ Input: {x}") # Forward pass - output = network(x) - print(f"Output shape: {output.shape}") - print(f"Output data: {output.data}") + y = network(x) + print(f"✅ Output: {y}") + print(f"✅ Output shape: {y.shape}") - print("✅ Sequential network working!") + # Verify the network works + assert y.shape == (1, 2), f"❌ Expected shape (1, 2), got {y.shape}" + assert np.all(y.data >= 0) and np.all(y.data <= 1), "❌ Sigmoid output should be between 0 and 1" + print("🎉 Sequential network works!") except Exception as e: print(f"❌ Error: {e}") - print("Make sure to implement the Sequential network!") + print("Make sure to implement the Sequential network above!") # %% [markdown] """ -## Step 2: Network Visualization +## Step 2: Understanding Network Architecture -Now let's create powerful 
visualizations to understand what our networks look like and how they work! -""" +Now let's explore how different network architectures affect the network's capabilities. -# %% -#| export -def visualize_network_architecture(network: Sequential, title: str = "Network Architecture"): - """ - Create a visual representation of network architecture. - - Args: - network: Sequential network to visualize - title: Title for the plot - """ - if not _should_show_plots(): - print("📊 Plots disabled during testing - this is normal!") - return - - fig, ax = plt.subplots(1, 1, figsize=(12, 8)) - - # Network parameters - layer_count = len(network.layers) - layer_height = 0.8 - layer_spacing = 1.2 - - # Colors for different layer types - colors = { - 'Dense': '#4CAF50', # Green - 'ReLU': '#2196F3', # Blue - 'Sigmoid': '#FF9800', # Orange - 'Tanh': '#9C27B0', # Purple - 'default': '#757575' # Gray - } - - # Draw layers - for i, layer in enumerate(network.layers): - # Determine layer type and color - layer_type = type(layer).__name__ - color = colors.get(layer_type, colors['default']) - - # Layer position - x = i * layer_spacing - y = 0 - - # Create layer box - layer_box = FancyBboxPatch( - (x - 0.3, y - layer_height/2), - 0.6, layer_height, - boxstyle="round,pad=0.1", - facecolor=color, - edgecolor='black', - linewidth=2, - alpha=0.8 - ) - ax.add_patch(layer_box) - - # Add layer label - ax.text(x, y, layer_type, ha='center', va='center', - fontsize=10, fontweight='bold', color='white') - - # Add layer details - if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'): - details = f"{layer.input_size}→{layer.output_size}" - ax.text(x, y - 0.3, details, ha='center', va='center', - fontsize=8, color='white') - - # Draw connections to next layer - if i < layer_count - 1: - next_x = (i + 1) * layer_spacing - connection = ConnectionPatch( - (x + 0.3, y), (next_x - 0.3, y), - "data", "data", - arrowstyle="->", shrinkA=5, shrinkB=5, - mutation_scale=20, fc="black", lw=2 - ) - 
ax.add_patch(connection) - - # Formatting - ax.set_xlim(-0.5, (layer_count - 1) * layer_spacing + 0.5) - ax.set_ylim(-1, 1) - ax.set_aspect('equal') - ax.axis('off') - - # Add title - plt.title(title, fontsize=16, fontweight='bold', pad=20) - - # Add legend - legend_elements = [] - for layer_type, color in colors.items(): - if layer_type != 'default': - legend_elements.append(patches.Patch(color=color, label=layer_type)) - - ax.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(1, 1)) - - plt.tight_layout() - plt.show() +### What is Network Architecture? +**Architecture** refers to how layers are arranged and connected. It determines: +- **Capacity**: How complex patterns the network can learn +- **Efficiency**: How many parameters and computations needed +- **Specialization**: What types of problems it's good at -# %% -#| export -def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = "Data Flow Through Network"): - """ - Visualize how data flows through the network. 
- - Args: - network: Sequential network - input_data: Input tensor - title: Title for the plot - """ - if not _should_show_plots(): - print("📊 Plots disabled during testing - this is normal!") - return - - # Get intermediate outputs - intermediate_outputs = [] - x = input_data - - for i, layer in enumerate(network.layers): - x = layer(x) - intermediate_outputs.append({ - 'layer': network.layers[i], - 'output': x, - 'layer_index': i - }) - - # Create visualization - fig, axes = plt.subplots(2, len(network.layers), figsize=(4*len(network.layers), 8)) - if len(network.layers) == 1: - axes = axes.reshape(1, -1) - - for i, (layer, output) in enumerate(zip(network.layers, intermediate_outputs)): - # Top row: Layer information - ax_top = axes[0, i] if len(network.layers) > 1 else axes[0] - - # Layer type and details - layer_type = type(layer).__name__ - ax_top.text(0.5, 0.8, layer_type, ha='center', va='center', - fontsize=12, fontweight='bold') - - if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'): - ax_top.text(0.5, 0.6, f"{layer.input_size} → {layer.output_size}", - ha='center', va='center', fontsize=10) - - # Output shape - ax_top.text(0.5, 0.4, f"Shape: {output['output'].shape}", - ha='center', va='center', fontsize=9) - - # Output statistics - output_data = output['output'].data - ax_top.text(0.5, 0.2, f"Mean: {np.mean(output_data):.3f}", - ha='center', va='center', fontsize=9) - ax_top.text(0.5, 0.1, f"Std: {np.std(output_data):.3f}", - ha='center', va='center', fontsize=9) - - ax_top.set_xlim(0, 1) - ax_top.set_ylim(0, 1) - ax_top.axis('off') - - # Bottom row: Output visualization - ax_bottom = axes[1, i] if len(network.layers) > 1 else axes[1] - - # Show output as heatmap or histogram - output_data = output['output'].data.flatten() - - if len(output_data) <= 20: # Small output - show as bars - ax_bottom.bar(range(len(output_data)), output_data, alpha=0.7) - ax_bottom.set_title(f"Layer {i+1} Output") - ax_bottom.set_xlabel("Output Index") - 
ax_bottom.set_ylabel("Value") - else: # Large output - show histogram - ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black') - ax_bottom.set_title(f"Layer {i+1} Output Distribution") - ax_bottom.set_xlabel("Value") - ax_bottom.set_ylabel("Frequency") - - ax_bottom.grid(True, alpha=0.3) - - plt.suptitle(title, fontsize=14, fontweight='bold') - plt.tight_layout() - plt.show() +### Common Architectures -# %% -#| export -def compare_networks(networks: List[Sequential], network_names: List[str], - input_data: Tensor, title: str = "Network Comparison"): - """ - Compare different network architectures side-by-side. - - Args: - networks: List of networks to compare - network_names: Names for each network - input_data: Input tensor to test with - title: Title for the plot - """ - if not _should_show_plots(): - print("📊 Plots disabled during testing - this is normal!") - return - - fig, axes = plt.subplots(2, len(networks), figsize=(6*len(networks), 10)) - if len(networks) == 1: - axes = axes.reshape(2, -1) - - for i, (network, name) in enumerate(zip(networks, network_names)): - # Get network output - output = network(input_data) - - # Top row: Architecture visualization - ax_top = axes[0, i] if len(networks) > 1 else axes[0] - - # Count layer types - layer_types = {} - for layer in network.layers: - layer_type = type(layer).__name__ - layer_types[layer_type] = layer_types.get(layer_type, 0) + 1 - - # Create pie chart of layer types - if layer_types: - labels = list(layer_types.keys()) - sizes = list(layer_types.values()) - colors = plt.cm.Set3(np.linspace(0, 1, len(labels))) - - ax_top.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors) - ax_top.set_title(f"{name}\nLayer Distribution") - - # Bottom row: Output comparison - ax_bottom = axes[1, i] if len(networks) > 1 else axes[1] - - output_data = output.data.flatten() - - # Show output statistics - ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black') - 
ax_bottom.axvline(np.mean(output_data), color='red', linestyle='--', - label=f'Mean: {np.mean(output_data):.3f}') - ax_bottom.axvline(np.median(output_data), color='green', linestyle='--', - label=f'Median: {np.median(output_data):.3f}') - - ax_bottom.set_title(f"{name} Output Distribution") - ax_bottom.set_xlabel("Output Value") - ax_bottom.set_ylabel("Frequency") - ax_bottom.legend() - ax_bottom.grid(True, alpha=0.3) - - plt.suptitle(title, fontsize=16, fontweight='bold') - plt.tight_layout() - plt.show() +#### 1. **MLP (Multi-Layer Perceptron)** +``` +Input → Dense → ReLU → Dense → ReLU → Dense → Output +``` +- **Use case**: General-purpose learning +- **Strengths**: Universal approximation, simple to understand +- **Weaknesses**: Doesn't exploit spatial structure -# %% [markdown] -""" -## Step 3: Building Common Architectures +#### 2. **CNN (Convolutional Neural Network)** +``` +Input → Conv2D → ReLU → Conv2D → ReLU → Dense → Output +``` +- **Use case**: Image processing, spatial data +- **Strengths**: Parameter sharing, translation invariance +- **Weaknesses**: Fixed spatial structure -Now let's build some common neural network architectures and visualize them! +#### 3. **Deep Network** +``` +Input → Dense → ReLU → Dense → ReLU → Dense → ReLU → Dense → Output +``` +- **Use case**: Complex pattern recognition +- **Strengths**: High capacity, can learn complex functions +- **Weaknesses**: More parameters, harder to train + +Let's build some common architectures! 
""" # %% @@ -479,223 +353,449 @@ def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int, input_size: Number of input features hidden_sizes: List of hidden layer sizes output_size: Number of output features - activation: Activation function for hidden layers - output_activation: Activation function for output layer + activation: Activation function for hidden layers (default: ReLU) + output_activation: Activation function for output layer (default: Sigmoid) Returns: - Sequential network + Sequential network with MLP architecture + + TODO: Implement MLP creation with alternating Dense and activation layers. + + APPROACH: + 1. Start with an empty list of layers + 2. Add the first Dense layer: input_size → first hidden size + 3. For each hidden layer: + - Add activation function + - Add Dense layer connecting to next hidden size + 4. Add final activation function + 5. Add final Dense layer: last hidden size → output_size + 6. Add output activation function + 7. Return Sequential(layers) + + EXAMPLE: + create_mlp(3, [4, 2], 1) creates: + Dense(3→4) → ReLU → Dense(4→2) → ReLU → Dense(2→1) → Sigmoid + + HINTS: + - Start with layers = [] + - Add Dense layers with appropriate input/output sizes + - Add activation functions between Dense layers + - Don't forget the final output activation """ + raise NotImplementedError("Student implementation required") + +# %% +#| hide +#| export +def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int, + activation=ReLU, output_activation=Sigmoid) -> Sequential: + """Create a Multi-Layer Perceptron (MLP) network.""" layers = [] - # Input layer - if hidden_sizes: - layers.append(Dense(input_size, hidden_sizes[0])) + # Add first layer + current_size = input_size + for hidden_size in hidden_sizes: + layers.append(Dense(input_size=current_size, output_size=hidden_size)) layers.append(activation()) - - # Hidden layers - for i in range(len(hidden_sizes) - 1): - layers.append(Dense(hidden_sizes[i], 
hidden_sizes[i + 1])) - layers.append(activation()) - - # Output layer - layers.append(Dense(hidden_sizes[-1], output_size)) - else: - # Direct input to output - layers.append(Dense(input_size, output_size)) + current_size = hidden_size + # Add output layer + layers.append(Dense(input_size=current_size, output_size=output_size)) layers.append(output_activation()) return Sequential(layers) +# %% [markdown] +""" +### 🧪 Test Your MLP Creation +""" + # %% -# Test MLP creation and visualization +# Test MLP creation +print("Testing MLP creation...") + try: - print("=== Testing MLP Creation and Visualization ===") - # Create different MLP architectures - mlp_small = create_mlp(input_size=3, hidden_sizes=[4], output_size=2) - mlp_medium = create_mlp(input_size=10, hidden_sizes=[16, 8], output_size=3) - mlp_large = create_mlp(input_size=784, hidden_sizes=[128, 64, 32], output_size=10) + mlp1 = create_mlp(input_size=3, hidden_sizes=[4], output_size=1) + mlp2 = create_mlp(input_size=5, hidden_sizes=[8, 4], output_size=2) + mlp3 = create_mlp(input_size=2, hidden_sizes=[10, 6, 3], output_size=1, activation=Tanh) - print("Created MLP architectures:") - print(f" Small: 3 → 4 → 2") - print(f" Medium: 10 → 16 → 8 → 3") - print(f" Large: 784 → 128 → 64 → 32 → 10") + print(f"✅ MLP1: {len(mlp1.layers)} layers") + print(f"✅ MLP2: {len(mlp2.layers)} layers") + print(f"✅ MLP3: {len(mlp3.layers)} layers") - # Test with sample data - x = Tensor(np.random.randn(5, 3).astype(np.float32)) + # Test forward pass + x = Tensor([[1.0, 2.0, 3.0]]) + y1 = mlp1(x) + print(f"✅ MLP1 output: {y1}") - # Visualize architectures - visualize_network_architecture(mlp_small, "Small MLP Architecture") - visualize_network_architecture(mlp_medium, "Medium MLP Architecture") - visualize_network_architecture(mlp_large, "Large MLP Architecture") + x2 = Tensor([[1.0, 2.0, 3.0, 4.0, 5.0]]) + y2 = mlp2(x2) + print(f"✅ MLP2 output: {y2}") - # Visualize data flow - visualize_data_flow(mlp_small, x, "Data Flow Through 
Small MLP") - - # Compare networks - networks = [mlp_small, mlp_medium] - names = ["Small MLP", "Medium MLP"] - compare_networks(networks, names, x, "MLP Architecture Comparison") - - print("✅ MLP creation and visualization working!") + print("🎉 MLP creation works!") except Exception as e: print(f"❌ Error: {e}") - print("Make sure to implement the visualization functions!") + print("Make sure to implement create_mlp above!") # %% [markdown] """ -## Step 4: Understanding Network Behavior +## Step 3: Network Visualization and Analysis -Let's analyze how different network architectures behave with different types of input data. +Let's create tools to visualize and analyze network architectures. This helps us understand what our networks are doing. + +### Why Visualization Matters +- **Architecture understanding**: See how data flows through the network +- **Debugging**: Identify bottlenecks and issues +- **Design**: Compare different architectures +- **Communication**: Explain networks to others + +### What We'll Build +1. **Architecture visualization**: Show layer connections +2. **Data flow visualization**: See how data transforms +3. **Network comparison**: Compare different architectures +4. **Behavior analysis**: Understand network capabilities """ # %% #| export -def analyze_network_behavior(network: Sequential, input_data: Tensor, - title: str = "Network Behavior Analysis"): +def visualize_network_architecture(network: Sequential, title: str = "Network Architecture"): """ - Analyze how a network behaves with different types of input. + Visualize the architecture of a Sequential network. Args: - network: Network to analyze - input_data: Input tensor + network: Sequential network to visualize title: Title for the plot + + TODO: Create a visualization showing the network structure. + + APPROACH: + 1. Create a matplotlib figure + 2. For each layer, draw a box showing its type and size + 3. Connect the boxes with arrows showing data flow + 4. 
Add labels and formatting + + EXAMPLE: + Input → Dense(3→4) → ReLU → Dense(4→2) → Sigmoid → Output + + HINTS: + - Use plt.subplots() to create the figure + - Use plt.text() to add layer labels + - Use plt.arrow() to show connections + - Add proper spacing and formatting """ - if not _should_show_plots(): - print("📊 Plots disabled during testing - this is normal!") - return - - fig, axes = plt.subplots(2, 3, figsize=(15, 10)) - - # 1. Input vs Output relationship - ax1 = axes[0, 0] - input_flat = input_data.data.flatten() - output = network(input_data) - output_flat = output.data.flatten() - - ax1.scatter(input_flat, output_flat, alpha=0.6) - ax1.plot([input_flat.min(), input_flat.max()], - [input_flat.min(), input_flat.max()], 'r--', alpha=0.5, label='y=x') - ax1.set_xlabel('Input Values') - ax1.set_ylabel('Output Values') - ax1.set_title('Input vs Output') - ax1.legend() - ax1.grid(True, alpha=0.3) - - # 2. Output distribution - ax2 = axes[0, 1] - ax2.hist(output_flat, bins=20, alpha=0.7, edgecolor='black') - ax2.axvline(np.mean(output_flat), color='red', linestyle='--', - label=f'Mean: {np.mean(output_flat):.3f}') - ax2.set_xlabel('Output Values') - ax2.set_ylabel('Frequency') - ax2.set_title('Output Distribution') - ax2.legend() - ax2.grid(True, alpha=0.3) - - # 3. Layer-by-layer activation patterns - ax3 = axes[0, 2] - activations = [] - x = input_data - - for layer in network.layers: - x = layer(x) - if hasattr(layer, 'input_size'): # Dense layer - activations.append(np.mean(x.data)) - else: # Activation layer - activations.append(np.mean(x.data)) - - ax3.plot(range(len(activations)), activations, 'bo-', linewidth=2, markersize=8) - ax3.set_xlabel('Layer Index') - ax3.set_ylabel('Mean Activation') - ax3.set_title('Layer-by-Layer Activations') - ax3.grid(True, alpha=0.3) - - # 4. 
Network depth analysis - ax4 = axes[1, 0] - layer_types = [type(layer).__name__ for layer in network.layers] - layer_counts = {} - for layer_type in layer_types: - layer_counts[layer_type] = layer_counts.get(layer_type, 0) + 1 - - if layer_counts: - ax4.bar(layer_counts.keys(), layer_counts.values(), alpha=0.7) - ax4.set_xlabel('Layer Type') - ax4.set_ylabel('Count') - ax4.set_title('Layer Type Distribution') - ax4.grid(True, alpha=0.3) - - # 5. Shape transformation - ax5 = axes[1, 1] - shapes = [input_data.shape] - x = input_data - - for layer in network.layers: - x = layer(x) - shapes.append(x.shape) - - layer_indices = range(len(shapes)) - shape_sizes = [np.prod(shape) for shape in shapes] - - ax5.plot(layer_indices, shape_sizes, 'go-', linewidth=2, markersize=8) - ax5.set_xlabel('Layer Index') - ax5.set_ylabel('Tensor Size') - ax5.set_title('Shape Transformation') - ax5.grid(True, alpha=0.3) - - # 6. Network summary - ax6 = axes[1, 2] - ax6.axis('off') - - summary_text = f""" -Network Summary: -• Total Layers: {len(network.layers)} -• Input Shape: {input_data.shape} -• Output Shape: {output.shape} -• Parameters: {sum(np.prod(layer.weights.data.shape) if hasattr(layer, 'weights') else 0 for layer in network.layers)} -• Architecture: {' → '.join([type(layer).__name__ for layer in network.layers])} - """ - - ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes, - fontsize=10, verticalalignment='top', fontfamily='monospace') - - plt.suptitle(title, fontsize=16, fontweight='bold') - plt.tight_layout() - plt.show() + raise NotImplementedError("Student implementation required") # %% -# Test network behavior analysis -try: - print("=== Testing Network Behavior Analysis ===") +#| hide +#| export +def visualize_network_architecture(network: Sequential, title: str = "Network Architecture"): + """Visualize the architecture of a Sequential network.""" + if not _should_show_plots(): + print("📊 Visualization disabled during testing") + return - # Create a network for 
analysis - network = create_mlp(input_size=5, hidden_sizes=[8, 4], output_size=2) + fig, ax = plt.subplots(1, 1, figsize=(12, 6)) - # Test with different types of input - x_normal = Tensor(np.random.randn(10, 5).astype(np.float32)) - x_uniform = Tensor(np.random.uniform(-1, 1, (10, 5)).astype(np.float32)) - x_zeros = Tensor(np.zeros((10, 5)).astype(np.float32)) + # Calculate positions + num_layers = len(network.layers) + x_positions = np.linspace(0, 10, num_layers + 2) - print("Analyzing network behavior with different inputs...") + # Draw input + ax.text(x_positions[0], 0, 'Input', ha='center', va='center', + bbox=dict(boxstyle='round,pad=0.3', facecolor='lightblue')) - # Analyze behavior - analyze_network_behavior(network, x_normal, "Network Behavior: Normal Input") - analyze_network_behavior(network, x_uniform, "Network Behavior: Uniform Input") - analyze_network_behavior(network, x_zeros, "Network Behavior: Zero Input") + # Draw layers + for i, layer in enumerate(network.layers): + layer_name = type(layer).__name__ + ax.text(x_positions[i+1], 0, layer_name, ha='center', va='center', + bbox=dict(boxstyle='round,pad=0.3', facecolor='lightgreen')) + + # Draw arrow + ax.arrow(x_positions[i], 0, 0.8, 0, head_width=0.1, head_length=0.1, + fc='black', ec='black') - print("✅ Network behavior analysis working!") + # Draw output + ax.text(x_positions[-1], 0, 'Output', ha='center', va='center', + bbox=dict(boxstyle='round,pad=0.3', facecolor='lightcoral')) -except Exception as e: - print(f"❌ Error: {e}") - print("Make sure to implement the behavior analysis function!") + ax.set_xlim(-0.5, 10.5) + ax.set_ylim(-0.5, 0.5) + ax.set_title(title) + ax.axis('off') + plt.show() # %% [markdown] """ -## Step 5: Practical Applications +### 🧪 Test Network Visualization +""" -Let's see how our networks can be applied to real-world problems! 
+# %% +# Test network visualization +print("Testing network visualization...") + +try: + # Create a test network + test_network = Sequential([ + Dense(input_size=3, output_size=4), + ReLU(), + Dense(input_size=4, output_size=2), + Sigmoid() + ]) + + # Visualize the network + visualize_network_architecture(test_network, "Test Network Architecture") + print("✅ Network visualization created!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement visualize_network_architecture above!") + +# %% [markdown] +""" +## Step 4: Data Flow Analysis + +Let's create tools to analyze how data flows through the network. This helps us understand what each layer is doing. + +### Why Data Flow Analysis Matters +- **Debugging**: See where data gets corrupted +- **Optimization**: Identify bottlenecks +- **Understanding**: Learn what each layer learns +- **Design**: Choose appropriate layer sizes +""" + +# %% +#| export +def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = "Data Flow Through Network"): + """ + Visualize how data flows through the network. + + Args: + network: Sequential network to analyze + input_data: Input tensor to trace through the network + title: Title for the plot + + TODO: Create a visualization showing how data transforms through each layer. + + APPROACH: + 1. Trace the input through each layer + 2. Record the output of each layer + 3. Create a visualization showing the transformations + 4. 
Add statistics (mean, std, range) for each layer + + EXAMPLE: + Input: [1, 2, 3] → Layer1: [1.4, 2.8] → Layer2: [1.4, 2.8] → Output: [0.7] + + HINTS: + - Use a for loop to apply each layer + - Store intermediate outputs + - Use plt.subplot() to create multiple subplots + - Show statistics for each layer output + """ + raise NotImplementedError("Student implementation required") + +# %% +#| hide +#| export +def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = "Data Flow Through Network"): + """Visualize how data flows through the network.""" + if not _should_show_plots(): + print("📊 Visualization disabled during testing") + return + + # Trace data through network + current_data = input_data + layer_outputs = [current_data.data.flatten()] + layer_names = ['Input'] + + for layer in network.layers: + current_data = layer(current_data) + layer_outputs.append(current_data.data.flatten()) + layer_names.append(type(layer).__name__) + + # Create visualization + fig, axes = plt.subplots(2, len(layer_outputs), figsize=(15, 8)) + + for i, (output, name) in enumerate(zip(layer_outputs, layer_names)): + # Histogram + axes[0, i].hist(output, bins=20, alpha=0.7) + axes[0, i].set_title(f'{name}\nShape: {output.shape}') + axes[0, i].set_xlabel('Value') + axes[0, i].set_ylabel('Frequency') + + # Statistics + stats_text = f'Mean: {np.mean(output):.3f}\nStd: {np.std(output):.3f}\nRange: [{np.min(output):.3f}, {np.max(output):.3f}]' + axes[1, i].text(0.1, 0.5, stats_text, transform=axes[1, i].transAxes, + verticalalignment='center', fontsize=10) + axes[1, i].set_title(f'{name} Statistics') + axes[1, i].axis('off') + + plt.suptitle(title) + plt.tight_layout() + plt.show() + +# %% [markdown] +""" +### 🧪 Test Data Flow Visualization +""" + +# %% +# Test data flow visualization +print("Testing data flow visualization...") + +try: + # Create a test network + test_network = Sequential([ + Dense(input_size=3, output_size=4), + ReLU(), + Dense(input_size=4, 
output_size=2), + Sigmoid() + ]) + + # Test input + test_input = Tensor([[1.0, 2.0, 3.0]]) + + # Visualize data flow + visualize_data_flow(test_network, test_input, "Test Network Data Flow") + print("✅ Data flow visualization created!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement visualize_data_flow above!") + +# %% [markdown] +""" +## Step 5: Network Comparison and Analysis + +Let's create tools to compare different network architectures and understand their capabilities. + +### Why Network Comparison Matters +- **Architecture selection**: Choose the right network for your problem +- **Performance analysis**: Understand trade-offs between different designs +- **Design insights**: Learn what makes networks effective +- **Research**: Compare new architectures to baselines +""" + +# %% +#| export +def compare_networks(networks: List[Sequential], network_names: List[str], + input_data: Tensor, title: str = "Network Comparison"): + """ + Compare multiple networks on the same input. + + Args: + networks: List of Sequential networks to compare + network_names: Names for each network + input_data: Input tensor to test all networks + title: Title for the plot + + TODO: Create a comparison visualization showing how different networks process the same input. + + APPROACH: + 1. Run the same input through each network + 2. Collect the outputs and intermediate results + 3. Create a visualization comparing the results + 4. 
Show statistics and differences + + EXAMPLE: + Compare MLP vs Deep Network vs Wide Network on same input + + HINTS: + - Use a for loop to test each network + - Store outputs and any relevant statistics + - Use plt.subplot() to create comparison plots + - Show both outputs and intermediate layer results + """ + raise NotImplementedError("Student implementation required") + +# %% +#| hide +#| export +def compare_networks(networks: List[Sequential], network_names: List[str], + input_data: Tensor, title: str = "Network Comparison"): + """Compare multiple networks on the same input.""" + if not _should_show_plots(): + print("📊 Visualization disabled during testing") + return + + # Test all networks + outputs = [] + for network in networks: + output = network(input_data) + outputs.append(output.data.flatten()) + + # Create comparison plot + fig, axes = plt.subplots(2, len(networks), figsize=(15, 8)) + + for i, (output, name) in enumerate(zip(outputs, network_names)): + # Output distribution + axes[0, i].hist(output, bins=20, alpha=0.7) + axes[0, i].set_title(f'{name}\nOutput Distribution') + axes[0, i].set_xlabel('Value') + axes[0, i].set_ylabel('Frequency') + + # Statistics + stats_text = f'Mean: {np.mean(output):.3f}\nStd: {np.std(output):.3f}\nRange: [{np.min(output):.3f}, {np.max(output):.3f}]\nSize: {len(output)}' + axes[1, i].text(0.1, 0.5, stats_text, transform=axes[1, i].transAxes, + verticalalignment='center', fontsize=10) + axes[1, i].set_title(f'{name} Statistics') + axes[1, i].axis('off') + + plt.suptitle(title) + plt.tight_layout() + plt.show() + +# %% [markdown] +""" +### 🧪 Test Network Comparison +""" + +# %% +# Test network comparison +print("Testing network comparison...") + +try: + # Create different networks + network1 = create_mlp(input_size=3, hidden_sizes=[4], output_size=1) + network2 = create_mlp(input_size=3, hidden_sizes=[8, 4], output_size=1) + network3 = create_mlp(input_size=3, hidden_sizes=[2], output_size=1, activation=Tanh) + + networks = 
[network1, network2, network3] + names = ["Small MLP", "Deep MLP", "Tanh MLP"] + + # Test input + test_input = Tensor([[1.0, 2.0, 3.0]]) + + # Compare networks + compare_networks(networks, names, test_input, "Network Architecture Comparison") + print("✅ Network comparison created!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement compare_networks above!") + +# %% [markdown] +""" +## Step 6: Practical Network Architectures + +Now let's create some practical network architectures for common machine learning tasks. + +### Common Network Types + +#### 1. **Classification Networks** +- **Binary classification**: Output single probability +- **Multi-class classification**: Output probability distribution +- **Use cases**: Image classification, spam detection, sentiment analysis + +#### 2. **Regression Networks** +- **Single output**: Predict continuous value +- **Multiple outputs**: Predict multiple values +- **Use cases**: Price prediction, temperature forecasting, demand estimation + +#### 3. **Feature Extraction Networks** +- **Encoder networks**: Compress data into features +- **Use cases**: Dimensionality reduction, feature learning, representation learning """ # %% @@ -703,135 +803,311 @@ Let's see how our networks can be applied to real-world problems! def create_classification_network(input_size: int, num_classes: int, hidden_sizes: List[int] = None) -> Sequential: """ - Create a network for classification problems. + Create a network for classification tasks. Args: input_size: Number of input features num_classes: Number of output classes - hidden_sizes: List of hidden layer sizes (default: [input_size//2]) + hidden_sizes: List of hidden layer sizes (default: [input_size * 2]) Returns: Sequential network for classification - """ - if hidden_sizes is None: - hidden_sizes = [input_size // 2] + + TODO: Implement classification network creation. 
# %%
#| hide
#| export
def create_classification_network(input_size: int, num_classes: int,
                                  hidden_sizes: List[int] = None) -> Sequential:
    """
    Create a network for classification tasks.

    Delegates to ``create_mlp`` with ReLU as the hidden activation and
    Sigmoid as the output activation, e.g.
    ``create_classification_network(10, 3)`` requests
    Dense(10→20) → ReLU → Dense(20→3) → Sigmoid.

    Args:
        input_size: Number of input features.
        num_classes: Number of output units.
        hidden_sizes: Hidden layer sizes; when omitted, a single hidden layer
            of ``input_size * 2`` units is used.

    Returns:
        The Sequential network built by ``create_mlp``.
    """
    # Fall back to one hidden layer twice as wide as the input.
    layer_widths = [input_size * 2] if hidden_sizes is None else hidden_sizes

    return create_mlp(
        input_size=input_size,
        hidden_sizes=layer_widths,
        output_size=num_classes,
        activation=ReLU,
        output_activation=Sigmoid,
    )
# %%
#| hide
#| export
def create_regression_network(input_size: int, output_size: int = 1,
                              hidden_sizes: List[int] = None) -> Sequential:
    """
    Create a network for regression tasks.

    Stacks Dense → ReLU for every hidden size and finishes with a Dense layer
    that has no activation, e.g. ``create_regression_network(5, 1)`` builds
    Dense(5→10) → ReLU → Dense(10→1).

    Args:
        input_size: Number of input features.
        output_size: Number of output values (default: 1).
        hidden_sizes: Hidden layer sizes; when omitted, a single hidden layer
            of ``input_size * 2`` units is used.

    Returns:
        Sequential network whose last layer is a plain Dense layer.
    """
    if hidden_sizes is None:
        hidden_sizes = [input_size * 2]

    # Widths at each layer boundary: the input width, then every hidden width.
    widths = [input_size] + list(hidden_sizes)

    stack = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        stack.append(Dense(input_size=fan_in, output_size=fan_out))
        stack.append(ReLU())

    # The output layer is left linear: no activation follows it.
    stack.append(Dense(input_size=widths[-1], output_size=output_size))

    return Sequential(stack)
# %%
# Test practical networks: smoke-test both factory functions on one small
# sample each and print the outputs with their min/max.
print("Testing practical networks...")

try:
    # Classification network: 5 input features, a single output unit.
    class_net = create_classification_network(input_size=5, num_classes=1)
    x_class = Tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])
    y_class = class_net(x_class)
    print(f"✅ Classification output: {y_class}")
    print(f"✅ Output range: [{np.min(y_class.data):.3f}, {np.max(y_class.data):.3f}]")

    # Regression network: 3 input features, a single output value.
    reg_net = create_regression_network(input_size=3, output_size=1)
    x_reg = Tensor([[1.0, 2.0, 3.0]])
    y_reg = reg_net(x_reg)
    print(f"✅ Regression output: {y_reg}")
    print(f"✅ Output range: [{np.min(y_reg.data):.3f}, {np.max(y_reg.data):.3f}]")

    print("🎉 Practical networks work!")

except Exception as e:
    # Deliberately broad: this cell reports any failure (e.g. the
    # NotImplementedError raised by an unimplemented exercise stub) as a
    # hint instead of aborting the notebook run.
    print(f"❌ Error: {e}")
    print("Make sure to implement the network creation functions above!")
-**Congratulations on mastering neural network architectures!** 🚀 -""" \ No newline at end of file +### Why Behavior Analysis Matters +- **Understanding**: Learn what patterns networks can learn +- **Debugging**: Identify when networks fail +- **Design**: Choose appropriate architectures +- **Validation**: Ensure networks work as expected +""" + +# %% +#| export +def analyze_network_behavior(network: Sequential, input_data: Tensor, + title: str = "Network Behavior Analysis"): + """ + Analyze how a network behaves with different inputs. + + Args: + network: Sequential network to analyze + input_data: Input tensor to test + title: Title for the plot + + TODO: Create an analysis showing network behavior and capabilities. + + APPROACH: + 1. Test the network with the given input + 2. Analyze the output characteristics + 3. Test with variations of the input + 4. Create visualizations showing behavior patterns + + EXAMPLE: + Test network with original input and noisy versions + Show how output changes with input variations + + HINTS: + - Test the original input + - Create variations (noise, scaling, etc.) 
# %%
#| hide
#| export
def analyze_network_behavior(network: Sequential, input_data: Tensor,
                             title: str = "Network Behavior Analysis",
                             noise_levels: List[float] = None):
    """
    Analyze how a network behaves with different inputs.

    Evaluates the network on the unmodified input, then on copies of the
    input perturbed with Gaussian noise scaled by each entry of
    ``noise_levels`` (one random draw per level, taken from NumPy's global
    random state). Draws a 2x2 figure:

    * top-left: histogram of the output for the unmodified input
    * top-right: mean of the output per noise level with a ±1 std band
      (std is taken over the elements of a single output, not over draws)
    * bottom-left: overlaid output histograms, one per noise level
    * bottom-right: mean / std / range of the unmodified-input output

    Args:
        network: Sequential network to analyze.
        input_data: Input tensor to test.
        title: Title for the plot.
        noise_levels: Noise scale factors to sweep. Defaults to
            ``[0.0, 0.1, 0.2, 0.5]`` when omitted.

    Returns:
        None. When ``_should_show_plots()`` is false, a notice is printed and
        the network is not evaluated.
    """
    if not _should_show_plots():
        print("📊 Visualization disabled during testing")
        return

    if noise_levels is None:
        noise_levels = [0.0, 0.1, 0.2, 0.5]
    else:
        # Materialize once: the levels are iterated and plotted several times.
        noise_levels = list(noise_levels)

    # Baseline on the untouched input. The "Original" panels read this value
    # directly, so they stay correct even if the sweep does not start at 0.0.
    original_output = network(input_data).data.flatten()

    # Noise sweep: one perturbed forward pass per level
    outputs = []
    for noise in noise_levels:
        perturbation = noise * np.random.randn(*input_data.data.shape)
        noisy_input = Tensor(input_data.data + perturbation)
        outputs.append(network(noisy_input).data.flatten())

    # Create analysis plot
    _, axes = plt.subplots(2, 2, figsize=(12, 10))

    # Original output
    axes[0, 0].hist(original_output, bins=20, alpha=0.7)
    axes[0, 0].set_title('Original Input Output')
    axes[0, 0].set_xlabel('Value')
    axes[0, 0].set_ylabel('Frequency')

    # Output stability
    output_means = [np.mean(out) for out in outputs]
    output_stds = [np.std(out) for out in outputs]
    axes[0, 1].plot(noise_levels, output_means, 'bo-', label='Mean')
    axes[0, 1].fill_between(noise_levels,
                            [m - s for m, s in zip(output_means, output_stds)],
                            [m + s for m, s in zip(output_means, output_stds)],
                            alpha=0.3, label='±1 Std')
    axes[0, 1].set_xlabel('Noise Level')
    axes[0, 1].set_ylabel('Output Value')
    axes[0, 1].set_title('Output Stability')
    axes[0, 1].legend()

    # Output distribution comparison
    for output, noise in zip(outputs, noise_levels):
        axes[1, 0].hist(output, bins=20, alpha=0.5, label=f'Noise={noise}')
    axes[1, 0].set_xlabel('Output Value')
    axes[1, 0].set_ylabel('Frequency')
    axes[1, 0].set_title('Output Distribution Comparison')
    axes[1, 0].legend()

    # Statistics of the clean-input output, rendered as text on a blank axis
    stats_text = (
        f'Original Mean: {np.mean(original_output):.3f}\n'
        f'Original Std: {np.std(original_output):.3f}\n'
        f'Output Range: [{np.min(original_output):.3f}, {np.max(original_output):.3f}]'
    )
    axes[1, 1].text(0.1, 0.5, stats_text, transform=axes[1, 1].transAxes,
                    verticalalignment='center', fontsize=10)
    axes[1, 1].set_title('Network Statistics')
    axes[1, 1].axis('off')

    plt.suptitle(title)
    plt.tight_layout()
    plt.show()
# %%
# Final verification: emit the end-of-module completion banner in one write.
print("\n".join([
    "\n" + "=" * 50,
    "🎉 NETWORKS MODULE COMPLETE!",
    "=" * 50,
    "✅ Sequential network implementation",
    "✅ MLP creation and architecture design",
    "✅ Network visualization and analysis",
    "✅ Network comparison tools",
    "✅ Practical classification and regression networks",
    "✅ Network behavior analysis",
    "\n🚀 Ready to enable training with autograd in the next module!",
]))