From 00231d98ea52aff988faf70232db1163a2de14e2 Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Thu, 10 Jul 2025 23:16:12 -0400 Subject: [PATCH] feat: add Networks module with forward-pass and visualizations - Add modules/networks/networks_dev.py and networks_dev.ipynb (Jupytext/nbdev educational pattern) - Add comprehensive visualizations: architecture, data flow, layer analysis, network comparison - Add modules/networks/README.md with learning goals, usage, and visualization docs - Add modules/networks/tests/test_networks.py with thorough tests for composition, MLPs, and visualizations - Register 'networks' in CLI info and test commands - Update CLI info command to check layers/networks status - This module focuses on forward pass only (no training yet) --- modules/networks/README.md | 266 ++++++ modules/networks/networks_dev.ipynb | 1004 +++++++++++++++++++++++ modules/networks/networks_dev.py | 837 +++++++++++++++++++ modules/networks/tests/test_networks.py | 420 ++++++++++ tito/commands/info.py | 28 + tito/commands/test.py | 2 +- 6 files changed, 2556 insertions(+), 1 deletion(-) create mode 100644 modules/networks/README.md create mode 100644 modules/networks/networks_dev.ipynb create mode 100644 modules/networks/networks_dev.py create mode 100644 modules/networks/tests/test_networks.py diff --git a/modules/networks/README.md b/modules/networks/README.md new file mode 100644 index 00000000..af175246 --- /dev/null +++ b/modules/networks/README.md @@ -0,0 +1,266 @@ +# 🧠 Module 3: Networks - Neural Network Architectures + +**Compose layers into complete neural network architectures with powerful visualizations** + +## 🎯 Learning Objectives + +After completing this module, you will: +- Understand networks as function composition: `f(x) = layer_n(...layer_2(layer_1(x)))` +- Build common architectures (MLP, CNN) from layers +- Visualize network structure and data flow +- See how architecture affects capability +- Master forward pass inference (no 
training yet!) + +## 🧠 Build → Use → Understand + +This module follows the TinyTorch pedagogical framework: + +1. **Build**: Compose layers into complete networks +2. **Use**: Create different architectures and run inference +3. **Understand**: How architecture design affects network behavior + +## 📚 What You'll Build + +### **Sequential Network** +```python +# Basic network composition +network = Sequential([ + Dense(784, 128), + ReLU(), + Dense(128, 64), + ReLU(), + Dense(64, 10), + Sigmoid() +]) + +# Forward pass +x = Tensor([[1.0, 2.0, 3.0, ...]]) # Input data +output = network(x) # Network prediction +``` + +### **MLP (Multi-Layer Perceptron)** +```python +# Create MLP for classification +mlp = create_mlp( + input_size=784, # 28x28 image + hidden_sizes=[128, 64], # Hidden layers + output_size=10, # 10 classes + activation=ReLU, + output_activation=Sigmoid +) +``` + +### **Specialized Networks** +```python +# Classification network +classifier = create_classification_network( + input_size=100, num_classes=2 +) + +# Regression network +regressor = create_regression_network( + input_size=13, output_size=1 +) +``` + +## 🎨 Visualization Features + +This module includes powerful visualizations to help you understand: + +### **Network Architecture Visualization** +- **Layer-by-layer structure**: See how layers connect +- **Color-coded layers**: Different colors for Dense, ReLU, Sigmoid, etc. 
+- **Connection arrows**: Visualize data flow between layers +- **Layer details**: Input/output sizes and parameters + +### **Data Flow Visualization** +- **Shape transformations**: See how tensor shapes change through the network +- **Activation patterns**: Visualize intermediate layer outputs +- **Statistics tracking**: Mean, std, and distribution of activations +- **Layer analysis**: Understand what each layer learns + +### **Network Comparison** +- **Side-by-side analysis**: Compare different architectures +- **Performance metrics**: Output distributions and statistics +- **Architectural insights**: Layer type distributions and complexity + +### **Behavior Analysis** +- **Input-output relationships**: How inputs map to outputs +- **Activation patterns**: Layer-by-layer activation analysis +- **Network depth**: Understanding the role of depth vs width +- **Practical insights**: Real-world application considerations + +## 🚀 Getting Started + +### Prerequisites +- Complete Module 1: Tensor ✅ +- Complete Module 2: Layers ✅ +- Understand basic function composition +- Familiar with matplotlib for visualizations + +### Quick Start +```bash +# Navigate to the networks module +cd modules/networks + +# Work in the development notebook +jupyter notebook networks_dev.ipynb + +# Or work in the Python file +code networks_dev.py +``` + +## 📖 Module Structure + +``` +modules/networks/ +├── networks_dev.py # Main development file (work here!) 
+├── networks_dev.ipynb # Jupyter notebook version +├── tests/ +│ └── test_networks.py # Comprehensive tests +├── README.md # This file +└── solutions/ # Reference implementations (if stuck) +``` + +## 🎓 Learning Path + +### Step 1: Sequential Network (Function Composition) +- Understand `f(x) = layer_n(...layer_1(x))` +- Implement basic network composition +- Test with simple examples + +### Step 2: Network Visualization +- Visualize network architectures +- Understand data flow through networks +- Compare different network designs + +### Step 3: Common Architectures +- Build MLPs for different tasks +- Create classification networks +- Design regression networks + +### Step 4: Behavior Analysis +- Analyze network behavior with different inputs +- Understand architectural trade-offs +- See how design affects capability + +### Step 5: Practical Applications +- Build networks for real problems +- Understand classification vs regression +- See how architecture matches task + +## 🧪 Testing Your Implementation + +### Module-Level Tests +```bash +# Run comprehensive tests +python -m pytest tests/test_networks.py -v + +# Quick test +python -c "from networks_dev import Sequential; print('✅ Networks working!')" +``` + +### Package-Level Tests +```bash +# Export to package +python ../../bin/tito sync + +# Test integration +python ../../bin/tito test --module networks +``` + +## 🎯 Key Concepts + +### **Function Composition** +- Networks as `f(x) = g(h(x))` +- Each layer is a function +- Composition creates complex behavior + +### **Architecture Design** +- **Depth**: Number of layers +- **Width**: Number of neurons per layer +- **Activation**: Nonlinearity choices +- **Output**: Task-specific final layer + +### **Visualization Benefits** +- **Debugging**: See where things go wrong +- **Understanding**: Visualize complex transformations +- **Design**: Compare different architectures +- **Intuition**: Build mental models of networks + 
+### **Practical Considerations** +- **Input size**: Must match your data +- **Output size**: Must match your task +- **Hidden layers**: Balance complexity vs overfitting +- **Activation functions**: Choose based on task + +## 🔍 Common Issues + +### **Import Errors** +```python +# Make sure you're in the right directory +import sys +sys.path.append('../../') +from modules.layers.layers_dev import Dense +from modules.activations.activations_dev import ReLU, Sigmoid +``` + +### **Shape Mismatches** +```python +# Check layer sizes match +layer1 = Dense(3, 4) # 3 inputs, 4 outputs +layer2 = Dense(4, 2) # 4 inputs (matches layer1 output), 2 outputs +``` + +### **Visualization Issues** +```python +# Make sure matplotlib is installed +pip install matplotlib seaborn + +# Check if plots are disabled during testing +if _should_show_plots(): + # Your visualization code + pass +``` + +## 🎉 Success Criteria + +You've successfully completed this module when: +- ✅ All tests pass (`pytest tests/test_networks.py`) +- ✅ You can build and visualize different network architectures +- ✅ You understand how architecture affects network behavior +- ✅ You can create networks for classification and regression tasks +- ✅ Package export works (`tito test --module networks`) + +## 🚀 What's Next + +After completing this module, you're ready for: +- **Module 4: Training** - Learn how networks learn from data +- **Module 5: Data** - Work with real datasets +- **Module 6: Applications** - Solve real-world problems + +## 🤝 Getting Help + +- Check the tests for examples of expected behavior +- Look at the solutions/ directory if you're stuck +- Review the pedagogical principles in `docs/pedagogy/` +- Remember: Build → Use → Understand! 
+ +## 🎨 Visualization Examples + +### Network Architecture +``` +Input → Dense(784,128) → ReLU → Dense(128,64) → ReLU → Dense(64,10) → Sigmoid → Output +``` + +### Data Flow +``` +(1,784) → (1,128) → (1,128) → (1,64) → (1,64) → (1,10) → (1,10) +``` + +### Layer Analysis +- **Dense layers**: Linear transformations +- **ReLU**: Introduces nonlinearity +- **Sigmoid**: Outputs probabilities + +**Build powerful neural networks with beautiful visualizations!** 🚀 \ No newline at end of file diff --git a/modules/networks/networks_dev.ipynb b/modules/networks/networks_dev.ipynb new file mode 100644 index 00000000..a8f94222 --- /dev/null +++ b/modules/networks/networks_dev.ipynb @@ -0,0 +1,1004 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "jupyter:\n", + " jupytext:\n", + " text_representation:\n", + " extension: .py\n", + " format_name: percent\n", + " format_version: '1.3'\n", + " jupytext_version: 1.17.1\n", + "---\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "# Module 3: Networks - Neural Network Architectures\n", + "\n", + "Welcome to the Networks module! This is where we compose layers into complete neural network architectures.\n", + "\n", + "## Learning Goals\n", + "- Understand networks as function composition: `f(x) = layer_n(...layer_2(layer_1(x)))`\n", + "- Build common architectures (MLP, CNN) from layers\n", + "- Visualize network structure and data flow\n", + "- See how architecture affects capability\n", + "- Master forward pass inference (no training yet!)\n", + "\n", + "## Build \u2192 Use \u2192 Understand\n", + "1. **Build**: Compose layers into complete networks\n", + "2. **Use**: Create different architectures and run inference\n", + "3. 
**Understand**: How architecture design affects network behavior\n", + "\n", + "## Module Dependencies\n", + "This module builds on previous modules:\n", + "- **tensor** \u2192 **activations** \u2192 **layers** \u2192 **networks**\n", + "- Clean composition: math functions \u2192 building blocks \u2192 complete systems\n", + "\n", + "## Module \u2192 Package Structure\n", + "**\ud83c\udf93 Teaching vs. \ud83d\udd27 Building**: \n", + "- **Learning side**: Work in `modules/networks/networks_dev.py` \n", + "- **Building side**: Exports to `tinytorch/core/networks.py`\n", + "\n", + "This module teaches how to compose layers into complete neural network architectures.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp core.networks\n", + "\n", + "# Setup and imports\n", + "import numpy as np\n", + "import sys\n", + "from typing import List, Union, Optional, Callable\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.patches as patches\n", + "from matplotlib.patches import FancyBboxPatch, ConnectionPatch\n", + "import seaborn as sns\n", + "\n", + "# Import our building blocks\n", + "from tinytorch.core.tensor import Tensor\n", + "from tinytorch.core.layers import Dense\n", + "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", + "\n", + "print(\"\ud83d\udd25 TinyTorch Networks Module\")\n", + "print(f\"NumPy version: {np.__version__}\")\n", + "print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", + "print(\"Ready to build neural network architectures!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numpy as np\n", + "import sys\n", + "from typing import List, Union, Optional, Callable\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.patches as patches\n", + "from matplotlib.patches import FancyBboxPatch, 
ConnectionPatch\n", + "import seaborn as sns\n", + "\n", + "# Import our building blocks\n", + "from tinytorch.core.tensor import Tensor\n", + "from tinytorch.core.layers import Dense\n", + "from tinytorch.core.activations import ReLU, Sigmoid, Tanh" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "def _should_show_plots():\n", + " \"\"\"Check if we should show plots (disable during testing)\"\"\"\n", + " return 'pytest' not in sys.modules and 'test' not in sys.argv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 1: What is a Network?\n", + "\n", + "A **network** is a composition of layers that transforms input data into output predictions. Think of it as:\n", + "\n", + "```\n", + "Input \u2192 Layer1 \u2192 Layer2 \u2192 Layer3 \u2192 Output\n", + "```\n", + "\n", + "**The fundamental insight**: Neural networks are just function composition!\n", + "- Each layer is a function: `f_i(x)`\n", + "- The network is: `f(x) = f_n(...f_2(f_1(x)))`\n", + "- Complex behavior emerges from simple building blocks\n", + "\n", + "**Why networks matter**:\n", + "- They solve real problems (classification, regression, etc.)\n", + "- Architecture determines what problems you can solve\n", + "- Understanding networks = understanding deep learning\n", + "- They're the foundation for all modern AI\n", + "\n", + "Let's start by building the most fundamental network: **Sequential**.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class Sequential:\n", + " \"\"\"\n", + " Sequential Network: Composes layers in sequence\n", + " \n", + " The most fundamental network architecture.\n", + " Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x)))\n", + " \n", + " Args:\n", + " layers: List of layers to 
compose\n", + " \n", + " TODO: Implement the Sequential network with forward pass.\n", + " \"\"\"\n", + " \n", + " def __init__(self, layers: List):\n", + " \"\"\"\n", + " Initialize Sequential network with layers.\n", + " \n", + " Args:\n", + " layers: List of layers to compose in order\n", + " \n", + " TODO: Store the layers and implement forward pass\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Forward pass through all layers in sequence.\n", + " \n", + " Args:\n", + " x: Input tensor\n", + " \n", + " Returns:\n", + " Output tensor after passing through all layers\n", + " \n", + " TODO: Implement sequential forward pass through all layers\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make network callable: network(x) same as network.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class Sequential:\n", + " \"\"\"\n", + " Sequential Network: Composes layers in sequence\n", + " \n", + " The most fundamental network architecture.\n", + " Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x)))\n", + " \"\"\"\n", + " \n", + " def __init__(self, layers: List):\n", + " \"\"\"Initialize Sequential network with layers.\"\"\"\n", + " self.layers = layers\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Forward pass through all layers in sequence.\"\"\"\n", + " # Apply each layer in order\n", + " for layer in self.layers:\n", + " x = layer(x)\n", + " return x\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make network callable: network(x) same as network.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your Sequential Network\n", + "\n", + "Once you implement the Sequential network above, run this cell to test it:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test the Sequential network\n", + "try:\n", + " print(\"=== Testing Sequential Network ===\")\n", + " \n", + " # Create a simple 2-layer network: 3 \u2192 4 \u2192 2\n", + " network = Sequential([\n", + " Dense(3, 4),\n", + " ReLU(),\n", + " Dense(4, 2),\n", + " Sigmoid()\n", + " ])\n", + " \n", + " # Test with sample data\n", + " x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])\n", + " print(f\"Input shape: {x.shape}\")\n", + " print(f\"Input data: {x.data}\")\n", + " \n", + " # Forward pass\n", + " output = network(x)\n", + " print(f\"Output shape: {output.shape}\")\n", + " print(f\"Output data: {output.data}\")\n", + " \n", + " print(\"\u2705 Sequential network working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the Sequential network!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 2: Network Visualization\n", + "\n", + "Now let's create powerful visualizations to understand what our networks look like and how they work!\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def visualize_network_architecture(network: Sequential, title: str = \"Network Architecture\"):\n", + " \"\"\"\n", + " Create a visual representation of network architecture.\n", + " \n", + " Args:\n", + " network: Sequential network to visualize\n", + " title: Title for the plot\n", + " \"\"\"\n", + " if not _should_show_plots():\n", + " print(\"\ud83d\udcca Plots disabled during 
testing - this is normal!\")\n", + " return\n", + " \n", + " fig, ax = plt.subplots(1, 1, figsize=(12, 8))\n", + " \n", + " # Network parameters\n", + " layer_count = len(network.layers)\n", + " layer_height = 0.8\n", + " layer_spacing = 1.2\n", + " \n", + " # Colors for different layer types\n", + " colors = {\n", + " 'Dense': '#4CAF50', # Green\n", + " 'ReLU': '#2196F3', # Blue\n", + " 'Sigmoid': '#FF9800', # Orange\n", + " 'Tanh': '#9C27B0', # Purple\n", + " 'default': '#757575' # Gray\n", + " }\n", + " \n", + " # Draw layers\n", + " for i, layer in enumerate(network.layers):\n", + " # Determine layer type and color\n", + " layer_type = type(layer).__name__\n", + " color = colors.get(layer_type, colors['default'])\n", + " \n", + " # Layer position\n", + " x = i * layer_spacing\n", + " y = 0\n", + " \n", + " # Create layer box\n", + " layer_box = FancyBboxPatch(\n", + " (x - 0.3, y - layer_height/2),\n", + " 0.6, layer_height,\n", + " boxstyle=\"round,pad=0.1\",\n", + " facecolor=color,\n", + " edgecolor='black',\n", + " linewidth=2,\n", + " alpha=0.8\n", + " )\n", + " ax.add_patch(layer_box)\n", + " \n", + " # Add layer label\n", + " ax.text(x, y, layer_type, ha='center', va='center', \n", + " fontsize=10, fontweight='bold', color='white')\n", + " \n", + " # Add layer details\n", + " if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'):\n", + " details = f\"{layer.input_size}\u2192{layer.output_size}\"\n", + " ax.text(x, y - 0.3, details, ha='center', va='center',\n", + " fontsize=8, color='white')\n", + " \n", + " # Draw connections to next layer\n", + " if i < layer_count - 1:\n", + " next_x = (i + 1) * layer_spacing\n", + " connection = ConnectionPatch(\n", + " (x + 0.3, y), (next_x - 0.3, y),\n", + " \"data\", \"data\",\n", + " arrowstyle=\"->\", shrinkA=5, shrinkB=5,\n", + " mutation_scale=20, fc=\"black\", lw=2\n", + " )\n", + " ax.add_patch(connection)\n", + " \n", + " # Formatting\n", + " ax.set_xlim(-0.5, (layer_count - 1) * layer_spacing + 
0.5)\n", + " ax.set_ylim(-1, 1)\n", + " ax.set_aspect('equal')\n", + " ax.axis('off')\n", + " \n", + " # Add title\n", + " plt.title(title, fontsize=16, fontweight='bold', pad=20)\n", + " \n", + " # Add legend\n", + " legend_elements = []\n", + " for layer_type, color in colors.items():\n", + " if layer_type != 'default':\n", + " legend_elements.append(patches.Patch(color=color, label=layer_type))\n", + " \n", + " ax.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(1, 1))\n", + " \n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = \"Data Flow Through Network\"):\n", + " \"\"\"\n", + " Visualize how data flows through the network.\n", + " \n", + " Args:\n", + " network: Sequential network\n", + " input_data: Input tensor\n", + " title: Title for the plot\n", + " \"\"\"\n", + " if not _should_show_plots():\n", + " print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n", + " return\n", + " \n", + " # Get intermediate outputs\n", + " intermediate_outputs = []\n", + " x = input_data\n", + " \n", + " for i, layer in enumerate(network.layers):\n", + " x = layer(x)\n", + " intermediate_outputs.append({\n", + " 'layer': network.layers[i],\n", + " 'output': x,\n", + " 'layer_index': i\n", + " })\n", + " \n", + " # Create visualization\n", + " fig, axes = plt.subplots(2, len(network.layers), figsize=(4*len(network.layers), 8))\n", + " if len(network.layers) == 1:\n", + " axes = axes.reshape(1, -1)\n", + " \n", + " for i, (layer, output) in enumerate(zip(network.layers, intermediate_outputs)):\n", + " # Top row: Layer information\n", + " ax_top = axes[0, i] if len(network.layers) > 1 else axes[0]\n", + " \n", + " # Layer type and details\n", + " layer_type = type(layer).__name__\n", + " ax_top.text(0.5, 0.8, layer_type, ha='center', 
va='center',\n", + " fontsize=12, fontweight='bold')\n", + " \n", + " if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'):\n", + " ax_top.text(0.5, 0.6, f\"{layer.input_size} \u2192 {layer.output_size}\", \n", + " ha='center', va='center', fontsize=10)\n", + " \n", + " # Output shape\n", + " ax_top.text(0.5, 0.4, f\"Shape: {output['output'].shape}\", \n", + " ha='center', va='center', fontsize=9)\n", + " \n", + " # Output statistics\n", + " output_data = output['output'].data\n", + " ax_top.text(0.5, 0.2, f\"Mean: {np.mean(output_data):.3f}\", \n", + " ha='center', va='center', fontsize=9)\n", + " ax_top.text(0.5, 0.1, f\"Std: {np.std(output_data):.3f}\", \n", + " ha='center', va='center', fontsize=9)\n", + " \n", + " ax_top.set_xlim(0, 1)\n", + " ax_top.set_ylim(0, 1)\n", + " ax_top.axis('off')\n", + " \n", + " # Bottom row: Output visualization\n", + " ax_bottom = axes[1, i] if len(network.layers) > 1 else axes[1]\n", + " \n", + " # Show output as heatmap or histogram\n", + " output_data = output['output'].data.flatten()\n", + " \n", + " if len(output_data) <= 20: # Small output - show as bars\n", + " ax_bottom.bar(range(len(output_data)), output_data, alpha=0.7)\n", + " ax_bottom.set_title(f\"Layer {i+1} Output\")\n", + " ax_bottom.set_xlabel(\"Output Index\")\n", + " ax_bottom.set_ylabel(\"Value\")\n", + " else: # Large output - show histogram\n", + " ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black')\n", + " ax_bottom.set_title(f\"Layer {i+1} Output Distribution\")\n", + " ax_bottom.set_xlabel(\"Value\")\n", + " ax_bottom.set_ylabel(\"Frequency\")\n", + " \n", + " ax_bottom.grid(True, alpha=0.3)\n", + " \n", + " plt.suptitle(title, fontsize=14, fontweight='bold')\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def compare_networks(networks: List[Sequential], network_names: List[str], \n", + " 
input_data: Tensor, title: str = \"Network Comparison\"):\n", + " \"\"\"\n", + " Compare different network architectures side-by-side.\n", + " \n", + " Args:\n", + " networks: List of networks to compare\n", + " network_names: Names for each network\n", + " input_data: Input tensor to test with\n", + " title: Title for the plot\n", + " \"\"\"\n", + " if not _should_show_plots():\n", + " print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n", + " return\n", + " \n", + " fig, axes = plt.subplots(2, len(networks), figsize=(6*len(networks), 10))\n", + " if len(networks) == 1:\n", + " axes = axes.reshape(2, -1)\n", + " \n", + " for i, (network, name) in enumerate(zip(networks, network_names)):\n", + " # Get network output\n", + " output = network(input_data)\n", + " \n", + " # Top row: Architecture visualization\n", + " ax_top = axes[0, i] if len(networks) > 1 else axes[0]\n", + " \n", + " # Count layer types\n", + " layer_types = {}\n", + " for layer in network.layers:\n", + " layer_type = type(layer).__name__\n", + " layer_types[layer_type] = layer_types.get(layer_type, 0) + 1\n", + " \n", + " # Create pie chart of layer types\n", + " if layer_types:\n", + " labels = list(layer_types.keys())\n", + " sizes = list(layer_types.values())\n", + " colors = plt.cm.Set3(np.linspace(0, 1, len(labels)))\n", + " \n", + " ax_top.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors)\n", + " ax_top.set_title(f\"{name}\\nLayer Distribution\")\n", + " \n", + " # Bottom row: Output comparison\n", + " ax_bottom = axes[1, i] if len(networks) > 1 else axes[1]\n", + " \n", + " output_data = output.data.flatten()\n", + " \n", + " # Show output statistics\n", + " ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black')\n", + " ax_bottom.axvline(np.mean(output_data), color='red', linestyle='--', \n", + " label=f'Mean: {np.mean(output_data):.3f}')\n", + " ax_bottom.axvline(np.median(output_data), color='green', linestyle='--',\n", + " label=f'Median: 
{np.median(output_data):.3f}')\n", + " \n", + " ax_bottom.set_title(f\"{name} Output Distribution\")\n", + " ax_bottom.set_xlabel(\"Output Value\")\n", + " ax_bottom.set_ylabel(\"Frequency\")\n", + " ax_bottom.legend()\n", + " ax_bottom.grid(True, alpha=0.3)\n", + " \n", + " plt.suptitle(title, fontsize=16, fontweight='bold')\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 3: Building Common Architectures\n", + "\n", + "Now let's build some common neural network architectures and visualize them!\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int, \n", + " activation=ReLU, output_activation=Sigmoid) -> Sequential:\n", + " \"\"\"\n", + " Create a Multi-Layer Perceptron (MLP) network.\n", + " \n", + " Args:\n", + " input_size: Number of input features\n", + " hidden_sizes: List of hidden layer sizes\n", + " output_size: Number of output features\n", + " activation: Activation function for hidden layers\n", + " output_activation: Activation function for output layer\n", + " \n", + " Returns:\n", + " Sequential network\n", + " \"\"\"\n", + " layers = []\n", + " \n", + " # Input layer\n", + " if hidden_sizes:\n", + " layers.append(Dense(input_size, hidden_sizes[0]))\n", + " layers.append(activation())\n", + " \n", + " # Hidden layers\n", + " for i in range(len(hidden_sizes) - 1):\n", + " layers.append(Dense(hidden_sizes[i], hidden_sizes[i + 1]))\n", + " layers.append(activation())\n", + " \n", + " # Output layer\n", + " layers.append(Dense(hidden_sizes[-1], output_size))\n", + " else:\n", + " # Direct input to output\n", + " layers.append(Dense(input_size, output_size))\n", + " \n", + " layers.append(output_activation())\n", + " \n", + " return Sequential(layers)" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test MLP creation and visualization\n", + "try:\n", + " print(\"=== Testing MLP Creation and Visualization ===\")\n", + " \n", + " # Create different MLP architectures\n", + " mlp_small = create_mlp(input_size=3, hidden_sizes=[4], output_size=2)\n", + " mlp_medium = create_mlp(input_size=10, hidden_sizes=[16, 8], output_size=3)\n", + " mlp_large = create_mlp(input_size=784, hidden_sizes=[128, 64, 32], output_size=10)\n", + " \n", + " print(\"Created MLP architectures:\")\n", + " print(f\" Small: 3 \u2192 4 \u2192 2\")\n", + " print(f\" Medium: 10 \u2192 16 \u2192 8 \u2192 3\")\n", + " print(f\" Large: 784 \u2192 128 \u2192 64 \u2192 32 \u2192 10\")\n", + " \n", + " # Test with sample data\n", + " x = Tensor(np.random.randn(5, 3).astype(np.float32))\n", + " \n", + " # Visualize architectures\n", + " visualize_network_architecture(mlp_small, \"Small MLP Architecture\")\n", + " visualize_network_architecture(mlp_medium, \"Medium MLP Architecture\")\n", + " visualize_network_architecture(mlp_large, \"Large MLP Architecture\")\n", + " \n", + " # Visualize data flow\n", + " visualize_data_flow(mlp_small, x, \"Data Flow Through Small MLP\")\n", + " \n", + " # Compare networks\n", + " networks = [mlp_small, mlp_medium]\n", + " names = [\"Small MLP\", \"Medium MLP\"]\n", + " compare_networks(networks, names, x, \"MLP Architecture Comparison\")\n", + " \n", + " print(\"\u2705 MLP creation and visualization working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the visualization functions!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 4: Understanding Network Behavior\n", + "\n", + "Let's analyze how different network architectures behave with different types of input data.\n", + "\"\"\"" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def analyze_network_behavior(network: Sequential, input_data: Tensor, \n", + " title: str = \"Network Behavior Analysis\"):\n", + " \"\"\"\n", + " Analyze how a network behaves with different types of input.\n", + " \n", + " Args:\n", + " network: Network to analyze\n", + " input_data: Input tensor\n", + " title: Title for the plot\n", + " \"\"\"\n", + " if not _should_show_plots():\n", + " print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n", + " return\n", + " \n", + " fig, axes = plt.subplots(2, 3, figsize=(15, 10))\n", + " \n", + " # 1. Input vs Output relationship\n", + " ax1 = axes[0, 0]\n", + " input_flat = input_data.data.flatten()\n", + " output = network(input_data)\n", + " output_flat = output.data.flatten()\n", + " \n", + " ax1.scatter(input_flat, output_flat, alpha=0.6)\n", + " ax1.plot([input_flat.min(), input_flat.max()], \n", + " [input_flat.min(), input_flat.max()], 'r--', alpha=0.5, label='y=x')\n", + " ax1.set_xlabel('Input Values')\n", + " ax1.set_ylabel('Output Values')\n", + " ax1.set_title('Input vs Output')\n", + " ax1.legend()\n", + " ax1.grid(True, alpha=0.3)\n", + " \n", + " # 2. Output distribution\n", + " ax2 = axes[0, 1]\n", + " ax2.hist(output_flat, bins=20, alpha=0.7, edgecolor='black')\n", + " ax2.axvline(np.mean(output_flat), color='red', linestyle='--', \n", + " label=f'Mean: {np.mean(output_flat):.3f}')\n", + " ax2.set_xlabel('Output Values')\n", + " ax2.set_ylabel('Frequency')\n", + " ax2.set_title('Output Distribution')\n", + " ax2.legend()\n", + " ax2.grid(True, alpha=0.3)\n", + " \n", + " # 3. 
Layer-by-layer activation patterns\n", + " ax3 = axes[0, 2]\n", + " activations = []\n", + " x = input_data\n", + " \n", + " for layer in network.layers:\n", + " x = layer(x)\n", + " if hasattr(layer, 'input_size'): # Dense layer\n", + " activations.append(np.mean(x.data))\n", + " else: # Activation layer\n", + " activations.append(np.mean(x.data))\n", + " \n", + " ax3.plot(range(len(activations)), activations, 'bo-', linewidth=2, markersize=8)\n", + " ax3.set_xlabel('Layer Index')\n", + " ax3.set_ylabel('Mean Activation')\n", + " ax3.set_title('Layer-by-Layer Activations')\n", + " ax3.grid(True, alpha=0.3)\n", + " \n", + " # 4. Network depth analysis\n", + " ax4 = axes[1, 0]\n", + " layer_types = [type(layer).__name__ for layer in network.layers]\n", + " layer_counts = {}\n", + " for layer_type in layer_types:\n", + " layer_counts[layer_type] = layer_counts.get(layer_type, 0) + 1\n", + " \n", + " if layer_counts:\n", + " ax4.bar(layer_counts.keys(), layer_counts.values(), alpha=0.7)\n", + " ax4.set_xlabel('Layer Type')\n", + " ax4.set_ylabel('Count')\n", + " ax4.set_title('Layer Type Distribution')\n", + " ax4.grid(True, alpha=0.3)\n", + " \n", + " # 5. Shape transformation\n", + " ax5 = axes[1, 1]\n", + " shapes = [input_data.shape]\n", + " x = input_data\n", + " \n", + " for layer in network.layers:\n", + " x = layer(x)\n", + " shapes.append(x.shape)\n", + " \n", + " layer_indices = range(len(shapes))\n", + " shape_sizes = [np.prod(shape) for shape in shapes]\n", + " \n", + " ax5.plot(layer_indices, shape_sizes, 'go-', linewidth=2, markersize=8)\n", + " ax5.set_xlabel('Layer Index')\n", + " ax5.set_ylabel('Tensor Size')\n", + " ax5.set_title('Shape Transformation')\n", + " ax5.grid(True, alpha=0.3)\n", + " \n", + " # 6. 
Network summary\n", + " ax6 = axes[1, 2]\n", + " ax6.axis('off')\n", + " \n", + " summary_text = f\"\"\"\n", + "Network Summary:\n", + "\u2022 Total Layers: {len(network.layers)}\n", + "\u2022 Input Shape: {input_data.shape}\n", + "\u2022 Output Shape: {output.shape}\n", + "\u2022 Parameters: {sum(np.prod(layer.weights.data.shape) if hasattr(layer, 'weights') else 0 for layer in network.layers)}\n", + "\u2022 Architecture: {' \u2192 '.join([type(layer).__name__ for layer in network.layers])}\n", + " \"\"\"\n", + " \n", + " ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes, \n", + " fontsize=10, verticalalignment='top', fontfamily='monospace')\n", + " \n", + " plt.suptitle(title, fontsize=16, fontweight='bold')\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test network behavior analysis\n", + "try:\n", + " print(\"=== Testing Network Behavior Analysis ===\")\n", + " \n", + " # Create a network for analysis\n", + " network = create_mlp(input_size=5, hidden_sizes=[8, 4], output_size=2)\n", + " \n", + " # Test with different types of input\n", + " x_normal = Tensor(np.random.randn(10, 5).astype(np.float32))\n", + " x_uniform = Tensor(np.random.uniform(-1, 1, (10, 5)).astype(np.float32))\n", + " x_zeros = Tensor(np.zeros((10, 5)).astype(np.float32))\n", + " \n", + " print(\"Analyzing network behavior with different inputs...\")\n", + " \n", + " # Analyze behavior\n", + " analyze_network_behavior(network, x_normal, \"Network Behavior: Normal Input\")\n", + " analyze_network_behavior(network, x_uniform, \"Network Behavior: Uniform Input\")\n", + " analyze_network_behavior(network, x_zeros, \"Network Behavior: Zero Input\")\n", + " \n", + " print(\"\u2705 Network behavior analysis working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the behavior analysis function!\")" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 5: Practical Applications\n", + "\n", + "Let's see how our networks can be applied to real-world problems!\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def create_classification_network(input_size: int, num_classes: int, \n", + " hidden_sizes: List[int] = None) -> Sequential:\n", + " \"\"\"\n", + " Create a network for classification problems.\n", + " \n", + " Args:\n", + " input_size: Number of input features\n", + " num_classes: Number of output classes\n", + " hidden_sizes: List of hidden layer sizes (default: [input_size//2])\n", + " \n", + " Returns:\n", + " Sequential network for classification\n", + " \"\"\"\n", + " if hidden_sizes is None:\n", + " hidden_sizes = [input_size // 2]\n", + " \n", + " return create_mlp(\n", + " input_size=input_size,\n", + " hidden_sizes=hidden_sizes,\n", + " output_size=num_classes,\n", + " activation=ReLU,\n", + " output_activation=Sigmoid\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def create_regression_network(input_size: int, output_size: int = 1,\n", + " hidden_sizes: List[int] = None) -> Sequential:\n", + " \"\"\"\n", + " Create a network for regression problems.\n", + " \n", + " Args:\n", + " input_size: Number of input features\n", + " output_size: Number of output values (default: 1)\n", + " hidden_sizes: List of hidden layer sizes (default: [input_size//2])\n", + " \n", + " Returns:\n", + " Sequential network for regression\n", + " \"\"\"\n", + " if hidden_sizes is None:\n", + " hidden_sizes = [input_size // 2]\n", + " \n", + " return create_mlp(\n", + " input_size=input_size,\n", + " hidden_sizes=hidden_sizes,\n", + " output_size=output_size,\n", + " activation=ReLU,\n", + " 
output_activation=Tanh # No activation for regression\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test practical applications\n", + "try:\n", + " print(\"=== Testing Practical Applications ===\")\n", + " \n", + " # Create networks for different tasks\n", + " digit_classifier = create_classification_network(\n", + " input_size=784, # 28x28 image\n", + " num_classes=10, # 10 digits\n", + " hidden_sizes=[128, 64]\n", + " )\n", + " \n", + " sentiment_analyzer = create_classification_network(\n", + " input_size=100, # 100-dimensional word embeddings\n", + " num_classes=2, # Positive/Negative\n", + " hidden_sizes=[32, 16]\n", + " )\n", + " \n", + " house_price_predictor = create_regression_network(\n", + " input_size=13, # 13 house features\n", + " output_size=1, # 1 price prediction\n", + " hidden_sizes=[8, 4]\n", + " )\n", + " \n", + " print(\"Created networks for different applications:\")\n", + " print(f\" Digit Classifier: 784 \u2192 128 \u2192 64 \u2192 10\")\n", + " print(f\" Sentiment Analyzer: 100 \u2192 32 \u2192 16 \u2192 2\")\n", + " print(f\" House Price Predictor: 13 \u2192 8 \u2192 4 \u2192 1\")\n", + " \n", + " # Test with sample data\n", + " digit_input = Tensor(np.random.randn(1, 784).astype(np.float32))\n", + " sentiment_input = Tensor(np.random.randn(1, 100).astype(np.float32))\n", + " house_input = Tensor(np.random.randn(1, 13).astype(np.float32))\n", + " \n", + " # Get predictions\n", + " digit_pred = digit_classifier(digit_input)\n", + " sentiment_pred = sentiment_analyzer(sentiment_input)\n", + " house_pred = house_price_predictor(house_input)\n", + " \n", + " print(f\"\\nSample predictions:\")\n", + " print(f\" Digit classifier output: {digit_pred.data[0]}\")\n", + " print(f\" Sentiment analyzer output: {sentiment_pred.data[0]}\")\n", + " print(f\" House price predictor output: {house_pred.data[0]}\")\n", + " \n", + " # Visualize architectures\n", + " 
visualize_network_architecture(digit_classifier, \"Digit Classification Network\")\n", + " visualize_network_architecture(sentiment_analyzer, \"Sentiment Analysis Network\")\n", + " visualize_network_architecture(house_price_predictor, \"House Price Prediction Network\")\n", + " \n", + " print(\"\u2705 Practical applications working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the application functions!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## \ud83c\udf93 Module Summary\n", + "\n", + "### What You Learned\n", + "1. **Network Composition**: Building complete networks from layers\n", + "2. **Architecture Design**: How to choose network structures\n", + "3. **Visualization**: Understanding networks through visual analysis\n", + "4. **Practical Applications**: Real-world network use cases\n", + "\n", + "### Key Architectural Insights\n", + "- **Function Composition**: Networks as `f(x) = layer_n(...layer_1(x))`\n", + "- **Modular Design**: Clean separation between layers and networks\n", + "- **Visual Understanding**: How to analyze network behavior\n", + "- **Application Patterns**: Classification vs regression architectures\n", + "\n", + "### Network Design Principles\n", + "- **Depth vs Width**: Trade-offs in network architecture\n", + "- **Activation Functions**: How they affect network behavior\n", + "- **Shape Management**: Understanding tensor transformations\n", + "- **Practical Considerations**: Choosing architectures for specific tasks\n", + "\n", + "### Next Steps\n", + "- **Training**: Learn how networks learn from data (autograd, optimization)\n", + "- **Advanced Architectures**: CNNs, RNNs, Transformers\n", + "- **Real Data**: Working with actual datasets\n", + "- **Production**: Deploying networks in real applications\n", + "\n", + "**Congratulations on mastering neural network 
architectures!** \ud83d\ude80\n", + "\"\"\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/modules/networks/networks_dev.py b/modules/networks/networks_dev.py new file mode 100644 index 00000000..8a485705 --- /dev/null +++ b/modules/networks/networks_dev.py @@ -0,0 +1,837 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.1 +# --- + +# %% [markdown] +""" +# Module 3: Networks - Neural Network Architectures + +Welcome to the Networks module! This is where we compose layers into complete neural network architectures. + +## Learning Goals +- Understand networks as function composition: `f(x) = layer_n(...layer_2(layer_1(x)))` +- Build common architectures (MLP, CNN) from layers +- Visualize network structure and data flow +- See how architecture affects capability +- Master forward pass inference (no training yet!) + +## Build โ†’ Use โ†’ Understand +1. **Build**: Compose layers into complete networks +2. **Use**: Create different architectures and run inference +3. **Understand**: How architecture design affects network behavior + +## Module Dependencies +This module builds on previous modules: +- **tensor** โ†’ **activations** โ†’ **layers** โ†’ **networks** +- Clean composition: math functions โ†’ building blocks โ†’ complete systems + +## Module โ†’ Package Structure +**๐ŸŽ“ Teaching vs. ๐Ÿ”ง Building**: +- **Learning side**: Work in `modules/networks/networks_dev.py` +- **Building side**: Exports to `tinytorch/core/networks.py` + +This module teaches how to compose layers into complete neural network architectures. 
+""" + +# %% +#| default_exp core.networks + +# Setup and imports +import numpy as np +import sys +from typing import List, Union, Optional, Callable +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from matplotlib.patches import FancyBboxPatch, ConnectionPatch +import seaborn as sns + +# Import our building blocks +from tinytorch.core.tensor import Tensor +from tinytorch.core.layers import Dense +from tinytorch.core.activations import ReLU, Sigmoid, Tanh + +print("๐Ÿ”ฅ TinyTorch Networks Module") +print(f"NumPy version: {np.__version__}") +print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}") +print("Ready to build neural network architectures!") + +# %% +#| export +import numpy as np +import sys +from typing import List, Union, Optional, Callable +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from matplotlib.patches import FancyBboxPatch, ConnectionPatch +import seaborn as sns + +# Import our building blocks +from tinytorch.core.tensor import Tensor +from tinytorch.core.layers import Dense +from tinytorch.core.activations import ReLU, Sigmoid, Tanh + +# %% +#| hide +#| export +def _should_show_plots(): + """Check if we should show plots (disable during testing)""" + return 'pytest' not in sys.modules and 'test' not in sys.argv + +# %% [markdown] +""" +## Step 1: What is a Network? + +A **network** is a composition of layers that transforms input data into output predictions. Think of it as: + +``` +Input โ†’ Layer1 โ†’ Layer2 โ†’ Layer3 โ†’ Output +``` + +**The fundamental insight**: Neural networks are just function composition! +- Each layer is a function: `f_i(x)` +- The network is: `f(x) = f_n(...f_2(f_1(x)))` +- Complex behavior emerges from simple building blocks + +**Why networks matter**: +- They solve real problems (classification, regression, etc.) 
class Sequential:
    """
    Sequential Network: a chain of layers applied one after another.

    Calling the network feeds the input to the first layer, hands that
    layer's result to the second, and so on:
    f(x) = layer_n(...layer_2(layer_1(x)))

    Args:
        layers: List of callables (layers) to compose, in application order
    """

    def __init__(self, layers: List):
        """Keep a reference to the given list of layers."""
        self.layers = layers

    def forward(self, x: Tensor) -> Tensor:
        """
        Push ``x`` through every stored layer in order.

        Args:
            x: Input tensor

        Returns:
            The value produced by the last layer (``x`` itself when the
            network holds no layers)
        """
        result = x
        for stage in self.layers:
            result = stage(result)
        return result

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``network(x)`` as shorthand for ``network.forward(x)``."""
        return self.forward(x)
+ + Args: + network: Sequential network to visualize + title: Title for the plot + """ + if not _should_show_plots(): + print("๐Ÿ“Š Plots disabled during testing - this is normal!") + return + + fig, ax = plt.subplots(1, 1, figsize=(12, 8)) + + # Network parameters + layer_count = len(network.layers) + layer_height = 0.8 + layer_spacing = 1.2 + + # Colors for different layer types + colors = { + 'Dense': '#4CAF50', # Green + 'ReLU': '#2196F3', # Blue + 'Sigmoid': '#FF9800', # Orange + 'Tanh': '#9C27B0', # Purple + 'default': '#757575' # Gray + } + + # Draw layers + for i, layer in enumerate(network.layers): + # Determine layer type and color + layer_type = type(layer).__name__ + color = colors.get(layer_type, colors['default']) + + # Layer position + x = i * layer_spacing + y = 0 + + # Create layer box + layer_box = FancyBboxPatch( + (x - 0.3, y - layer_height/2), + 0.6, layer_height, + boxstyle="round,pad=0.1", + facecolor=color, + edgecolor='black', + linewidth=2, + alpha=0.8 + ) + ax.add_patch(layer_box) + + # Add layer label + ax.text(x, y, layer_type, ha='center', va='center', + fontsize=10, fontweight='bold', color='white') + + # Add layer details + if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'): + details = f"{layer.input_size}โ†’{layer.output_size}" + ax.text(x, y - 0.3, details, ha='center', va='center', + fontsize=8, color='white') + + # Draw connections to next layer + if i < layer_count - 1: + next_x = (i + 1) * layer_spacing + connection = ConnectionPatch( + (x + 0.3, y), (next_x - 0.3, y), + "data", "data", + arrowstyle="->", shrinkA=5, shrinkB=5, + mutation_scale=20, fc="black", lw=2 + ) + ax.add_patch(connection) + + # Formatting + ax.set_xlim(-0.5, (layer_count - 1) * layer_spacing + 0.5) + ax.set_ylim(-1, 1) + ax.set_aspect('equal') + ax.axis('off') + + # Add title + plt.title(title, fontsize=16, fontweight='bold', pad=20) + + # Add legend + legend_elements = [] + for layer_type, color in colors.items(): + if layer_type != 
def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = "Data Flow Through Network"):
    """
    Visualize how data flows through the network, one figure column per layer.

    The input is pushed through ``network.layers`` one layer at a time and
    every intermediate output is kept. For each layer the figure shows:

    * top row - a text panel with the layer's class name, its
      ``input_size`` and ``output_size`` (only when the layer has both
      attributes), and the shape, mean and standard deviation of its output;
    * bottom row - the flattened output as a bar chart when it has at most
      20 values, otherwise as a 20-bin histogram.

    Nothing is drawn when plotting is disabled (see ``_should_show_plots``)
    or when the network has no layers.

    Args:
        network: Sequential network whose ``layers`` are applied in order
        input_data: Input tensor fed to the first layer
        title: Title for the plot
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    layers = list(network.layers)
    if not layers:
        # plt.subplots cannot build a grid with zero columns.
        print("Network has no layers - nothing to visualize.")
        return

    # Run the forward pass once, keeping every intermediate tensor.
    layer_outputs = []
    x = input_data
    for layer in layers:
        x = layer(x)
        layer_outputs.append(x)

    # squeeze=False keeps `axes` two-dimensional even for a single-layer
    # network, so axes[row, column] is valid for every layer count. The
    # previous reshape-based special case returned an array of axes instead
    # of a single Axes and crashed for one-layer networks.
    fig, axes = plt.subplots(2, len(layers), figsize=(4 * len(layers), 8), squeeze=False)

    for i, (layer, output) in enumerate(zip(layers, layer_outputs)):
        # Top row: layer information
        ax_top = axes[0, i]
        ax_top.text(0.5, 0.8, type(layer).__name__, ha='center', va='center',
                    fontsize=12, fontweight='bold')

        if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'):
            ax_top.text(0.5, 0.6, f"{layer.input_size} → {layer.output_size}",
                        ha='center', va='center', fontsize=10)

        ax_top.text(0.5, 0.4, f"Shape: {output.shape}",
                    ha='center', va='center', fontsize=9)
        ax_top.text(0.5, 0.2, f"Mean: {np.mean(output.data):.3f}",
                    ha='center', va='center', fontsize=9)
        ax_top.text(0.5, 0.1, f"Std: {np.std(output.data):.3f}",
                    ha='center', va='center', fontsize=9)

        ax_top.set_xlim(0, 1)
        ax_top.set_ylim(0, 1)
        ax_top.axis('off')

        # Bottom row: the output values themselves
        ax_bottom = axes[1, i]
        flat_output = output.data.flatten()

        if len(flat_output) <= 20:  # Small output - show as bars
            ax_bottom.bar(range(len(flat_output)), flat_output, alpha=0.7)
            ax_bottom.set_title(f"Layer {i+1} Output")
            ax_bottom.set_xlabel("Output Index")
            ax_bottom.set_ylabel("Value")
        else:  # Large output - show histogram
            ax_bottom.hist(flat_output, bins=20, alpha=0.7, edgecolor='black')
            ax_bottom.set_title(f"Layer {i+1} Output Distribution")
            ax_bottom.set_xlabel("Value")
            ax_bottom.set_ylabel("Frequency")

        ax_bottom.grid(True, alpha=0.3)

    plt.suptitle(title, fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()
def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int,
               activation=ReLU, output_activation=Sigmoid) -> Sequential:
    """
    Create a Multi-Layer Perceptron (MLP) network.

    The network is a chain of ``Dense`` layers whose widths are
    ``input_size -> hidden_sizes[0] -> ... -> hidden_sizes[-1] -> output_size``.
    A fresh ``activation()`` instance follows every Dense layer except the
    last one; the last Dense layer is followed by ``output_activation()``.
    With an empty (or ``None``) ``hidden_sizes`` the result is a single
    ``Dense(input_size, output_size)`` plus the output activation.

    Args:
        input_size: Number of input features
        hidden_sizes: List of hidden layer sizes (may be empty)
        output_size: Number of output features
        activation: Zero-argument callable (e.g. a layer class) that builds
            the activation placed after each hidden Dense layer
        output_activation: Zero-argument callable that builds the activation
            placed after the final Dense layer, or ``None`` to leave the
            final Dense layer's output untouched (linear output)

    Returns:
        Sequential network
    """
    # Consecutive pairs of this list are the (inputs, outputs) of each Dense layer.
    widths = [input_size, *(hidden_sizes or []), output_size]
    final_index = len(widths) - 2

    layers = []
    for index, (fan_in, fan_out) in enumerate(zip(widths, widths[1:])):
        layers.append(Dense(fan_in, fan_out))
        # Hidden activations sit between Dense layers, never after the last one.
        if index < final_index:
            layers.append(activation())

    if output_activation is not None:
        layers.append(output_activation())

    return Sequential(layers)
def analyze_network_behavior(network: Sequential, input_data: Tensor,
                             title: str = "Network Behavior Analysis"):
    """
    Plot a six-panel summary of one forward pass through ``network``.

    Panels:
        1. Input vs output scatter. When input and output hold the same
           number of elements they are paired element-wise; otherwise, if
           both share the same leading dimension, each point is one row
           (mean of that row's input values vs mean of its output values).
        2. Histogram of all output values with the mean marked.
        3. Mean value of each layer's output, in layer order.
        4. Number of layers of each class.
        5. Element count of the tensor before the first layer and after
           every layer.
        6. Text summary (layer count, shapes, weight count, architecture).

    Nothing is drawn when plotting is disabled (see ``_should_show_plots``).

    Args:
        network: Network to analyze; its ``layers`` are applied in order
        input_data: Input tensor
        title: Title for the plot
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    # One forward pass, keeping every intermediate tensor, serves all panels.
    layer_outputs = []
    x = input_data
    for layer in network.layers:
        x = layer(x)
        layer_outputs.append(x)
    output = x

    input_values = np.asarray(input_data.data)
    output_values = np.asarray(output.data)
    output_flat = output_values.flatten()

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # 1. Input vs Output relationship
    ax1 = axes[0, 0]
    x_label, y_label = 'Input Values', 'Output Values'
    if input_values.size == output_values.size:
        # Same element count: pair values one-to-one.
        xs, ys = input_values.flatten(), output_flat
    elif (input_values.ndim > 0 and output_values.ndim > 0
          and input_values.shape[0] == output_values.shape[0]):
        # scatter() needs equally long x and y, so summarize each row
        # (assumes the leading dimension indexes samples - TODO confirm).
        n_rows = input_values.shape[0]
        xs = input_values.reshape(n_rows, -1).mean(axis=1)
        ys = output_values.reshape(n_rows, -1).mean(axis=1)
        x_label, y_label = 'Mean Input (per sample)', 'Mean Output (per sample)'
    else:
        xs = ys = None

    if xs is not None and xs.size:
        ax1.scatter(xs, ys, alpha=0.6)
        ax1.plot([xs.min(), xs.max()],
                 [xs.min(), xs.max()], 'r--', alpha=0.5, label='y=x')
        ax1.legend()
    else:
        ax1.text(0.5, 0.5, 'Input and output cannot be paired',
                 transform=ax1.transAxes, ha='center', va='center')
    ax1.set_xlabel(x_label)
    ax1.set_ylabel(y_label)
    ax1.set_title('Input vs Output')
    ax1.grid(True, alpha=0.3)

    # 2. Output distribution
    ax2 = axes[0, 1]
    ax2.hist(output_flat, bins=20, alpha=0.7, edgecolor='black')
    ax2.axvline(np.mean(output_flat), color='red', linestyle='--',
                label=f'Mean: {np.mean(output_flat):.3f}')
    ax2.set_xlabel('Output Values')
    ax2.set_ylabel('Frequency')
    ax2.set_title('Output Distribution')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 3. Layer-by-layer activation patterns
    ax3 = axes[0, 2]
    activations = [np.mean(layer_output.data) for layer_output in layer_outputs]
    ax3.plot(range(len(activations)), activations, 'bo-', linewidth=2, markersize=8)
    ax3.set_xlabel('Layer Index')
    ax3.set_ylabel('Mean Activation')
    ax3.set_title('Layer-by-Layer Activations')
    ax3.grid(True, alpha=0.3)

    # 4. Network depth analysis
    ax4 = axes[1, 0]
    layer_names = [type(layer).__name__ for layer in network.layers]
    layer_counts = {}
    for layer_name in layer_names:
        layer_counts[layer_name] = layer_counts.get(layer_name, 0) + 1

    if layer_counts:
        ax4.bar(layer_counts.keys(), layer_counts.values(), alpha=0.7)
        ax4.set_xlabel('Layer Type')
        ax4.set_ylabel('Count')
        ax4.set_title('Layer Type Distribution')
        ax4.grid(True, alpha=0.3)

    # 5. Shape transformation
    ax5 = axes[1, 1]
    shapes = [input_data.shape] + [layer_output.shape for layer_output in layer_outputs]
    shape_sizes = [np.prod(shape) for shape in shapes]

    ax5.plot(range(len(shapes)), shape_sizes, 'go-', linewidth=2, markersize=8)
    ax5.set_xlabel('Layer Index')
    ax5.set_ylabel('Tensor Size')
    ax5.set_title('Shape Transformation')
    ax5.grid(True, alpha=0.3)

    # 6. Network summary
    ax6 = axes[1, 2]
    ax6.axis('off')

    # NOTE: only arrays stored under a layer's `weights` attribute are counted.
    parameter_count = sum(
        np.prod(layer.weights.data.shape) if hasattr(layer, 'weights') else 0
        for layer in network.layers
    )
    architecture = ' → '.join(layer_names)

    summary_text = f"""
Network Summary:
• Total Layers: {len(network.layers)}
• Input Shape: {input_data.shape}
• Output Shape: {output.shape}
• Parameters: {parameter_count}
• Architecture: {architecture}
    """

    ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes,
             fontsize=10, verticalalignment='top', fontfamily='monospace')

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
def create_regression_network(input_size: int, output_size: int = 1,
                              hidden_sizes: Optional[List[int]] = None) -> Sequential:
    """
    Create a network for regression problems.

    Delegates to ``create_mlp`` with ``ReLU`` hidden activations and a
    ``Tanh`` output activation.

    Args:
        input_size: Number of input features
        output_size: Number of output values (default: 1)
        hidden_sizes: List of hidden layer sizes. When omitted, a single
            hidden layer of ``input_size // 2`` units is used, with a
            minimum of one unit.

    Returns:
        Sequential network for regression
    """
    if hidden_sizes is None:
        # input_size // 2 is 0 for input_size == 1; never build a zero-width layer.
        hidden_sizes = [max(1, input_size // 2)]

    return create_mlp(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=output_size,
        activation=ReLU,
        # NOTE: the output IS passed through Tanh (an earlier comment here
        # claimed "no activation"). Assuming Tanh is the standard hyperbolic
        # tangent, predictions lie in (-1, 1), so regression targets are
        # presumably expected to be scaled into that range - confirm.
        output_activation=Tanh
    )
Tensor(np.random.randn(1, 784).astype(np.float32)) + sentiment_input = Tensor(np.random.randn(1, 100).astype(np.float32)) + house_input = Tensor(np.random.randn(1, 13).astype(np.float32)) + + # Get predictions + digit_pred = digit_classifier(digit_input) + sentiment_pred = sentiment_analyzer(sentiment_input) + house_pred = house_price_predictor(house_input) + + print(f"\nSample predictions:") + print(f" Digit classifier output: {digit_pred.data[0]}") + print(f" Sentiment analyzer output: {sentiment_pred.data[0]}") + print(f" House price predictor output: {house_pred.data[0]}") + + # Visualize architectures + visualize_network_architecture(digit_classifier, "Digit Classification Network") + visualize_network_architecture(sentiment_analyzer, "Sentiment Analysis Network") + visualize_network_architecture(house_price_predictor, "House Price Prediction Network") + + print("โœ… Practical applications working!") + +except Exception as e: + print(f"โŒ Error: {e}") + print("Make sure to implement the application functions!") + +# %% [markdown] +""" +## ๐ŸŽ“ Module Summary + +### What You Learned +1. **Network Composition**: Building complete networks from layers +2. **Architecture Design**: How to choose network structures +3. **Visualization**: Understanding networks through visual analysis +4. 
**Practical Applications**: Real-world network use cases + +### Key Architectural Insights +- **Function Composition**: Networks as `f(x) = layer_n(...layer_1(x))` +- **Modular Design**: Clean separation between layers and networks +- **Visual Understanding**: How to analyze network behavior +- **Application Patterns**: Classification vs regression architectures + +### Network Design Principles +- **Depth vs Width**: Trade-offs in network architecture +- **Activation Functions**: How they affect network behavior +- **Shape Management**: Understanding tensor transformations +- **Practical Considerations**: Choosing architectures for specific tasks + +### Next Steps +- **Training**: Learn how networks learn from data (autograd, optimization) +- **Advanced Architectures**: CNNs, RNNs, Transformers +- **Real Data**: Working with actual datasets +- **Production**: Deploying networks in real applications + +**Congratulations on mastering neural network architectures!** 🚀 +""" \ No newline at end of file diff --git a/modules/networks/tests/test_networks.py b/modules/networks/tests/test_networks.py new file mode 100644 index 00000000..6f2436bb --- /dev/null +++ b/modules/networks/tests/test_networks.py @@ -0,0 +1,420 @@ +""" +Tests for the Networks module. + +Tests network composition, visualization, and practical applications. 
+""" + +import pytest +import numpy as np +import sys +from pathlib import Path + +# Add the project root to the path +project_root = Path(__file__).parent.parent.parent.parent +sys.path.insert(0, str(project_root)) + +# Import the modules we're testing +from tinytorch.core.tensor import Tensor +from tinytorch.core.layers import Dense +from tinytorch.core.activations import ReLU, Sigmoid, Tanh + +# Import the networks module +try: + from modules.networks.networks_dev import ( + Sequential, + create_mlp, + create_classification_network, + create_regression_network, + visualize_network_architecture, + visualize_data_flow, + compare_networks, + analyze_network_behavior + ) +except ImportError: + # Fallback for when module isn't exported yet + sys.path.append(str(project_root / "modules" / "networks")) + from networks_dev import ( + Sequential, + create_mlp, + create_classification_network, + create_regression_network, + visualize_network_architecture, + visualize_data_flow, + compare_networks, + analyze_network_behavior + ) + + +class TestSequentialNetwork: + """Test the Sequential network class.""" + + def test_sequential_initialization(self): + """Test Sequential network initialization.""" + layers = [Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()] + network = Sequential(layers) + + assert len(network.layers) == 4 + assert isinstance(network.layers[0], Dense) + assert isinstance(network.layers[1], ReLU) + assert isinstance(network.layers[2], Dense) + assert isinstance(network.layers[3], Sigmoid) + + def test_sequential_forward_pass(self): + """Test Sequential network forward pass.""" + network = Sequential([ + Dense(3, 4), + ReLU(), + Dense(4, 2), + Sigmoid() + ]) + + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + output = network(x) + + assert output.shape == (2, 2) + assert isinstance(output, Tensor) + # Sigmoid output should be between 0 and 1 + assert np.all(output.data >= 0) and np.all(output.data <= 1) + + def test_sequential_callable(self): + """Test that 
Sequential network is callable.""" + network = Sequential([Dense(2, 3), ReLU()]) + x = Tensor([[1.0, 2.0]]) + + # Test both forward() and __call__() + output1 = network.forward(x) + output2 = network(x) + + assert np.allclose(output1.data, output2.data) + + def test_empty_sequential(self): + """Test Sequential network with no layers.""" + network = Sequential([]) + x = Tensor([[1.0, 2.0, 3.0]]) + + # Should return input unchanged + output = network(x) + assert np.allclose(output.data, x.data) + + +class TestMLPCreation: + """Test MLP creation functions.""" + + def test_create_mlp_basic(self): + """Test basic MLP creation.""" + mlp = create_mlp(input_size=3, hidden_sizes=[4], output_size=2) + + assert len(mlp.layers) == 4 # Dense + ReLU + Dense + Sigmoid + assert isinstance(mlp.layers[0], Dense) + assert mlp.layers[0].input_size == 3 + assert mlp.layers[0].output_size == 4 + assert isinstance(mlp.layers[1], ReLU) + assert isinstance(mlp.layers[2], Dense) + assert mlp.layers[2].input_size == 4 + assert mlp.layers[2].output_size == 2 + assert isinstance(mlp.layers[3], Sigmoid) + + def test_create_mlp_multiple_hidden(self): + """Test MLP creation with multiple hidden layers.""" + mlp = create_mlp(input_size=10, hidden_sizes=[16, 8, 4], output_size=3) + + assert len(mlp.layers) == 8 # 3 Dense + 3 ReLU + 1 Dense + 1 Sigmoid + + # Check Dense layers + dense_layers = [layer for layer in mlp.layers if isinstance(layer, Dense)] + assert len(dense_layers) == 4 + + assert dense_layers[0].input_size == 10 + assert dense_layers[0].output_size == 16 + assert dense_layers[1].input_size == 16 + assert dense_layers[1].output_size == 8 + assert dense_layers[2].input_size == 8 + assert dense_layers[2].output_size == 4 + assert dense_layers[3].input_size == 4 + assert dense_layers[3].output_size == 3 + + def test_create_mlp_no_hidden(self): + """Test MLP creation with no hidden layers.""" + mlp = create_mlp(input_size=5, hidden_sizes=[], output_size=2) + + assert len(mlp.layers) == 2 # 
Dense + Sigmoid + assert isinstance(mlp.layers[0], Dense) + assert mlp.layers[0].input_size == 5 + assert mlp.layers[0].output_size == 2 + assert isinstance(mlp.layers[1], Sigmoid) + + def test_create_mlp_custom_activation(self): + """Test MLP creation with custom activation functions.""" + mlp = create_mlp( + input_size=3, + hidden_sizes=[4], + output_size=2, + activation=Tanh, + output_activation=Tanh + ) + + assert len(mlp.layers) == 4 + assert isinstance(mlp.layers[1], Tanh) # Hidden activation + assert isinstance(mlp.layers[3], Tanh) # Output activation + + +class TestSpecializedNetworks: + """Test specialized network creation functions.""" + + def test_create_classification_network(self): + """Test classification network creation.""" + classifier = create_classification_network( + input_size=100, + num_classes=5, + hidden_sizes=[32, 16] + ) + + assert len(classifier.layers) == 6 # 2 Dense + 2 ReLU + 1 Dense + 1 Sigmoid + + # Check output layer + dense_layers = [layer for layer in classifier.layers if isinstance(layer, Dense)] + assert dense_layers[-1].output_size == 5 + assert isinstance(classifier.layers[-1], Sigmoid) + + def test_create_classification_network_default(self): + """Test classification network with default hidden sizes.""" + classifier = create_classification_network(input_size=50, num_classes=3) + + # Should use default hidden size of input_size // 2 + expected_hidden = 50 // 2 + dense_layers = [layer for layer in classifier.layers if isinstance(layer, Dense)] + assert dense_layers[0].output_size == expected_hidden + assert dense_layers[1].output_size == 3 + + def test_create_regression_network(self): + """Test regression network creation.""" + regressor = create_regression_network( + input_size=13, + output_size=1, + hidden_sizes=[8, 4] + ) + + assert len(regressor.layers) == 6 # 2 Dense + 2 ReLU + 1 Dense + 1 Tanh + + # Check output layer + dense_layers = [layer for layer in regressor.layers if isinstance(layer, Dense)] + assert 
dense_layers[-1].output_size == 1 + assert isinstance(regressor.layers[-1], Tanh) + + def test_create_regression_network_default(self): + """Test regression network with default parameters.""" + regressor = create_regression_network(input_size=20) + + # Should use default output_size=1 and hidden_size=input_size//2 + expected_hidden = 20 // 2 + dense_layers = [layer for layer in regressor.layers if isinstance(layer, Dense)] + assert dense_layers[0].output_size == expected_hidden + assert dense_layers[1].output_size == 1 + + +class TestNetworkBehavior: + """Test network behavior and functionality.""" + + def test_network_shape_transformations(self): + """Test that networks properly transform tensor shapes.""" + network = Sequential([ + Dense(3, 4), + ReLU(), + Dense(4, 2), + Sigmoid() + ]) + + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + output = network(x) + + assert x.shape == (2, 3) + assert output.shape == (2, 2) + + def test_network_activations(self): + """Test that activation functions are properly applied.""" + network = Sequential([ + Dense(2, 3), + ReLU(), + Dense(3, 1), + Sigmoid() + ]) + + x = Tensor([[-1.0, 1.0]]) + output = network(x) + + # ReLU should zero out negative values + # Sigmoid should output values between 0 and 1 + assert np.all(output.data >= 0) and np.all(output.data <= 1) + + def test_network_parameter_count(self): + """Test that networks have the expected number of parameters.""" + network = Sequential([ + Dense(3, 4), # 3*4 + 4 = 16 parameters + ReLU(), + Dense(4, 2), # 4*2 + 2 = 10 parameters + Sigmoid() + ]) + + # Count parameters (weights + biases) + total_params = 0 + for layer in network.layers: + if hasattr(layer, 'weights'): + total_params += layer.weights.data.size + if hasattr(layer, 'bias') and layer.bias is not None: + total_params += layer.bias.data.size + + assert total_params == 26 # 16 + 10 + + +class TestVisualizationFunctions: + """Test visualization functions (basic functionality, not visual output).""" + + def 
test_visualize_network_architecture_exists(self): + """Test that visualization function exists and is callable.""" + network = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()]) + + # Should not raise an error + try: + visualize_network_architecture(network, "Test Network") + except Exception as e: + pytest.fail(f"visualize_network_architecture raised {e}") + + def test_visualize_data_flow_exists(self): + """Test that data flow visualization function exists and is callable.""" + network = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()]) + x = Tensor([[1.0, 2.0, 3.0]]) + + # Should not raise an error + try: + visualize_data_flow(network, x, "Test Data Flow") + except Exception as e: + pytest.fail(f"visualize_data_flow raised {e}") + + def test_compare_networks_exists(self): + """Test that network comparison function exists and is callable.""" + network1 = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()]) + network2 = Sequential([Dense(3, 8), ReLU(), Dense(8, 2), Sigmoid()]) + x = Tensor([[1.0, 2.0, 3.0]]) + + # Should not raise an error + try: + compare_networks([network1, network2], ["Small", "Large"], x, "Test Comparison") + except Exception as e: + pytest.fail(f"compare_networks raised {e}") + + def test_analyze_network_behavior_exists(self): + """Test that behavior analysis function exists and is callable.""" + network = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()]) + x = Tensor([[1.0, 2.0, 3.0]]) + + # Should not raise an error + try: + analyze_network_behavior(network, x, "Test Behavior") + except Exception as e: + pytest.fail(f"analyze_network_behavior raised {e}") + + +class TestPracticalApplications: + """Test practical network applications.""" + + def test_digit_classification_network(self): + """Test creating a network for digit classification.""" + classifier = create_classification_network( + input_size=784, # 28x28 image + num_classes=10, # 10 digits + hidden_sizes=[128, 64] + ) + + # Test with fake image data + 
fake_image = Tensor(np.random.randn(1, 784).astype(np.float32)) + output = classifier(fake_image) + + assert output.shape == (1, 10) + assert np.all(output.data >= 0) and np.all(output.data <= 1) + # Sigmoid is element-wise, so the 10 scores are independent and need not sum to 1 + assert np.all(np.isfinite(output.data)) + + def test_sentiment_analysis_network(self): + """Test creating a network for sentiment analysis.""" + classifier = create_classification_network( + input_size=100, # 100-dimensional embeddings + num_classes=2, # Positive/Negative + hidden_sizes=[32, 16] + ) + + # Test with fake text embeddings + fake_embeddings = Tensor(np.random.randn(1, 100).astype(np.float32)) + output = classifier(fake_embeddings) + + assert output.shape == (1, 2) + assert np.all(output.data >= 0) and np.all(output.data <= 1) + + def test_house_price_prediction_network(self): + """Test creating a network for house price prediction.""" + regressor = create_regression_network( + input_size=13, # 13 house features + output_size=1, # 1 price prediction + hidden_sizes=[8, 4] + ) + + # Test with fake house features + fake_features = Tensor(np.random.randn(1, 13).astype(np.float32)) + output = regressor(fake_features) + + assert output.shape == (1, 1) + # Tanh output should be between -1 and 1 + assert np.all(output.data >= -1) and np.all(output.data <= 1) + + +class TestNetworkIntegration: + """Test integration with other modules.""" + + def test_network_with_tensor_operations(self): + """Test that networks work with tensor operations.""" + network = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()]) + + # Create input using tensor operations + x1 = Tensor([[1.0, 2.0, 3.0]]) + x2 = Tensor([[4.0, 5.0, 6.0]]) + x_combined = Tensor(np.vstack([x1.data, x2.data])) + + output = network(x_combined) + assert output.shape == (2, 2) + + def test_network_with_activations_module(self): + """Test that networks properly use activations from the activations module.""" + # This test ensures we're using the 
activations from the activations module + # rather than re-implementing them + network = Sequential([ + Dense(2, 3), + ReLU(), # From activations module + Dense(3, 1), + Sigmoid() # From activations module + ]) + + x = Tensor([[-1.0, 1.0]]) + output = network(x) + + # Test that activations work correctly + assert np.all(output.data >= 0) and np.all(output.data <= 1) + + def test_network_with_layers_module(self): + """Test that networks properly use layers from the layers module.""" + # This test ensures we're using the Dense layers from the layers module + network = Sequential([ + Dense(3, 4), # From layers module + ReLU(), + Dense(4, 2), # From layers module + Sigmoid() + ]) + + x = Tensor([[1.0, 2.0, 3.0]]) + output = network(x) + + # Test that layers work correctly + assert output.shape == (1, 2) + + +if __name__ == "__main__": + # Run the tests + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/tito/commands/info.py b/tito/commands/info.py index b7462a19..21f25ee7 100644 --- a/tito/commands/info.py +++ b/tito/commands/info.py @@ -74,6 +74,8 @@ class InfoCommand(BaseCommand): modules = [ ("Setup", "hello_tinytorch function", self.check_setup_status), ("Tensor", "basic tensor operations", self.check_tensor_status), + ("Layers", "neural network building blocks", self.check_layers_status), + ("Networks", "neural network architectures", self.check_networks_status), ("MLP", "multi-layer perceptron (manual)", self.check_mlp_status), ("CNN", "convolutional networks (basic)", self.check_cnn_status), ("Data", "data loading pipeline", self.check_data_status), @@ -152,6 +154,32 @@ class InfoCommand(BaseCommand): return "โœ… Implemented" except (ImportError, NotImplementedError): return "โณ Not Started" + + def check_layers_status(self): + try: + from tinytorch.core.layers import Dense + from tinytorch.core.activations import ReLU + from tinytorch.core.tensor import Tensor + layer = Dense(3, 4) + activation = ReLU() + x = Tensor([[1, 2, 3]]) + _ = 
activation(layer(x)) + return "โœ… Implemented" + except (ImportError, NotImplementedError): + return "โณ Not Started" + + def check_networks_status(self): + try: + from tinytorch.core.networks import Sequential + from tinytorch.core.layers import Dense + from tinytorch.core.activations import ReLU, Sigmoid + from tinytorch.core.tensor import Tensor + network = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()]) + x = Tensor([[1, 2, 3]]) + _ = network(x) + return "โœ… Implemented" + except (ImportError, NotImplementedError): + return "โณ Not Started" def check_mlp_status(self): try: from tinytorch.core.modules import MLP diff --git a/tito/commands/test.py b/tito/commands/test.py index 74904a81..e33e803c 100644 --- a/tito/commands/test.py +++ b/tito/commands/test.py @@ -32,7 +32,7 @@ class TestCommand(BaseCommand): def run(self, args: Namespace) -> int: console = self.console - valid_modules = ["setup", "tensor", "activations", "layers", "cnn", "data", "training", + valid_modules = ["setup", "tensor", "activations", "layers", "networks", "cnn", "data", "training", "profiling", "compression", "kernels", "benchmarking", "mlops"] if args.all: