From 00231d98ea52aff988faf70232db1163a2de14e2 Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi <vj@eecs.harvard.edu>
Date: Thu, 10 Jul 2025 23:16:12 -0400
Subject: [PATCH] feat: add Networks module with forward-pass and
 visualizations

- Add modules/networks/networks_dev.py and networks_dev.ipynb (Jupytext/nbdev educational pattern)
- Add comprehensive visualizations: architecture, data flow, layer analysis, network comparison
- Add modules/networks/README.md with learning goals, usage, and visualization docs
- Add modules/networks/tests/test_networks.py with thorough tests for composition, MLPs, and visualizations
- Register 'networks' in CLI info and test commands
- Update CLI info command to check layers/networks status
- This module focuses on forward pass only (no training yet)
---
 modules/networks/README.md              |  266 ++++++
 modules/networks/networks_dev.ipynb     | 1004 +++++++++++++++++++++++
 modules/networks/networks_dev.py        |  837 +++++++++++++++++++
 modules/networks/tests/test_networks.py |  420 ++++++++++
 tito/commands/info.py                   |   28 +
 tito/commands/test.py                   |    2 +-
 6 files changed, 2556 insertions(+), 1 deletion(-)
 create mode 100644 modules/networks/README.md
 create mode 100644 modules/networks/networks_dev.ipynb
 create mode 100644 modules/networks/networks_dev.py
 create mode 100644 modules/networks/tests/test_networks.py

diff --git a/modules/networks/README.md b/modules/networks/README.md
new file mode 100644
index 00000000..af175246
--- /dev/null
+++ b/modules/networks/README.md
@@ -0,0 +1,266 @@
+# 🧠 Module 3: Networks - Neural Network Architectures
+
+**Compose layers into complete neural network architectures with powerful visualizations**
+
+## 🎯 Learning Objectives
+
+After completing this module, you will:
+- Understand networks as function composition: `f(x) = layer_n(...layer_2(layer_1(x)))`
+- Build common architectures (MLP, CNN) from layers
+- Visualize network structure and data flow
+- See how architecture affects capability
+- Master forward pass inference (no training yet!)
+
+## 🧠 Build → Use → Understand
+
+This module follows the TinyTorch pedagogical framework:
+
+1. **Build**: Compose layers into complete networks
+2. **Use**: Create different architectures and run inference
+3. **Understand**: How architecture design affects network behavior
+
+## 📚 What You'll Build
+
+### **Sequential Network**
+```python
+# Basic network composition
+network = Sequential([
+    Dense(784, 128),
+    ReLU(),
+    Dense(128, 64),
+    ReLU(),
+    Dense(64, 10),
+    Sigmoid()
+])
+
+# Forward pass
+x = Tensor([[1.0, 2.0, 3.0, ...]])  # Input data
+output = network(x)  # Network prediction
+```
+
+### **MLP (Multi-Layer Perceptron)**
+```python
+# Create MLP for classification
+mlp = create_mlp(
+    input_size=784,      # 28x28 image
+    hidden_sizes=[128, 64],  # Hidden layers
+    output_size=10,      # 10 classes
+    activation=ReLU,
+    output_activation=Sigmoid
+)
+```
+
+### **Specialized Networks**
+```python
+# Classification network
+classifier = create_classification_network(
+    input_size=100, num_classes=2
+)
+
+# Regression network  
+regressor = create_regression_network(
+    input_size=13, output_size=1
+)
+```
+
+## 🎨 Visualization Features
+
+This module includes powerful visualizations to help you understand:
+
+### **Network Architecture Visualization**
+- **Layer-by-layer structure**: See how layers connect
+- **Color-coded layers**: Different colors for Dense, ReLU, Sigmoid, etc.
+- **Connection arrows**: Visualize data flow between layers
+- **Layer details**: Input/output sizes and parameters
+
+### **Data Flow Visualization**
+- **Shape transformations**: See how tensor shapes change through the network
+- **Activation patterns**: Visualize intermediate layer outputs
+- **Statistics tracking**: Mean, std, and distribution of activations
+- **Layer analysis**: Understand what each layer computes (this module is forward-pass only, so nothing is learned yet)
+
+### **Network Comparison**
+- **Side-by-side analysis**: Compare different architectures
+- **Performance metrics**: Output distributions and statistics
+- **Architectural insights**: Layer type distributions and complexity
+
+### **Behavior Analysis**
+- **Input-output relationships**: How inputs map to outputs
+- **Activation patterns**: Layer-by-layer activation analysis
+- **Network depth**: Understanding the role of depth vs width
+- **Practical insights**: Real-world application considerations
+
+## 🚀 Getting Started
+
+### Prerequisites
+- Complete Module 1: Tensor ✅
+- Complete Module 2: Layers ✅ (plus the Activations module, which this module imports)
+- Understand basic function composition
+- Familiar with matplotlib for visualizations
+
+### Quick Start
+```bash
+# Navigate to the networks module
+cd modules/networks
+
+# Work in the development notebook
+jupyter notebook networks_dev.ipynb
+
+# Or work in the Python file
+code networks_dev.py
+```
+
+## 📖 Module Structure
+
+```
+modules/networks/
+├── networks_dev.py           # Main development file (work here!)
+├── networks_dev.ipynb        # Jupyter notebook version
+├── tests/
+│   └── test_networks.py      # Comprehensive tests
+├── README.md                # This file
+└── solutions/               # Reference implementations (if stuck)
+```
+
+## 🎓 Learning Path
+
+### Step 1: Sequential Network (Function Composition)
+- Understand `f(x) = layer_n(...layer_1(x))`
+- Implement basic network composition
+- Test with simple examples
+
+### Step 2: Network Visualization
+- Visualize network architectures
+- Understand data flow through networks
+- Compare different network designs
+
+### Step 3: Common Architectures
+- Build MLPs for different tasks
+- Create classification networks
+- Design regression networks
+
+### Step 4: Behavior Analysis
+- Analyze network behavior with different inputs
+- Understand architectural trade-offs
+- See how design affects capability
+
+### Step 5: Practical Applications
+- Build networks for real problems
+- Understand classification vs regression
+- See how architecture matches task
+
+## 🧪 Testing Your Implementation
+
+### Module-Level Tests
+```bash
+# Run comprehensive tests
+python -m pytest tests/test_networks.py -v
+
+# Quick test
+python -c "from networks_dev import Sequential; print('✅ Networks working!')"
+```
+
+### Package-Level Tests
+```bash
+# Export to package
+python ../../bin/tito sync
+
+# Test integration
+python ../../bin/tito test --module networks
+```
+
+## 🎯 Key Concepts
+
+### **Function Composition**
+- Networks as `f(x) = g(h(x))`
+- Each layer is a function
+- Composition creates complex behavior
+
+### **Architecture Design**
+- **Depth**: Number of layers
+- **Width**: Number of neurons per layer
+- **Activation**: Nonlinearity choices
+- **Output**: Task-specific final layer
+
+### **Visualization Benefits**
+- **Debugging**: See where things go wrong
+- **Understanding**: Visualize complex transformations
+- **Design**: Compare different architectures
+- **Intuition**: Build mental models of networks
+
+### **Practical Considerations**
+- **Input size**: Must match your data
+- **Output size**: Must match your task
+- **Hidden layers**: Balance complexity vs overfitting
+- **Activation functions**: Choose based on task
+
+## 🔍 Common Issues
+
+### **Import Errors**
+```python
+# Run from modules/networks/ so that '../../' resolves to the repository root
+import sys
+sys.path.append('../../')
+from modules.layers.layers_dev import Dense
+from modules.activations.activations_dev import ReLU, Sigmoid
+```
+
+### **Shape Mismatches**
+```python
+# Check layer sizes match
+layer1 = Dense(3, 4)    # 3 inputs, 4 outputs
+layer2 = Dense(4, 2)    # 4 inputs (matches layer1 output), 2 outputs
+```
+
+### **Visualization Issues**
+```python
+# Make sure matplotlib and seaborn are installed (run this in a shell, not in Python):
+#   pip install matplotlib seaborn
+
+# Check if plots are disabled during testing
+if _should_show_plots():
+    # Your visualization code
+    pass
+```
+
+## 🎉 Success Criteria
+
+You've successfully completed this module when:
+- ✅ All tests pass (`pytest tests/test_networks.py`)
+- ✅ You can build and visualize different network architectures
+- ✅ You understand how architecture affects network behavior
+- ✅ You can create networks for classification and regression tasks
+- ✅ Package export works (`tito test --module networks`)
+
+## 🚀 What's Next
+
+After completing this module, you're ready for:
+- **Module 4: Training** - Learn how networks learn from data
+- **Module 5: Data** - Work with real datasets
+- **Module 6: Applications** - Solve real-world problems
+
+## 🤝 Getting Help
+
+- Check the tests for examples of expected behavior
+- Look at the solutions/ directory if you're stuck
+- Review the pedagogical principles in `docs/pedagogy/`
+- Remember: Build → Use → Understand!
+
+## 🎨 Visualization Examples
+
+### Network Architecture
+```
+Input → Dense(784,128) → ReLU → Dense(128,64) → ReLU → Dense(64,10) → Sigmoid → Output
+```
+
+### Data Flow
+```
+(1,784) → (1,128) → (1,128) → (1,64) → (1,64) → (1,10) → (1,10)
+```
+
+### Layer Analysis
+- **Dense layers**: Linear transformations
+- **ReLU**: Introduces nonlinearity
+- **Sigmoid**: Squashes each output into the range (0, 1), which can be read as a probability
+
+**Build powerful neural networks with beautiful visualizations!** 🚀 
\ No newline at end of file
diff --git a/modules/networks/networks_dev.ipynb b/modules/networks/networks_dev.ipynb
new file mode 100644
index 00000000..a8f94222
--- /dev/null
+++ b/modules/networks/networks_dev.ipynb
@@ -0,0 +1,1004 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "---\n",
+        "jupyter:\n",
+        "  jupytext:\n",
+        "    text_representation:\n",
+        "      extension: .py\n",
+        "      format_name: percent\n",
+        "      format_version: '1.3'\n",
+        "      jupytext_version: 1.17.1\n",
+        "---\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "# Module 3: Networks - Neural Network Architectures\n",
+        "\n",
+        "Welcome to the Networks module! This is where we compose layers into complete neural network architectures.\n",
+        "\n",
+        "## Learning Goals\n",
+        "- Understand networks as function composition: `f(x) = layer_n(...layer_2(layer_1(x)))`\n",
+        "- Build common architectures (MLP, CNN) from layers\n",
+        "- Visualize network structure and data flow\n",
+        "- See how architecture affects capability\n",
+        "- Master forward pass inference (no training yet!)\n",
+        "\n",
+        "## Build \u2192 Use \u2192 Understand\n",
+        "1. **Build**: Compose layers into complete networks\n",
+        "2. **Use**: Create different architectures and run inference\n",
+        "3. **Understand**: How architecture design affects network behavior\n",
+        "\n",
+        "## Module Dependencies\n",
+        "This module builds on previous modules:\n",
+        "- **tensor** \u2192 **activations** \u2192 **layers** \u2192 **networks**\n",
+        "- Clean composition: math functions \u2192 building blocks \u2192 complete systems\n",
+        "\n",
+        "## Module \u2192 Package Structure\n",
+        "**\ud83c\udf93 Teaching vs. \ud83d\udd27 Building**: \n",
+        "- **Learning side**: Work in `modules/networks/networks_dev.py`  \n",
+        "- **Building side**: Exports to `tinytorch/core/networks.py`\n",
+        "\n",
+        "This module teaches how to compose layers into complete neural network architectures.\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| default_exp core.networks\n",
+        "\n",
+        "# Setup and imports\n",
+        "import numpy as np\n",
+        "import sys\n",
+        "from typing import List, Union, Optional, Callable\n",
+        "import matplotlib.pyplot as plt\n",
+        "import matplotlib.patches as patches\n",
+        "from matplotlib.patches import FancyBboxPatch, ConnectionPatch\n",
+        "import seaborn as sns\n",
+        "\n",
+        "# Import our building blocks\n",
+        "from tinytorch.core.tensor import Tensor\n",
+        "from tinytorch.core.layers import Dense\n",
+        "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n",
+        "\n",
+        "print(\"\ud83d\udd25 TinyTorch Networks Module\")\n",
+        "print(f\"NumPy version: {np.__version__}\")\n",
+        "print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n",
+        "print(\"Ready to build neural network architectures!\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "import numpy as np\n",
+        "import sys\n",
+        "from typing import List, Union, Optional, Callable\n",
+        "import matplotlib.pyplot as plt\n",
+        "import matplotlib.patches as patches\n",
+        "from matplotlib.patches import FancyBboxPatch, ConnectionPatch\n",
+        "import seaborn as sns\n",
+        "\n",
+        "# Import our building blocks\n",
+        "from tinytorch.core.tensor import Tensor\n",
+        "from tinytorch.core.layers import Dense\n",
+        "from tinytorch.core.activations import ReLU, Sigmoid, Tanh"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| hide\n",
+        "#| export\n",
+        "def _should_show_plots():\n",
+        "    \"\"\"Check if we should show plots (disable during testing)\"\"\"\n",
+        "    return 'pytest' not in sys.modules and 'test' not in sys.argv"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "## Step 1: What is a Network?\n",
+        "\n",
+        "A **network** is a composition of layers that transforms input data into output predictions. Think of it as:\n",
+        "\n",
+        "```\n",
+        "Input \u2192 Layer1 \u2192 Layer2 \u2192 Layer3 \u2192 Output\n",
+        "```\n",
+        "\n",
+        "**The fundamental insight**: Neural networks are just function composition!\n",
+        "- Each layer is a function: `f_i(x)`\n",
+        "- The network is: `f(x) = f_n(...f_2(f_1(x)))`\n",
+        "- Complex behavior emerges from simple building blocks\n",
+        "\n",
+        "**Why networks matter**:\n",
+        "- They solve real problems (classification, regression, etc.)\n",
+        "- Architecture determines what problems you can solve\n",
+        "- Understanding networks = understanding deep learning\n",
+        "- They're the foundation for all modern AI\n",
+        "\n",
+        "Let's start by building the most fundamental network: **Sequential**.\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "class Sequential:\n",
+        "    \"\"\"\n",
+        "    Sequential Network: Composes layers in sequence\n",
+        "    \n",
+        "    The most fundamental network architecture.\n",
+        "    Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x)))\n",
+        "    \n",
+        "    Args:\n",
+        "        layers: List of layers to compose\n",
+        "        \n",
+        "    TODO: Implement the Sequential network with forward pass.\n",
+        "    \"\"\"\n",
+        "    \n",
+        "    def __init__(self, layers: List):\n",
+        "        \"\"\"\n",
+        "        Initialize Sequential network with layers.\n",
+        "        \n",
+        "        Args:\n",
+        "            layers: List of layers to compose in order\n",
+        "            \n",
+        "        TODO: Store the layers and implement forward pass\n",
+        "        \"\"\"\n",
+        "        raise NotImplementedError(\"Student implementation required\")\n",
+        "    \n",
+        "    def forward(self, x: Tensor) -> Tensor:\n",
+        "        \"\"\"\n",
+        "        Forward pass through all layers in sequence.\n",
+        "        \n",
+        "        Args:\n",
+        "            x: Input tensor\n",
+        "            \n",
+        "        Returns:\n",
+        "            Output tensor after passing through all layers\n",
+        "            \n",
+        "        TODO: Implement sequential forward pass through all layers\n",
+        "        \"\"\"\n",
+        "        raise NotImplementedError(\"Student implementation required\")\n",
+        "    \n",
+        "    def __call__(self, x: Tensor) -> Tensor:\n",
+        "        \"\"\"Make network callable: network(x) same as network.forward(x)\"\"\"\n",
+        "        return self.forward(x)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| hide\n",
+        "#| export\n",
+        "class Sequential:\n",
+        "    \"\"\"\n",
+        "    Sequential Network: Composes layers in sequence\n",
+        "    \n",
+        "    The most fundamental network architecture.\n",
+        "    Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x)))\n",
+        "    \"\"\"\n",
+        "    \n",
+        "    def __init__(self, layers: List):\n",
+        "        \"\"\"Initialize Sequential network with layers.\"\"\"\n",
+        "        self.layers = layers\n",
+        "    \n",
+        "    def forward(self, x: Tensor) -> Tensor:\n",
+        "        \"\"\"Forward pass through all layers in sequence.\"\"\"\n",
+        "        # Apply each layer in order\n",
+        "        for layer in self.layers:\n",
+        "            x = layer(x)\n",
+        "        return x\n",
+        "    \n",
+        "    def __call__(self, x: Tensor) -> Tensor:\n",
+        "        \"\"\"Make network callable: network(x) same as network.forward(x)\"\"\"\n",
+        "        return self.forward(x)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "### \ud83e\uddea Test Your Sequential Network\n",
+        "\n",
+        "Once you implement the Sequential network above, run this cell to test it:\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Test the Sequential network\n",
+        "try:\n",
+        "    print(\"=== Testing Sequential Network ===\")\n",
+        "    \n",
+        "    # Create a simple 2-layer network: 3 \u2192 4 \u2192 2\n",
+        "    network = Sequential([\n",
+        "        Dense(3, 4),\n",
+        "        ReLU(),\n",
+        "        Dense(4, 2),\n",
+        "        Sigmoid()\n",
+        "    ])\n",
+        "    \n",
+        "    # Test with sample data\n",
+        "    x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])\n",
+        "    print(f\"Input shape: {x.shape}\")\n",
+        "    print(f\"Input data: {x.data}\")\n",
+        "    \n",
+        "    # Forward pass\n",
+        "    output = network(x)\n",
+        "    print(f\"Output shape: {output.shape}\")\n",
+        "    print(f\"Output data: {output.data}\")\n",
+        "    \n",
+        "    print(\"\u2705 Sequential network working!\")\n",
+        "    \n",
+        "except Exception as e:\n",
+        "    print(f\"\u274c Error: {e}\")\n",
+        "    print(\"Make sure to implement the Sequential network!\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "## Step 2: Network Visualization\n",
+        "\n",
+        "Now let's create powerful visualizations to understand what our networks look like and how they work!\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "def visualize_network_architecture(network: Sequential, title: str = \"Network Architecture\"):\n",
+        "    \"\"\"\n",
+        "    Create a visual representation of network architecture.\n",
+        "    \n",
+        "    Args:\n",
+        "        network: Sequential network to visualize\n",
+        "        title: Title for the plot\n",
+        "    \"\"\"\n",
+        "    if not _should_show_plots():\n",
+        "        print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n",
+        "        return\n",
+        "    \n",
+        "    fig, ax = plt.subplots(1, 1, figsize=(12, 8))\n",
+        "    \n",
+        "    # Network parameters\n",
+        "    layer_count = len(network.layers)\n",
+        "    layer_height = 0.8\n",
+        "    layer_spacing = 1.2\n",
+        "    \n",
+        "    # Colors for different layer types\n",
+        "    colors = {\n",
+        "        'Dense': '#4CAF50',      # Green\n",
+        "        'ReLU': '#2196F3',       # Blue\n",
+        "        'Sigmoid': '#FF9800',    # Orange\n",
+        "        'Tanh': '#9C27B0',       # Purple\n",
+        "        'default': '#757575'      # Gray\n",
+        "    }\n",
+        "    \n",
+        "    # Draw layers\n",
+        "    for i, layer in enumerate(network.layers):\n",
+        "        # Determine layer type and color\n",
+        "        layer_type = type(layer).__name__\n",
+        "        color = colors.get(layer_type, colors['default'])\n",
+        "        \n",
+        "        # Layer position\n",
+        "        x = i * layer_spacing\n",
+        "        y = 0\n",
+        "        \n",
+        "        # Create layer box\n",
+        "        layer_box = FancyBboxPatch(\n",
+        "            (x - 0.3, y - layer_height/2),\n",
+        "            0.6, layer_height,\n",
+        "            boxstyle=\"round,pad=0.1\",\n",
+        "            facecolor=color,\n",
+        "            edgecolor='black',\n",
+        "            linewidth=2,\n",
+        "            alpha=0.8\n",
+        "        )\n",
+        "        ax.add_patch(layer_box)\n",
+        "        \n",
+        "        # Add layer label\n",
+        "        ax.text(x, y, layer_type, ha='center', va='center', \n",
+        "                fontsize=10, fontweight='bold', color='white')\n",
+        "        \n",
+        "        # Add layer details\n",
+        "        if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'):\n",
+        "            details = f\"{layer.input_size}\u2192{layer.output_size}\"\n",
+        "            ax.text(x, y - 0.3, details, ha='center', va='center',\n",
+        "                   fontsize=8, color='white')\n",
+        "        \n",
+        "        # Draw connections to next layer\n",
+        "        if i < layer_count - 1:\n",
+        "            next_x = (i + 1) * layer_spacing\n",
+        "            connection = ConnectionPatch(\n",
+        "                (x + 0.3, y), (next_x - 0.3, y),\n",
+        "                \"data\", \"data\",\n",
+        "                arrowstyle=\"->\", shrinkA=5, shrinkB=5,\n",
+        "                mutation_scale=20, fc=\"black\", lw=2\n",
+        "            )\n",
+        "            ax.add_patch(connection)\n",
+        "    \n",
+        "    # Formatting\n",
+        "    ax.set_xlim(-0.5, (layer_count - 1) * layer_spacing + 0.5)\n",
+        "    ax.set_ylim(-1, 1)\n",
+        "    ax.set_aspect('equal')\n",
+        "    ax.axis('off')\n",
+        "    \n",
+        "    # Add title\n",
+        "    plt.title(title, fontsize=16, fontweight='bold', pad=20)\n",
+        "    \n",
+        "    # Add legend\n",
+        "    legend_elements = []\n",
+        "    for layer_type, color in colors.items():\n",
+        "        if layer_type != 'default':\n",
+        "            legend_elements.append(patches.Patch(color=color, label=layer_type))\n",
+        "    \n",
+        "    ax.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(1, 1))\n",
+        "    \n",
+        "    plt.tight_layout()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = \"Data Flow Through Network\"):\n",
+        "    \"\"\"\n",
+        "    Visualize how data flows through the network.\n",
+        "    \n",
+        "    Args:\n",
+        "        network: Sequential network\n",
+        "        input_data: Input tensor\n",
+        "        title: Title for the plot\n",
+        "    \"\"\"\n",
+        "    if not _should_show_plots():\n",
+        "        print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n",
+        "        return\n",
+        "    \n",
+        "    # Get intermediate outputs\n",
+        "    intermediate_outputs = []\n",
+        "    x = input_data\n",
+        "    \n",
+        "    for i, layer in enumerate(network.layers):\n",
+        "        x = layer(x)\n",
+        "        intermediate_outputs.append({\n",
+        "            'layer': network.layers[i],\n",
+        "            'output': x,\n",
+        "            'layer_index': i\n",
+        "        })\n",
+        "    \n",
+        "    # Create visualization\n",
+        "    fig, axes = plt.subplots(2, len(network.layers), figsize=(4*len(network.layers), 8))\n",
+        "    if len(network.layers) == 1:\n",
+        "        axes = axes.reshape(1, -1)\n",
+        "    \n",
+        "    for i, (layer, output) in enumerate(zip(network.layers, intermediate_outputs)):\n",
+        "        # Top row: Layer information\n",
+        "        ax_top = axes[0, i] if len(network.layers) > 1 else axes[0]\n",
+        "        \n",
+        "        # Layer type and details\n",
+        "        layer_type = type(layer).__name__\n",
+        "        ax_top.text(0.5, 0.8, layer_type, ha='center', va='center',\n",
+        "                   fontsize=12, fontweight='bold')\n",
+        "        \n",
+        "        if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'):\n",
+        "            ax_top.text(0.5, 0.6, f\"{layer.input_size} \u2192 {layer.output_size}\", \n",
+        "                       ha='center', va='center', fontsize=10)\n",
+        "        \n",
+        "        # Output shape\n",
+        "        ax_top.text(0.5, 0.4, f\"Shape: {output['output'].shape}\", \n",
+        "                   ha='center', va='center', fontsize=9)\n",
+        "        \n",
+        "        # Output statistics\n",
+        "        output_data = output['output'].data\n",
+        "        ax_top.text(0.5, 0.2, f\"Mean: {np.mean(output_data):.3f}\", \n",
+        "                   ha='center', va='center', fontsize=9)\n",
+        "        ax_top.text(0.5, 0.1, f\"Std: {np.std(output_data):.3f}\", \n",
+        "                   ha='center', va='center', fontsize=9)\n",
+        "        \n",
+        "        ax_top.set_xlim(0, 1)\n",
+        "        ax_top.set_ylim(0, 1)\n",
+        "        ax_top.axis('off')\n",
+        "        \n",
+        "        # Bottom row: Output visualization\n",
+        "        ax_bottom = axes[1, i] if len(network.layers) > 1 else axes[1]\n",
+        "        \n",
+        "        # Show output as heatmap or histogram\n",
+        "        output_data = output['output'].data.flatten()\n",
+        "        \n",
+        "        if len(output_data) <= 20:  # Small output - show as bars\n",
+        "            ax_bottom.bar(range(len(output_data)), output_data, alpha=0.7)\n",
+        "            ax_bottom.set_title(f\"Layer {i+1} Output\")\n",
+        "            ax_bottom.set_xlabel(\"Output Index\")\n",
+        "            ax_bottom.set_ylabel(\"Value\")\n",
+        "        else:  # Large output - show histogram\n",
+        "            ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black')\n",
+        "            ax_bottom.set_title(f\"Layer {i+1} Output Distribution\")\n",
+        "            ax_bottom.set_xlabel(\"Value\")\n",
+        "            ax_bottom.set_ylabel(\"Frequency\")\n",
+        "        \n",
+        "        ax_bottom.grid(True, alpha=0.3)\n",
+        "    \n",
+        "    plt.suptitle(title, fontsize=14, fontweight='bold')\n",
+        "    plt.tight_layout()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "def compare_networks(networks: List[Sequential], network_names: List[str], \n",
+        "                    input_data: Tensor, title: str = \"Network Comparison\"):\n",
+        "    \"\"\"\n",
+        "    Compare different network architectures side-by-side.\n",
+        "    \n",
+        "    Args:\n",
+        "        networks: List of networks to compare\n",
+        "        network_names: Names for each network\n",
+        "        input_data: Input tensor to test with\n",
+        "        title: Title for the plot\n",
+        "    \"\"\"\n",
+        "    if not _should_show_plots():\n",
+        "        print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n",
+        "        return\n",
+        "    \n",
+        "    fig, axes = plt.subplots(2, len(networks), figsize=(6*len(networks), 10))\n",
+        "    if len(networks) == 1:\n",
+        "        axes = axes.reshape(2, -1)\n",
+        "    \n",
+        "    for i, (network, name) in enumerate(zip(networks, network_names)):\n",
+        "        # Get network output\n",
+        "        output = network(input_data)\n",
+        "        \n",
+        "        # Top row: Architecture visualization\n",
+        "        ax_top = axes[0, i] if len(networks) > 1 else axes[0]\n",
+        "        \n",
+        "        # Count layer types\n",
+        "        layer_types = {}\n",
+        "        for layer in network.layers:\n",
+        "            layer_type = type(layer).__name__\n",
+        "            layer_types[layer_type] = layer_types.get(layer_type, 0) + 1\n",
+        "        \n",
+        "        # Create pie chart of layer types\n",
+        "        if layer_types:\n",
+        "            labels = list(layer_types.keys())\n",
+        "            sizes = list(layer_types.values())\n",
+        "            colors = plt.cm.Set3(np.linspace(0, 1, len(labels)))\n",
+        "            \n",
+        "            ax_top.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors)\n",
+        "            ax_top.set_title(f\"{name}\\nLayer Distribution\")\n",
+        "        \n",
+        "        # Bottom row: Output comparison\n",
+        "        ax_bottom = axes[1, i] if len(networks) > 1 else axes[1]\n",
+        "        \n",
+        "        output_data = output.data.flatten()\n",
+        "        \n",
+        "        # Show output statistics\n",
+        "        ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black')\n",
+        "        ax_bottom.axvline(np.mean(output_data), color='red', linestyle='--', \n",
+        "                         label=f'Mean: {np.mean(output_data):.3f}')\n",
+        "        ax_bottom.axvline(np.median(output_data), color='green', linestyle='--',\n",
+        "                         label=f'Median: {np.median(output_data):.3f}')\n",
+        "        \n",
+        "        ax_bottom.set_title(f\"{name} Output Distribution\")\n",
+        "        ax_bottom.set_xlabel(\"Output Value\")\n",
+        "        ax_bottom.set_ylabel(\"Frequency\")\n",
+        "        ax_bottom.legend()\n",
+        "        ax_bottom.grid(True, alpha=0.3)\n",
+        "    \n",
+        "    plt.suptitle(title, fontsize=16, fontweight='bold')\n",
+        "    plt.tight_layout()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "## Step 3: Building Common Architectures\n",
+        "\n",
+        "Now let's build some common neural network architectures and visualize them!\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int, \n",
+        "               activation=ReLU, output_activation=Sigmoid) -> Sequential:\n",
+        "    \"\"\"\n",
+        "    Create a Multi-Layer Perceptron (MLP) network.\n",
+        "    \n",
+        "    Args:\n",
+        "        input_size: Number of input features\n",
+        "        hidden_sizes: List of hidden layer sizes\n",
+        "        output_size: Number of output features\n",
+        "        activation: Activation function for hidden layers\n",
+        "        output_activation: Activation function for output layer\n",
+        "        \n",
+        "    Returns:\n",
+        "        Sequential network\n",
+        "    \"\"\"\n",
+        "    layers = []\n",
+        "    \n",
+        "    # Input layer\n",
+        "    if hidden_sizes:\n",
+        "        layers.append(Dense(input_size, hidden_sizes[0]))\n",
+        "        layers.append(activation())\n",
+        "        \n",
+        "        # Hidden layers\n",
+        "        for i in range(len(hidden_sizes) - 1):\n",
+        "            layers.append(Dense(hidden_sizes[i], hidden_sizes[i + 1]))\n",
+        "            layers.append(activation())\n",
+        "        \n",
+        "        # Output layer\n",
+        "        layers.append(Dense(hidden_sizes[-1], output_size))\n",
+        "    else:\n",
+        "        # Direct input to output\n",
+        "        layers.append(Dense(input_size, output_size))\n",
+        "    \n",
+        "    layers.append(output_activation())\n",
+        "    \n",
+        "    return Sequential(layers)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Test MLP creation and visualization\n",
+        "try:\n",
+        "    print(\"=== Testing MLP Creation and Visualization ===\")\n",
+        "    \n",
+        "    # Create different MLP architectures\n",
+        "    mlp_small = create_mlp(input_size=3, hidden_sizes=[4], output_size=2)\n",
+        "    mlp_medium = create_mlp(input_size=10, hidden_sizes=[16, 8], output_size=3)\n",
+        "    mlp_large = create_mlp(input_size=784, hidden_sizes=[128, 64, 32], output_size=10)\n",
+        "    \n",
+        "    print(\"Created MLP architectures:\")\n",
+        "    print(f\"  Small: 3 \u2192 4 \u2192 2\")\n",
+        "    print(f\"  Medium: 10 \u2192 16 \u2192 8 \u2192 3\")\n",
+        "    print(f\"  Large: 784 \u2192 128 \u2192 64 \u2192 32 \u2192 10\")\n",
+        "    \n",
+        "    # Test with sample data\n",
+        "    x = Tensor(np.random.randn(5, 3).astype(np.float32))\n",
+        "    \n",
+        "    # Visualize architectures\n",
+        "    visualize_network_architecture(mlp_small, \"Small MLP Architecture\")\n",
+        "    visualize_network_architecture(mlp_medium, \"Medium MLP Architecture\")\n",
+        "    visualize_network_architecture(mlp_large, \"Large MLP Architecture\")\n",
+        "    \n",
+        "    # Visualize data flow\n",
+        "    visualize_data_flow(mlp_small, x, \"Data Flow Through Small MLP\")\n",
+        "    \n",
+        "    # Compare networks\n",
+        "    networks = [mlp_small, mlp_medium]\n",
+        "    names = [\"Small MLP\", \"Medium MLP\"]\n",
+        "    compare_networks(networks, names, x, \"MLP Architecture Comparison\")\n",
+        "    \n",
+        "    print(\"\u2705 MLP creation and visualization working!\")\n",
+        "    \n",
+        "except Exception as e:\n",
+        "    print(f\"\u274c Error: {e}\")\n",
+        "    print(\"Make sure to implement the visualization functions!\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "## Step 4: Understanding Network Behavior\n",
+        "\n",
+        "Let's analyze how different network architectures behave with different types of input data.\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "def analyze_network_behavior(network: Sequential, input_data: Tensor, \n",
+        "                           title: str = \"Network Behavior Analysis\"):\n",
+        "    \"\"\"\n",
+        "    Analyze how a network behaves with different types of input.\n",
+        "    \n",
+        "    Args:\n",
+        "        network: Network to analyze\n",
+        "        input_data: Input tensor\n",
+        "        title: Title for the plot\n",
+        "    \"\"\"\n",
+        "    if not _should_show_plots():\n",
+        "        print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n",
+        "        return\n",
+        "    \n",
+        "    fig, axes = plt.subplots(2, 3, figsize=(15, 10))\n",
+        "    \n",
+        "    # 1. Input vs Output relationship\n",
+        "    ax1 = axes[0, 0]\n",
+        "    input_flat = input_data.data.flatten()\n",
+        "    output = network(input_data)\n",
+        "    output_flat = output.data.flatten()\n",
+        "    \n",
+        "    ax1.scatter(input_flat, output_flat, alpha=0.6)\n",
+        "    ax1.plot([input_flat.min(), input_flat.max()], \n",
+        "             [input_flat.min(), input_flat.max()], 'r--', alpha=0.5, label='y=x')\n",
+        "    ax1.set_xlabel('Input Values')\n",
+        "    ax1.set_ylabel('Output Values')\n",
+        "    ax1.set_title('Input vs Output')\n",
+        "    ax1.legend()\n",
+        "    ax1.grid(True, alpha=0.3)\n",
+        "    \n",
+        "    # 2. Output distribution\n",
+        "    ax2 = axes[0, 1]\n",
+        "    ax2.hist(output_flat, bins=20, alpha=0.7, edgecolor='black')\n",
+        "    ax2.axvline(np.mean(output_flat), color='red', linestyle='--', \n",
+        "                label=f'Mean: {np.mean(output_flat):.3f}')\n",
+        "    ax2.set_xlabel('Output Values')\n",
+        "    ax2.set_ylabel('Frequency')\n",
+        "    ax2.set_title('Output Distribution')\n",
+        "    ax2.legend()\n",
+        "    ax2.grid(True, alpha=0.3)\n",
+        "    \n",
+        "    # 3. Layer-by-layer activation patterns\n",
+        "    ax3 = axes[0, 2]\n",
+        "    activations = []\n",
+        "    x = input_data\n",
+        "    \n",
+        "    for layer in network.layers:\n",
+        "        x = layer(x)\n",
+        "        if hasattr(layer, 'input_size'):  # Dense layer\n",
+        "            activations.append(np.mean(x.data))\n",
+        "        else:  # Activation layer\n",
+        "            activations.append(np.mean(x.data))\n",
+        "    \n",
+        "    ax3.plot(range(len(activations)), activations, 'bo-', linewidth=2, markersize=8)\n",
+        "    ax3.set_xlabel('Layer Index')\n",
+        "    ax3.set_ylabel('Mean Activation')\n",
+        "    ax3.set_title('Layer-by-Layer Activations')\n",
+        "    ax3.grid(True, alpha=0.3)\n",
+        "    \n",
+        "    # 4. Network depth analysis\n",
+        "    ax4 = axes[1, 0]\n",
+        "    layer_types = [type(layer).__name__ for layer in network.layers]\n",
+        "    layer_counts = {}\n",
+        "    for layer_type in layer_types:\n",
+        "        layer_counts[layer_type] = layer_counts.get(layer_type, 0) + 1\n",
+        "    \n",
+        "    if layer_counts:\n",
+        "        ax4.bar(layer_counts.keys(), layer_counts.values(), alpha=0.7)\n",
+        "        ax4.set_xlabel('Layer Type')\n",
+        "        ax4.set_ylabel('Count')\n",
+        "        ax4.set_title('Layer Type Distribution')\n",
+        "        ax4.grid(True, alpha=0.3)\n",
+        "    \n",
+        "    # 5. Shape transformation\n",
+        "    ax5 = axes[1, 1]\n",
+        "    shapes = [input_data.shape]\n",
+        "    x = input_data\n",
+        "    \n",
+        "    for layer in network.layers:\n",
+        "        x = layer(x)\n",
+        "        shapes.append(x.shape)\n",
+        "    \n",
+        "    layer_indices = range(len(shapes))\n",
+        "    shape_sizes = [np.prod(shape) for shape in shapes]\n",
+        "    \n",
+        "    ax5.plot(layer_indices, shape_sizes, 'go-', linewidth=2, markersize=8)\n",
+        "    ax5.set_xlabel('Layer Index')\n",
+        "    ax5.set_ylabel('Tensor Size')\n",
+        "    ax5.set_title('Shape Transformation')\n",
+        "    ax5.grid(True, alpha=0.3)\n",
+        "    \n",
+        "    # 6. Network summary\n",
+        "    ax6 = axes[1, 2]\n",
+        "    ax6.axis('off')\n",
+        "    \n",
+        "    summary_text = f\"\"\"\n",
+        "Network Summary:\n",
+        "\u2022 Total Layers: {len(network.layers)}\n",
+        "\u2022 Input Shape: {input_data.shape}\n",
+        "\u2022 Output Shape: {output.shape}\n",
+        "\u2022 Parameters: {sum(np.prod(layer.weights.data.shape) if hasattr(layer, 'weights') else 0 for layer in network.layers)}\n",
+        "\u2022 Architecture: {' \u2192 '.join([type(layer).__name__ for layer in network.layers])}\n",
+        "    \"\"\"\n",
+        "    \n",
+        "    ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes, \n",
+        "             fontsize=10, verticalalignment='top', fontfamily='monospace')\n",
+        "    \n",
+        "    plt.suptitle(title, fontsize=16, fontweight='bold')\n",
+        "    plt.tight_layout()\n",
+        "    plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Test network behavior analysis\n",
+        "try:\n",
+        "    print(\"=== Testing Network Behavior Analysis ===\")\n",
+        "    \n",
+        "    # Create a network for analysis\n",
+        "    network = create_mlp(input_size=5, hidden_sizes=[8, 4], output_size=2)\n",
+        "    \n",
+        "    # Test with different types of input\n",
+        "    x_normal = Tensor(np.random.randn(10, 5).astype(np.float32))\n",
+        "    x_uniform = Tensor(np.random.uniform(-1, 1, (10, 5)).astype(np.float32))\n",
+        "    x_zeros = Tensor(np.zeros((10, 5)).astype(np.float32))\n",
+        "    \n",
+        "    print(\"Analyzing network behavior with different inputs...\")\n",
+        "    \n",
+        "    # Analyze behavior\n",
+        "    analyze_network_behavior(network, x_normal, \"Network Behavior: Normal Input\")\n",
+        "    analyze_network_behavior(network, x_uniform, \"Network Behavior: Uniform Input\")\n",
+        "    analyze_network_behavior(network, x_zeros, \"Network Behavior: Zero Input\")\n",
+        "    \n",
+        "    print(\"\u2705 Network behavior analysis working!\")\n",
+        "    \n",
+        "except Exception as e:\n",
+        "    print(f\"\u274c Error: {e}\")\n",
+        "    print(\"Make sure to implement the behavior analysis function!\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "## Step 5: Practical Applications\n",
+        "\n",
+        "Let's see how our networks can be applied to real-world problems!\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "def create_classification_network(input_size: int, num_classes: int, \n",
+        "                                hidden_sizes: List[int] = None) -> Sequential:\n",
+        "    \"\"\"\n",
+        "    Create a network for classification problems.\n",
+        "    \n",
+        "    Args:\n",
+        "        input_size: Number of input features\n",
+        "        num_classes: Number of output classes\n",
+        "        hidden_sizes: List of hidden layer sizes (default: [input_size//2])\n",
+        "        \n",
+        "    Returns:\n",
+        "        Sequential network for classification\n",
+        "    \"\"\"\n",
+        "    if hidden_sizes is None:\n",
+        "        hidden_sizes = [input_size // 2]\n",
+        "    \n",
+        "    return create_mlp(\n",
+        "        input_size=input_size,\n",
+        "        hidden_sizes=hidden_sizes,\n",
+        "        output_size=num_classes,\n",
+        "        activation=ReLU,\n",
+        "        output_activation=Sigmoid\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "#| export\n",
+        "def create_regression_network(input_size: int, output_size: int = 1,\n",
+        "                             hidden_sizes: List[int] = None) -> Sequential:\n",
+        "    \"\"\"\n",
+        "    Create a network for regression problems.\n",
+        "    \n",
+        "    Args:\n",
+        "        input_size: Number of input features\n",
+        "        output_size: Number of output values (default: 1)\n",
+        "        hidden_sizes: List of hidden layer sizes (default: [input_size//2])\n",
+        "        \n",
+        "    Returns:\n",
+        "        Sequential network for regression\n",
+        "    \"\"\"\n",
+        "    if hidden_sizes is None:\n",
+        "        hidden_sizes = [input_size // 2]\n",
+        "    \n",
+        "    return create_mlp(\n",
+        "        input_size=input_size,\n",
+        "        hidden_sizes=hidden_sizes,\n",
+        "        output_size=output_size,\n",
+        "        activation=ReLU,\n",
+        "        output_activation=Tanh  # Tanh keeps predictions in [-1, 1]; scale regression targets to match\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Test practical applications\n",
+        "try:\n",
+        "    print(\"=== Testing Practical Applications ===\")\n",
+        "    \n",
+        "    # Create networks for different tasks\n",
+        "    digit_classifier = create_classification_network(\n",
+        "        input_size=784,  # 28x28 image\n",
+        "        num_classes=10,  # 10 digits\n",
+        "        hidden_sizes=[128, 64]\n",
+        "    )\n",
+        "    \n",
+        "    sentiment_analyzer = create_classification_network(\n",
+        "        input_size=100,  # 100-dimensional word embeddings\n",
+        "        num_classes=2,   # Positive/Negative\n",
+        "        hidden_sizes=[32, 16]\n",
+        "    )\n",
+        "    \n",
+        "    house_price_predictor = create_regression_network(\n",
+        "        input_size=13,   # 13 house features\n",
+        "        output_size=1,   # 1 price prediction\n",
+        "        hidden_sizes=[8, 4]\n",
+        "    )\n",
+        "    \n",
+        "    print(\"Created networks for different applications:\")\n",
+        "    print(f\"  Digit Classifier: 784 \u2192 128 \u2192 64 \u2192 10\")\n",
+        "    print(f\"  Sentiment Analyzer: 100 \u2192 32 \u2192 16 \u2192 2\")\n",
+        "    print(f\"  House Price Predictor: 13 \u2192 8 \u2192 4 \u2192 1\")\n",
+        "    \n",
+        "    # Test with sample data\n",
+        "    digit_input = Tensor(np.random.randn(1, 784).astype(np.float32))\n",
+        "    sentiment_input = Tensor(np.random.randn(1, 100).astype(np.float32))\n",
+        "    house_input = Tensor(np.random.randn(1, 13).astype(np.float32))\n",
+        "    \n",
+        "    # Get predictions\n",
+        "    digit_pred = digit_classifier(digit_input)\n",
+        "    sentiment_pred = sentiment_analyzer(sentiment_input)\n",
+        "    house_pred = house_price_predictor(house_input)\n",
+        "    \n",
+        "    print(f\"\\nSample predictions:\")\n",
+        "    print(f\"  Digit classifier output: {digit_pred.data[0]}\")\n",
+        "    print(f\"  Sentiment analyzer output: {sentiment_pred.data[0]}\")\n",
+        "    print(f\"  House price predictor output: {house_pred.data[0]}\")\n",
+        "    \n",
+        "    # Visualize architectures\n",
+        "    visualize_network_architecture(digit_classifier, \"Digit Classification Network\")\n",
+        "    visualize_network_architecture(sentiment_analyzer, \"Sentiment Analysis Network\")\n",
+        "    visualize_network_architecture(house_price_predictor, \"House Price Prediction Network\")\n",
+        "    \n",
+        "    print(\"\u2705 Practical applications working!\")\n",
+        "    \n",
+        "except Exception as e:\n",
+        "    print(f\"\u274c Error: {e}\")\n",
+        "    print(\"Make sure to implement the application functions!\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "## \ud83c\udf93 Module Summary\n",
+        "\n",
+        "### What You Learned\n",
+        "1. **Network Composition**: Building complete networks from layers\n",
+        "2. **Architecture Design**: How to choose network structures\n",
+        "3. **Visualization**: Understanding networks through visual analysis\n",
+        "4. **Practical Applications**: Real-world network use cases\n",
+        "\n",
+        "### Key Architectural Insights\n",
+        "- **Function Composition**: Networks as `f(x) = layer_n(...layer_1(x))`\n",
+        "- **Modular Design**: Clean separation between layers and networks\n",
+        "- **Visual Understanding**: How to analyze network behavior\n",
+        "- **Application Patterns**: Classification vs regression architectures\n",
+        "\n",
+        "### Network Design Principles\n",
+        "- **Depth vs Width**: Trade-offs in network architecture\n",
+        "- **Activation Functions**: How they affect network behavior\n",
+        "- **Shape Management**: Understanding tensor transformations\n",
+        "- **Practical Considerations**: Choosing architectures for specific tasks\n",
+        "\n",
+        "### Next Steps\n",
+        "- **Training**: Learn how networks learn from data (autograd, optimization)\n",
+        "- **Advanced Architectures**: CNNs, RNNs, Transformers\n",
+        "- **Real Data**: Working with actual datasets\n",
+        "- **Production**: Deploying networks in real applications\n",
+        "\n",
+        "**Congratulations on mastering neural network architectures!** \ud83d\ude80\n",
+        "\"\"\""
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python",
+      "version": "3.8.0"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/modules/networks/networks_dev.py b/modules/networks/networks_dev.py
new file mode 100644
index 00000000..8a485705
--- /dev/null
+++ b/modules/networks/networks_dev.py
@@ -0,0 +1,837 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.17.1
+# ---
+
+# %% [markdown]
+"""
+# Module 3: Networks - Neural Network Architectures
+
+Welcome to the Networks module! This is where we compose layers into complete neural network architectures.
+
+## Learning Goals
+- Understand networks as function composition: `f(x) = layer_n(...layer_2(layer_1(x)))`
+- Build common architectures (MLP, CNN) from layers
+- Visualize network structure and data flow
+- See how architecture affects capability
+- Master forward pass inference (no training yet!)
+
+## Build → Use → Understand
+1. **Build**: Compose layers into complete networks
+2. **Use**: Create different architectures and run inference
+3. **Understand**: How architecture design affects network behavior
+
+## Module Dependencies
+This module builds on previous modules:
+- **tensor** → **activations** → **layers** → **networks**
+- Clean composition: math functions → building blocks → complete systems
+
+## Module → Package Structure
+**🎓 Teaching vs. 🔧 Building**: 
+- **Learning side**: Work in `modules/networks/networks_dev.py`  
+- **Building side**: Exports to `tinytorch/core/networks.py`
+
+This module teaches how to compose layers into complete neural network architectures.
+"""
+
+# %%
+#| default_exp core.networks
+
+# Setup and imports
+import numpy as np
+import sys
+from typing import List, Union, Optional, Callable
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from matplotlib.patches import FancyBboxPatch, ConnectionPatch
+import seaborn as sns
+
+# Import our building blocks
+from tinytorch.core.tensor import Tensor
+from tinytorch.core.layers import Dense
+from tinytorch.core.activations import ReLU, Sigmoid, Tanh
+
+print("🔥 TinyTorch Networks Module")
+print(f"NumPy version: {np.__version__}")
+print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}")
+print("Ready to build neural network architectures!")
+
+# %%
+#| export
+import numpy as np
+import sys
+from typing import List, Union, Optional, Callable
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from matplotlib.patches import FancyBboxPatch, ConnectionPatch
+import seaborn as sns
+
+# Import our building blocks
+from tinytorch.core.tensor import Tensor
+from tinytorch.core.layers import Dense
+from tinytorch.core.activations import ReLU, Sigmoid, Tanh
+
+# %%
+#| hide
+#| export
+def _should_show_plots():
+    """Return False when pytest is loaded or 'test' is a command-line argument, True otherwise."""
+    return not ('pytest' in sys.modules or 'test' in sys.argv)
+
+# %% [markdown]
+"""
+## Step 1: What is a Network?
+
+A **network** is a composition of layers that transforms input data into output predictions. Think of it as:
+
+```
+Input → Layer1 → Layer2 → Layer3 → Output
+```
+
+**The fundamental insight**: Neural networks are just function composition!
+- Each layer is a function: `f_i(x)`
+- The network is: `f(x) = f_n(...f_2(f_1(x)))`
+- Complex behavior emerges from simple building blocks
+
+**Why networks matter**:
+- They solve real problems (classification, regression, etc.)
+- Architecture determines what problems you can solve
+- Understanding networks = understanding deep learning
+- They're the foundation for all modern AI
+
+Let's start by building the most fundamental network: **Sequential**.
+"""
+
+# %%
+#| export
+class Sequential:
+    """
+    Sequential Network: Composes layers in sequence
+    
+    The most fundamental network architecture.
+    Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x)))
+    
+    Args:
+        layers: List of callable layers, applied first to last
+        
+    TODO: Implement __init__ and forward; __call__ is already provided.
+    """
+    
+    def __init__(self, layers: List):
+        """
+        Initialize Sequential network with layers.
+        
+        Args:
+            layers: List of layers to compose in order
+            
+        TODO: Store the list as self.layers (the visualization helpers read it)
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """
+        Forward pass through all layers in sequence.
+        
+        Args:
+            x: Input tensor
+            
+        Returns:
+            Output tensor after passing through all layers
+            
+        TODO: Call each layer on the running result, in order, and return it
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        """Make network callable: network(x) same as network.forward(x)"""
+        return self.forward(x)
+
+# %%
+#| hide
+#| export
+class Sequential:
+    """
+    Sequential container that chains layers into a single callable.
+    
+    Calling the network feeds the input to the first layer and each result
+    to the next one: f(x) = layer_n(...layer_2(layer_1(x)))
+    """
+    
+    def __init__(self, layers: List):
+        """Keep a reference to the ordered list of layers."""
+        self.layers = layers
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """Run x through every layer in order and return the final result."""
+        result = x
+        for stage in self.layers:
+            result = stage(result)
+        return result
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        """Allow network(x) as shorthand for network.forward(x)."""
+        return self.forward(x)
+
+# %% [markdown]
+"""
+### 🧪 Test Your Sequential Network
+
+Once you implement the Sequential network above, run this cell to test it:
+"""
+
+# %%
+# Smoke test: build a small Sequential network and run one forward pass
+try:
+    print("=== Testing Sequential Network ===")
+    
+    # Two Dense layers (3 → 4 → 2), each followed by an activation
+    network = Sequential([
+        Dense(3, 4),
+        ReLU(),
+        Dense(4, 2),
+        Sigmoid()
+    ])
+    
+    # A batch of two samples with three features each
+    x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+    print(f"Input shape: {x.shape}")
+    print(f"Input data: {x.data}")
+    
+    # Forward pass via __call__, which delegates to forward()
+    output = network(x)
+    print(f"Output shape: {output.shape}")
+    print(f"Output data: {output.data}")
+    
+    print("✅ Sequential network working!")
+    
+except Exception as e:
+    print(f"❌ Error: {e}")
+    print("Make sure to implement the Sequential network!")
+
+# %% [markdown]
+"""
+## Step 2: Network Visualization
+
+Now let's create powerful visualizations to understand what our networks look like and how they work!
+"""
+
+# %%
+#| export
+def visualize_network_architecture(network: Sequential, title: str = "Network Architecture"):
+    """
+    Draw the network as a left-to-right chain of colour-coded layer boxes.
+    
+    Each box is labelled with the layer's class name; consecutive boxes are
+    joined by arrows and a legend lists the named colours.
+    
+    Args:
+        network: Sequential network whose layers are drawn in order
+        title: Heading shown above the diagram
+    """
+    if not _should_show_plots():
+        print("📊 Plots disabled during testing - this is normal!")
+        return
+    
+    _, ax = plt.subplots(1, 1, figsize=(12, 8))
+    
+    # Diagram geometry (data coordinates)
+    n_layers = len(network.layers)
+    box_height = 0.8
+    spacing = 1.2
+    
+    # Fill colour keyed by layer class name
+    palette = {
+        'Dense': '#4CAF50',      # Green
+        'ReLU': '#2196F3',       # Blue
+        'Sigmoid': '#FF9800',    # Orange
+        'Tanh': '#9C27B0',       # Purple
+        'default': '#757575'      # Gray
+    }
+    
+    for idx, layer in enumerate(network.layers):
+        kind = type(layer).__name__
+        # Box centre: layers sit on the horizontal axis, evenly spaced
+        cx = idx * spacing
+        cy = 0
+        
+        # Rounded box; class names missing from the palette use 'default'
+        box = FancyBboxPatch(
+            (cx - 0.3, cy - box_height/2),
+            0.6, box_height,
+            boxstyle="round,pad=0.1",
+            facecolor=palette.get(kind, palette['default']),
+            edgecolor='black',
+            linewidth=2,
+            alpha=0.8
+        )
+        ax.add_patch(box)
+        
+        # Class name in the middle of the box
+        ax.text(cx, cy, kind, ha='center', va='center', 
+                fontsize=10, fontweight='bold', color='white')
+        
+        # Layers exposing both size attributes also get an "in→out" caption
+        if all(hasattr(layer, attr) for attr in ('input_size', 'output_size')):
+            caption = f"{layer.input_size}→{layer.output_size}"
+            ax.text(cx, cy - 0.3, caption, ha='center', va='center',
+                   fontsize=8, color='white')
+        
+        # Arrow to the following box (none after the last layer)
+        if idx + 1 < n_layers:
+            next_cx = (idx + 1) * spacing
+            arrow = ConnectionPatch(
+                (cx + 0.3, cy), (next_cx - 0.3, cy),
+                "data", "data",
+                arrowstyle="->", shrinkA=5, shrinkB=5,
+                mutation_scale=20, fc="black", lw=2
+            )
+            ax.add_patch(arrow)
+    
+    # Frame the row of boxes and hide the axes decorations
+    ax.set_xlim(-0.5, (n_layers - 1) * spacing + 0.5)
+    ax.set_ylim(-1, 1)
+    ax.set_aspect('equal')
+    ax.axis('off')
+    
+    # Heading above the diagram
+    plt.title(title, fontsize=16, fontweight='bold', pad=20)
+    
+    # One legend entry per named palette colour ('default' is left out)
+    legend_handles = [
+        patches.Patch(color=colour, label=name)
+        for name, colour in palette.items()
+        if name != 'default'
+    ]
+    ax.legend(handles=legend_handles, loc='upper right', bbox_to_anchor=(1, 1))
+    
+    # Fit everything inside the figure, then display it
+    plt.tight_layout()
+    plt.show()
+
+# %%
+#| export
+def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = "Data Flow Through Network"):
+    """
+    Visualize how data flows through the network.
+    
+    Draws one column per layer: summary text (type, shape, mean, std) on top,
+    a bar chart or histogram of the flattened output underneath.
+    
+    Args:
+        network: Sequential network
+        input_data: Input tensor fed to the first layer
+        title: Title for the plot
+    """
+    if not _should_show_plots():
+        print("📊 Plots disabled during testing - this is normal!")
+        return
+    
+    n_layers = len(network.layers)
+    if n_layers == 0:
+        # plt.subplots cannot build a grid with zero columns
+        print("⚠️ Network has no layers - nothing to visualize.")
+        return
+    
+    # Record the output of every layer as the input propagates forward
+    layer_outputs = []
+    x = input_data
+    for layer in network.layers:
+        x = layer(x)
+        layer_outputs.append(x)
+    
+    # squeeze=False keeps `axes` 2-D even for a one-layer network, so the
+    # [row, column] indexing below is always valid.
+    _, axes = plt.subplots(2, n_layers, figsize=(4*n_layers, 8), squeeze=False)
+    
+    for i, (layer, output) in enumerate(zip(network.layers, layer_outputs)):
+        # Top row: Layer information
+        ax_top = axes[0, i]
+        # Layer type and details
+        layer_type = type(layer).__name__
+        ax_top.text(0.5, 0.8, layer_type, ha='center', va='center',
+                   fontsize=12, fontweight='bold')
+        
+        if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'):
+            ax_top.text(0.5, 0.6, f"{layer.input_size} → {layer.output_size}",
+                       ha='center', va='center', fontsize=10)
+        
+        # Output shape
+        ax_top.text(0.5, 0.4, f"Shape: {output.shape}",
+                   ha='center', va='center', fontsize=9)
+        
+        # Output statistics
+        output_data = output.data
+        ax_top.text(0.5, 0.2, f"Mean: {np.mean(output_data):.3f}",
+                   ha='center', va='center', fontsize=9)
+        ax_top.text(0.5, 0.1, f"Std: {np.std(output_data):.3f}",
+                   ha='center', va='center', fontsize=9)
+        
+        ax_top.set_xlim(0, 1)
+        ax_top.set_ylim(0, 1)
+        ax_top.axis('off')
+        
+        # Bottom row: Output visualization
+        ax_bottom = axes[1, i]
+        # Few values: one bar per value; many values: a histogram
+        flat_output = output_data.flatten()
+        if len(flat_output) <= 20:  # Small output - show as bars
+            ax_bottom.bar(range(len(flat_output)), flat_output, alpha=0.7)
+            ax_bottom.set_title(f"Layer {i+1} Output")
+            ax_bottom.set_xlabel("Output Index")
+            ax_bottom.set_ylabel("Value")
+        else:  # Large output - show histogram
+            ax_bottom.hist(flat_output, bins=20, alpha=0.7, edgecolor='black')
+            ax_bottom.set_title(f"Layer {i+1} Output Distribution")
+            ax_bottom.set_xlabel("Value")
+            ax_bottom.set_ylabel("Frequency")
+        
+        ax_bottom.grid(True, alpha=0.3)
+    
+    plt.suptitle(title, fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    plt.show()
+
+# %%
+#| export
+def compare_networks(networks: List[Sequential], network_names: List[str], 
+                    input_data: Tensor, title: str = "Network Comparison"):
+    """
+    Compare different network architectures side-by-side.
+    
+    Draws one column per network: a pie chart of its layer types on top and
+    a histogram of its output values for input_data underneath.
+    
+    Args:
+        networks: Non-empty list of networks to compare
+        network_names: Names for each network, in the same order
+        input_data: Input tensor fed to every network (must fit them all)
+        title: Title for the plot
+        
+    Raises:
+        ValueError: If networks is empty (there is nothing to plot)
+    """
+    if not _should_show_plots():
+        print("📊 Plots disabled during testing - this is normal!")
+        return
+    
+    if not networks:
+        raise ValueError("compare_networks needs at least one network")
+    
+    # squeeze=False keeps axes 2-D even for one network, so axes[row, col]
+    # always yields a single Axes object instead of an array of them.
+    fig, axes = plt.subplots(2, len(networks), figsize=(6*len(networks), 10),
+                             squeeze=False)
+    
+    for i, (network, name) in enumerate(zip(networks, network_names)):
+        output_data = network(input_data).data.flatten()
+        ax_top, ax_bottom = axes[0, i], axes[1, i]
+        
+        # Top row: share of each layer type in the architecture
+        layer_types = {}
+        for layer in network.layers:
+            layer_type = type(layer).__name__
+            layer_types[layer_type] = layer_types.get(layer_type, 0) + 1
+        
+        if layer_types:  # a network without layers has nothing to chart
+            labels = list(layer_types.keys())
+            sizes = list(layer_types.values())
+            colors = plt.cm.Set3(np.linspace(0, 1, len(labels)))
+            ax_top.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors)
+            ax_top.set_title(f"{name}\nLayer Distribution")
+        
+        # Bottom row: output histogram with mean and median markers
+        mean, median = np.mean(output_data), np.median(output_data)
+        ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black')
+        ax_bottom.axvline(mean, color='red', linestyle='--', 
+                         label=f'Mean: {mean:.3f}')
+        ax_bottom.axvline(median, color='green', linestyle='--',
+                         label=f'Median: {median:.3f}')
+        ax_bottom.set_title(f"{name} Output Distribution")
+        ax_bottom.set_xlabel("Output Value")
+        ax_bottom.set_ylabel("Frequency")
+        ax_bottom.legend()
+        ax_bottom.grid(True, alpha=0.3)
+    
+    plt.suptitle(title, fontsize=16, fontweight='bold')
+    plt.tight_layout()
+    plt.show()
+
+# %% [markdown]
+"""
+## Step 3: Building Common Architectures
+
+Now let's build some common neural network architectures and visualize them!
+"""
+
+# %%
+#| export
+def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int, 
+               activation=ReLU, output_activation=Sigmoid) -> Sequential:
+    """
+    Assemble a Multi-Layer Perceptron (MLP) as a Sequential stack.
+    
+    Each consecutive pair of widths becomes a Dense layer. Every Dense layer
+    except the last is followed by activation(); the last is followed by
+    output_activation(). With no hidden sizes the result is a single
+    Dense(input_size, output_size) plus output_activation().
+    
+    Args:
+        input_size: Number of input features
+        hidden_sizes: Width of each hidden layer, in order (may be empty)
+        output_size: Number of output features
+        activation: Called with no arguments to build each hidden activation
+        output_activation: Called with no arguments to build the output one
+        
+    Returns:
+        Sequential network
+    """
+    # Falsy hidden_sizes (e.g. an empty list) means no hidden layers
+    hidden = list(hidden_sizes) if hidden_sizes else []
+    
+    # Widths at every layer boundary, e.g. 3 → 4 → 2 becomes [3, 4, 2]
+    widths = [input_size] + hidden + [output_size]
+    final_index = len(widths) - 2  # position of the last Dense layer
+    
+    layers = []
+    for index, (n_in, n_out) in enumerate(zip(widths, widths[1:])):
+        layers.append(Dense(n_in, n_out))
+        if index == final_index:
+            layers.append(output_activation())
+        else:
+            layers.append(activation())
+    
+    return Sequential(layers)
+
+# %%
+# Test MLP creation and visualization
+try:
+    print("=== Testing MLP Creation and Visualization ===")
+    
+    # Create different MLP architectures
+    mlp_small = create_mlp(input_size=3, hidden_sizes=[4], output_size=2)
+    mlp_medium = create_mlp(input_size=10, hidden_sizes=[16, 8], output_size=3)
+    mlp_large = create_mlp(input_size=784, hidden_sizes=[128, 64, 32], output_size=10)
+    
+    print("Created MLP architectures:")
+    print("  Small: 3 → 4 → 2")
+    print("  Medium: 10 → 16 → 8 → 3")
+    print("  Large: 784 → 128 → 64 → 32 → 10")
+    
+    # Sample batch: 5 samples x 3 features, so it only fits 3-input networks
+    x = Tensor(np.random.randn(5, 3).astype(np.float32))
+    
+    # Visualize architectures
+    visualize_network_architecture(mlp_small, "Small MLP Architecture")
+    visualize_network_architecture(mlp_medium, "Medium MLP Architecture")
+    visualize_network_architecture(mlp_large, "Large MLP Architecture")
+    
+    visualize_data_flow(mlp_small, x, "Data Flow Through Small MLP")
+    
+    # compare_networks feeds x to every network, so pair mlp_small with a
+    # deeper 3-input MLP (mlp_medium expects 10 features and cannot take x)
+    networks = [mlp_small, create_mlp(input_size=3, hidden_sizes=[8, 4], output_size=2)]
+    names = ["Small MLP", "Deeper MLP"]
+    compare_networks(networks, names, x, "MLP Architecture Comparison")
+    
+    print("✅ MLP creation and visualization working!")
+    
+except Exception as e:
+    print(f"❌ Error: {e}")
+    print("Make sure to implement the visualization functions!")
+
+# %% [markdown]
+"""
+## Step 4: Understanding Network Behavior
+
+Let's analyze how different network architectures behave with different types of input data.
+"""
+
+# %%
+#| export
+def analyze_network_behavior(network: Sequential, input_data: Tensor, 
+                           title: str = "Network Behavior Analysis"):
+    """
+    Plot a six-panel summary of how a network transforms one input batch.
+    
+    Panels: per-sample mean input vs mean output, output histogram, mean
+    activation per layer, layer-type counts, tensor size per layer, and a
+    text summary (shapes, parameter count, architecture).
+    
+    Args:
+        network: Network to analyze
+        input_data: Input tensor; axis 0 is the batch axis for 2-D+ data
+        title: Title for the plot
+    """
+    if not _should_show_plots():
+        print("📊 Plots disabled during testing - this is normal!")
+        return
+    
+    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
+    output = network(input_data)
+    output_flat = output.data.flatten()
+    
+    # One layer-by-layer pass feeds both the activation and shape panels
+    activations = []
+    shapes = [input_data.shape]
+    x = input_data
+    for layer in network.layers:
+        x = layer(x)
+        activations.append(np.mean(x.data))
+        shapes.append(x.shape)
+    
+    # 1. Input vs Output relationship
+    # scatter() needs equal-length x and y, but input and output usually
+    # differ in size (e.g. 10x5 in, 10x2 out), so plot per-sample means.
+    # Assumes every layer keeps the leading batch axis intact.
+    ax1 = axes[0, 0]
+    input_array = np.asarray(input_data.data)
+    n_samples = input_array.shape[0] if input_array.ndim >= 2 else 1
+    sample_in = input_array.reshape(n_samples, -1).mean(axis=1)
+    sample_out = np.asarray(output.data).reshape(n_samples, -1).mean(axis=1)
+    ax1.scatter(sample_in, sample_out, alpha=0.6)
+    ax1.plot([sample_in.min(), sample_in.max()], 
+             [sample_in.min(), sample_in.max()], 'r--', alpha=0.5, label='y=x')
+    ax1.set_xlabel('Mean Input (per sample)')
+    ax1.set_ylabel('Mean Output (per sample)')
+    ax1.set_title('Input vs Output')
+    ax1.legend()
+    ax1.grid(True, alpha=0.3)
+    
+    # 2. Output distribution
+    ax2 = axes[0, 1]
+    ax2.hist(output_flat, bins=20, alpha=0.7, edgecolor='black')
+    ax2.axvline(np.mean(output_flat), color='red', linestyle='--', 
+                label=f'Mean: {np.mean(output_flat):.3f}')
+    ax2.set_xlabel('Output Values')
+    ax2.set_ylabel('Frequency')
+    ax2.set_title('Output Distribution')
+    ax2.legend()
+    ax2.grid(True, alpha=0.3)
+    
+    # 3. Layer-by-layer activation patterns
+    ax3 = axes[0, 2]
+    ax3.plot(range(len(activations)), activations, 'bo-', linewidth=2, markersize=8)
+    ax3.set_xlabel('Layer Index')
+    ax3.set_ylabel('Mean Activation')
+    ax3.set_title('Layer-by-Layer Activations')
+    ax3.grid(True, alpha=0.3)
+    
+    # 4. Network depth analysis
+    ax4 = axes[1, 0]
+    layer_names = [type(layer).__name__ for layer in network.layers]
+    layer_counts = {}
+    for layer_type in layer_names:
+        layer_counts[layer_type] = layer_counts.get(layer_type, 0) + 1
+    if layer_counts:
+        ax4.bar(list(layer_counts.keys()), list(layer_counts.values()), alpha=0.7)
+        ax4.set_xlabel('Layer Type')
+        ax4.set_ylabel('Count')
+        ax4.set_title('Layer Type Distribution')
+        ax4.grid(True, alpha=0.3)
+    
+    # 5. Shape transformation
+    ax5 = axes[1, 1]
+    ax5.plot(range(len(shapes)), [np.prod(shape) for shape in shapes],
+             'go-', linewidth=2, markersize=8)
+    ax5.set_xlabel('Layer Index')
+    ax5.set_ylabel('Tensor Size')
+    ax5.set_title('Shape Transformation')
+    ax5.grid(True, alpha=0.3)
+    
+    # 6. Network summary
+    ax6 = axes[1, 2]
+    ax6.axis('off')
+    # Parameters = weights plus, where a layer has one, its bias
+    tensors = [getattr(layer, attr, None)
+               for layer in network.layers for attr in ('weights', 'bias')]
+    n_params = sum(int(np.prod(t.data.shape)) for t in tensors if t is not None)
+    summary_text = f"""
+Network Summary:
+• Total Layers: {len(network.layers)}
+• Input Shape: {input_data.shape}
+• Output Shape: {output.shape}
+• Parameters: {n_params}
+• Architecture: {' → '.join(layer_names)}
+    """
+    
+    ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes, 
+             fontsize=10, verticalalignment='top', fontfamily='monospace')
+    
+    plt.suptitle(title, fontsize=16, fontweight='bold')
+    plt.tight_layout()
+    plt.show()
+
+# %%
+# Test network behavior analysis
+try:
+    print("=== Testing Network Behavior Analysis ===")
+    
+    # 5 → 8 → 4 → 2 MLP with create_mlp's default ReLU/Sigmoid activations
+    network = create_mlp(input_size=5, hidden_sizes=[8, 4], output_size=2)
+    
+    # Three 10x5 batches: standard normal, uniform over [-1, 1), all zeros
+    x_normal = Tensor(np.random.randn(10, 5).astype(np.float32))
+    x_uniform = Tensor(np.random.uniform(-1, 1, (10, 5)).astype(np.float32))
+    x_zeros = Tensor(np.zeros((10, 5)).astype(np.float32))
+    
+    print("Analyzing network behavior with different inputs...")
+    
+    # One figure per batch; with plots disabled each call only prints a notice
+    analyze_network_behavior(network, x_normal, "Network Behavior: Normal Input")
+    analyze_network_behavior(network, x_uniform, "Network Behavior: Uniform Input")
+    analyze_network_behavior(network, x_zeros, "Network Behavior: Zero Input")
+    
+    print("✅ Network behavior analysis working!")
+    
+except Exception as e:
+    print(f"❌ Error: {e}")
+    print("Make sure to implement the behavior analysis function!")
+
+# %% [markdown]
+"""
+## Step 5: Practical Applications
+
+Let's see how our networks can be applied to real-world problems!
+"""
+
+# %%
+#| export
+def create_classification_network(input_size: int, num_classes: int, 
+                                hidden_sizes: List[int] = None) -> Sequential:
+    """
+    Build an MLP classifier: ReLU hidden layers and a Sigmoid output.
+    
+    Args:
+        input_size: Number of input features
+        num_classes: Number of output classes (one Sigmoid score each)
+        hidden_sizes: Hidden layer sizes; None means a single hidden
+            layer of input_size // 2 units
+        
+    Returns:
+        Sequential network for classification
+    """
+    # The default is resolved per call, so it can depend on input_size
+    widths = [input_size // 2] if hidden_sizes is None else hidden_sizes
+    
+    # Keyword arguments make the activation choice explicit at the call
+    network = create_mlp(input_size=input_size,
+                         hidden_sizes=widths,
+                         output_size=num_classes,
+                         activation=ReLU, output_activation=Sigmoid)
+    return network
+
+# %%
+#| export
+def create_regression_network(input_size: int, output_size: int = 1,
+                             hidden_sizes: List[int] = None) -> Sequential:
+    """
+    Build an MLP regressor: ReLU hidden layers and a Tanh output.
+    
+    Args:
+        input_size: Number of input features
+        output_size: Number of output values (default: 1)
+        hidden_sizes: Hidden layer sizes; None means a single hidden
+            layer of input_size // 2 units
+        
+    Returns:
+        Sequential network for regression
+    """
+    widths = [input_size // 2] if hidden_sizes is None else hidden_sizes
+    
+    # NOTE: the output layer applies Tanh, so predictions are bounded to
+    # [-1, 1]; targets outside that range must be rescaled to match.
+    network = create_mlp(input_size=input_size,
+                         hidden_sizes=widths,
+                         output_size=output_size,
+                         activation=ReLU, output_activation=Tanh)
+    return network
+
+# %%
+# Test practical applications
+try:
+    print("=== Testing Practical Applications ===")
+    
+    # Two Sigmoid-output classifiers and one Tanh-output regressor
+    digit_classifier = create_classification_network(
+        input_size=784,  # 28x28 image, flattened
+        num_classes=10,  # 10 digits
+        hidden_sizes=[128, 64]
+    )
+    
+    sentiment_analyzer = create_classification_network(
+        input_size=100,  # 100-dimensional word embeddings
+        num_classes=2,   # Positive/Negative
+        hidden_sizes=[32, 16]
+    )
+    
+    house_price_predictor = create_regression_network(
+        input_size=13,   # 13 house features
+        output_size=1,   # 1 price prediction (Tanh output, so within [-1, 1])
+        hidden_sizes=[8, 4]
+    )
+    
+    print("Created networks for different applications:")
+    print(f"  Digit Classifier: 784 → 128 → 64 → 10")
+    print(f"  Sentiment Analyzer: 100 → 32 → 16 → 2")
+    print(f"  House Price Predictor: 13 → 8 → 4 → 1")
+    
+    # One random sample per network, sized to that network's input
+    digit_input = Tensor(np.random.randn(1, 784).astype(np.float32))
+    sentiment_input = Tensor(np.random.randn(1, 100).astype(np.float32))
+    house_input = Tensor(np.random.randn(1, 13).astype(np.float32))
+    
+    # Forward pass only - no training happens in this cell
+    digit_pred = digit_classifier(digit_input)
+    sentiment_pred = sentiment_analyzer(sentiment_input)
+    house_pred = house_price_predictor(house_input)
+    
+    print(f"\nSample predictions:")
+    print(f"  Digit classifier output: {digit_pred.data[0]}")
+    print(f"  Sentiment analyzer output: {sentiment_pred.data[0]}")
+    print(f"  House price predictor output: {house_pred.data[0]}")
+    
+    # Draw each architecture in its own figure
+    visualize_network_architecture(digit_classifier, "Digit Classification Network")
+    visualize_network_architecture(sentiment_analyzer, "Sentiment Analysis Network")
+    visualize_network_architecture(house_price_predictor, "House Price Prediction Network")
+    
+    print("✅ Practical applications working!")
+    
+except Exception as e:
+    print(f"❌ Error: {e}")
+    print("Make sure to implement the application functions!")
+
+# %% [markdown]
+"""
+## 🎓 Module Summary
+
+### What You Learned
+1. **Network Composition**: Building complete networks from layers
+2. **Architecture Design**: How to choose network structures
+3. **Visualization**: Understanding networks through visual analysis
+4. **Practical Applications**: Real-world network use cases
+
+### Key Architectural Insights
+- **Function Composition**: Networks as `f(x) = layer_n(...layer_1(x))`
+- **Modular Design**: Clean separation between layers and networks
+- **Visual Understanding**: How to analyze network behavior
+- **Application Patterns**: Classification vs regression architectures
+
+### Network Design Principles
+- **Depth vs Width**: Trade-offs in network architecture
+- **Activation Functions**: How they affect network behavior
+- **Shape Management**: Understanding tensor transformations
+- **Practical Considerations**: Choosing architectures for specific tasks
+
+### Next Steps
+- **Training**: Learn how networks learn from data (autograd, optimization)
+- **Advanced Architectures**: CNNs, RNNs, Transformers
+- **Real Data**: Working with actual datasets
+- **Production**: Deploying networks in real applications
+
+**Congratulations on mastering neural network architectures!** 🚀
+""" 
\ No newline at end of file
diff --git a/modules/networks/tests/test_networks.py b/modules/networks/tests/test_networks.py
new file mode 100644
index 00000000..6f2436bb
--- /dev/null
+++ b/modules/networks/tests/test_networks.py
@@ -0,0 +1,420 @@
+"""
+Tests for the Networks module.
+
+Tests network composition, visualization, and practical applications.
+"""
+
+import pytest
+import numpy as np
+import sys
+from pathlib import Path
+
+# Add the project root to the path
+project_root = Path(__file__).parent.parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+# Import the modules we're testing
+from tinytorch.core.tensor import Tensor
+from tinytorch.core.layers import Dense
+from tinytorch.core.activations import ReLU, Sigmoid, Tanh
+
+# Import the networks module
+try:
+    from modules.networks.networks_dev import (
+        Sequential, 
+        create_mlp, 
+        create_classification_network,
+        create_regression_network,
+        visualize_network_architecture,
+        visualize_data_flow,
+        compare_networks,
+        analyze_network_behavior
+    )
+except ImportError:
+    # Fallback for when module isn't exported yet
+    sys.path.append(str(project_root / "modules" / "networks"))
+    from networks_dev import (
+        Sequential, 
+        create_mlp, 
+        create_classification_network,
+        create_regression_network,
+        visualize_network_architecture,
+        visualize_data_flow,
+        compare_networks,
+        analyze_network_behavior
+    )
+
+
+class TestSequentialNetwork:
+    """Unit tests for Sequential: construction, forward pass, call protocol."""
+    
+    def test_sequential_initialization(self):
+        """The layers list is stored as given, in order."""
+        # Layer classes in construction order
+        expected_types = [Dense, ReLU, Dense, Sigmoid]
+        net = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()])
+        
+        assert len(net.layers) == 4
+        # Check each position against its expected layer class
+        for stored, expected in zip(net.layers, expected_types):
+            assert isinstance(stored, expected)
+    
+    def test_sequential_forward_pass(self):
+        """A 3 → 4 → 2 network maps a (2, 3) batch to a (2, 2) Tensor."""
+        net = Sequential([
+            Dense(3, 4), ReLU(),
+            Dense(4, 2), Sigmoid(),
+        ])
+        batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        
+        result = net(batch)
+        
+        assert result.shape == (2, 2)
+        assert isinstance(result, Tensor)
+        # The final Sigmoid keeps every value inside [0, 1]
+        in_range = (result.data >= 0) & (result.data <= 1)
+        assert np.all(in_range)
+    
+    def test_sequential_callable(self):
+        """Calling the network gives the same values as forward()."""
+        net = Sequential([Dense(2, 3), ReLU()])
+        sample = Tensor([[1.0, 2.0]])
+        
+        # Same input through both entry points
+        via_forward = net.forward(sample)
+        via_call = net(sample)
+        
+        assert np.allclose(via_forward.data, via_call.data)
+    
+    def test_empty_sequential(self):
+        """With no layers, the output values equal the input values."""
+        net = Sequential([])
+        sample = Tensor([[1.0, 2.0, 3.0]])
+        
+        passthrough = net(sample)
+        
+        # No layer transforms the data, so values come back unchanged
+        assert np.allclose(passthrough.data, sample.data)
+
+
+class TestMLPCreation:
+    """Tests for the layer layout produced by create_mlp."""
+    
+    def test_create_mlp_basic(self):
+        """One hidden layer yields Dense, ReLU, Dense, Sigmoid."""
+        mlp = create_mlp(input_size=3, hidden_sizes=[4], output_size=2)
+        
+        assert len(mlp.layers) == 4  # Dense + ReLU + Dense + Sigmoid
+        first, hidden_act, second, out_act = mlp.layers
+        # Input layer: 3 → 4, followed by the hidden activation
+        assert isinstance(first, Dense)
+        assert (first.input_size, first.output_size) == (3, 4)
+        assert isinstance(hidden_act, ReLU)
+        # Output layer: 4 → 2, followed by the output activation
+        assert isinstance(second, Dense)
+        assert (second.input_size, second.output_size) == (4, 2)
+        assert isinstance(out_act, Sigmoid)
+    
+    def test_create_mlp_multiple_hidden(self):
+        """Three hidden layers chain widths 10 → 16 → 8 → 4 → 3."""
+        mlp = create_mlp(input_size=10, hidden_sizes=[16, 8, 4], output_size=3)
+        
+        assert len(mlp.layers) == 8  # 4 Dense + 3 ReLU + 1 Sigmoid
+        
+        # Keep only the Dense layers and compare their (in, out) sizes
+        dense_layers = [layer for layer in mlp.layers if isinstance(layer, Dense)]
+        assert len(dense_layers) == 4
+        
+        expected_sizes = [(10, 16), (16, 8), (8, 4), (4, 3)]
+        for dense, (n_in, n_out) in zip(dense_layers, expected_sizes):
+            assert dense.input_size == n_in
+            assert dense.output_size == n_out
+    
+    def test_create_mlp_no_hidden(self):
+        """An empty hidden_sizes gives just Dense + Sigmoid."""
+        mlp = create_mlp(input_size=5, hidden_sizes=[], output_size=2)
+        
+        assert len(mlp.layers) == 2  # Dense + Sigmoid
+        dense, out_act = mlp.layers
+        
+        assert isinstance(dense, Dense)
+        assert (dense.input_size, dense.output_size) == (5, 2)
+        assert isinstance(out_act, Sigmoid)
+    
+    def test_create_mlp_custom_activation(self):
+        """Custom activation classes replace the ReLU/Sigmoid defaults."""
+        mlp = create_mlp(
+            input_size=3,
+            hidden_sizes=[4],
+            output_size=2,
+            activation=Tanh,
+            output_activation=Tanh,
+        )
+        
+        assert len(mlp.layers) == 4
+        # Positions 1 and 3 hold the hidden and output activations
+        hidden_act, out_act = mlp.layers[1], mlp.layers[3]
+        assert isinstance(hidden_act, Tanh)
+        assert isinstance(out_act, Tanh)
+
+
+class TestSpecializedNetworks:
+    """Tests for create_classification_network and create_regression_network."""
+    
+    def test_create_classification_network(self):
+        """Explicit hidden sizes give a Sigmoid-terminated classifier."""
+        classifier = create_classification_network(
+            input_size=100, 
+            num_classes=5,
+            hidden_sizes=[32, 16]
+        )
+        
+        assert len(classifier.layers) == 6  # 3 Dense + 2 ReLU + 1 Sigmoid
+        
+        # The last Dense layer emits one value per class
+        dense_layers = [layer for layer in classifier.layers if isinstance(layer, Dense)]
+        assert dense_layers[-1].output_size == 5
+        assert isinstance(classifier.layers[-1], Sigmoid)
+    
+    def test_create_classification_network_default(self):
+        """Omitting hidden_sizes gives one hidden layer of input_size // 2."""
+        classifier = create_classification_network(input_size=50, num_classes=3)
+        
+        # Should use default hidden size of input_size // 2
+        expected_hidden = 50 // 2
+        dense_layers = [layer for layer in classifier.layers if isinstance(layer, Dense)]
+        assert dense_layers[0].output_size == expected_hidden
+        assert dense_layers[1].output_size == 3
+    
+    def test_create_regression_network(self):
+        """Explicit hidden sizes give a Tanh-terminated regressor."""
+        regressor = create_regression_network(
+            input_size=13, 
+            output_size=1,
+            hidden_sizes=[8, 4]
+        )
+        
+        assert len(regressor.layers) == 6  # 3 Dense + 2 ReLU + 1 Tanh
+        
+        # The last Dense layer emits the single regression value
+        dense_layers = [layer for layer in regressor.layers if isinstance(layer, Dense)]
+        assert dense_layers[-1].output_size == 1
+        assert isinstance(regressor.layers[-1], Tanh)
+    
+    def test_create_regression_network_default(self):
+        """Defaults are output_size=1 and one hidden layer of input_size // 2."""
+        regressor = create_regression_network(input_size=20)
+        
+        # Should use default output_size=1 and hidden_size=input_size//2
+        expected_hidden = 20 // 2
+        dense_layers = [layer for layer in regressor.layers if isinstance(layer, Dense)]
+        assert dense_layers[0].output_size == expected_hidden
+        assert dense_layers[1].output_size == 1
+
+
+class TestNetworkBehavior:
+    """Behavioral checks: output shapes, value ranges, parameter totals."""
+    
+    def test_network_shape_transformations(self):
+        """A (2, 3) batch leaves a 3 → 4 → 2 network with shape (2, 2)."""
+        net = Sequential([
+            Dense(3, 4), ReLU(),
+            Dense(4, 2), Sigmoid(),
+        ])
+        batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        
+        result = net(batch)
+        
+        # The batch axis (2) is kept; only the feature axis changes
+        assert batch.shape == (2, 3)
+        assert result.shape == (2, 2)
+    
+    def test_network_activations(self):
+        """The trailing Sigmoid bounds the output to [0, 1]."""
+        net = Sequential([
+            Dense(2, 3), ReLU(),
+            Dense(3, 1), Sigmoid(),
+        ])
+        # One sample with a negative and a positive feature
+        sample = Tensor([[-1.0, 1.0]])
+        
+        result = net(sample)
+        
+        # Only the final Sigmoid range is asserted here
+        values = result.data
+        assert np.all(values >= 0) and np.all(values <= 1)
+    
+    def test_network_parameter_count(self):
+        """Weights plus biases of both Dense layers total 26."""
+        net = Sequential([
+            Dense(3, 4),  # 3*4 weights + 4 biases = 16
+            ReLU(),
+            Dense(4, 2),  # 4*2 weights + 2 biases = 10
+            Sigmoid(),
+        ])
+        
+        # Only layers exposing `weights` carry parameters
+        parametric = [layer for layer in net.layers if hasattr(layer, 'weights')]
+        n_weights = sum(layer.weights.data.size for layer in parametric)
+        # A bias is optional: it may be missing or set to None
+        n_biases = sum(
+            layer.bias.data.size
+            for layer in parametric
+            if hasattr(layer, 'bias') and layer.bias is not None
+        )
+        
+        assert n_weights + n_biases == 26  # 16 + 10
+
+
+class TestVisualizationFunctions:
+    """Smoke tests: each visualization helper runs without raising."""
+    
+    def test_visualize_network_architecture_exists(self):
+        """visualize_network_architecture accepts a network and a title."""
+        net = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()])
+        
+        # Turn any exception into an explicit test failure
+        try:
+            visualize_network_architecture(net, "Test Network")
+        except Exception as e:
+            pytest.fail(f"visualize_network_architecture raised {e}")
+    
+    def test_visualize_data_flow_exists(self):
+        """visualize_data_flow accepts a network, an input and a title."""
+        sample = Tensor([[1.0, 2.0, 3.0]])
+        net = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()])
+        
+        # Turn any exception into an explicit test failure
+        try:
+            visualize_data_flow(net, sample, "Test Data Flow")
+        except Exception as e:
+            pytest.fail(f"visualize_data_flow raised {e}")
+    
+    def test_compare_networks_exists(self):
+        """compare_networks accepts two networks that share one input."""
+        sample = Tensor([[1.0, 2.0, 3.0]])
+        narrow = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()])
+        wide = Sequential([Dense(3, 8), ReLU(), Dense(8, 2), Sigmoid()])
+        
+        # Both networks take 3 features, so one sample serves both
+        try:
+            compare_networks([narrow, wide], ["Small", "Large"], sample, "Test Comparison")
+        except Exception as e:
+            pytest.fail(f"compare_networks raised {e}")
+    
+    def test_analyze_network_behavior_exists(self):
+        """analyze_network_behavior accepts a network, an input and a title."""
+        sample = Tensor([[1.0, 2.0, 3.0]])
+        net = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()])
+        
+        # Turn any exception into an explicit test failure
+        try:
+            analyze_network_behavior(net, sample, "Test Behavior")
+        except Exception as e:
+            pytest.fail(f"analyze_network_behavior raised {e}")
+
+
+class TestPracticalApplications:
+    """End-to-end forward-pass checks for the example application networks."""
+    
+    def test_digit_classification_network(self):
+        """A 784-input, 10-class classifier returns ten scores in [0, 1]."""
+        classifier = create_classification_network(
+            input_size=784,  # 28x28 image
+            num_classes=10,   # 10 digits
+            hidden_sizes=[128, 64]
+        )
+        
+        # Test with fake image data
+        fake_image = Tensor(np.random.randn(1, 784).astype(np.float32))
+        output = classifier(fake_image)
+        
+        assert output.shape == (1, 10)
+        assert np.all(output.data >= 0) and np.all(output.data <= 1)
+        # Sigmoid scores each class independently, so the ten outputs are
+        # not a probability distribution and are not expected to sum to 1
+    
+    def test_sentiment_analysis_network(self):
+        """A 100-input, 2-class classifier returns two scores in [0, 1]."""
+        classifier = create_classification_network(
+            input_size=100,  # 100-dimensional embeddings
+            num_classes=2,    # Positive/Negative
+            hidden_sizes=[32, 16]
+        )
+        
+        # Test with fake text embeddings
+        fake_embeddings = Tensor(np.random.randn(1, 100).astype(np.float32))
+        output = classifier(fake_embeddings)
+        
+        assert output.shape == (1, 2)
+        assert np.all(output.data >= 0) and np.all(output.data <= 1)
+    
+    def test_house_price_prediction_network(self):
+        """A 13-input regressor returns a single value in [-1, 1]."""
+        regressor = create_regression_network(
+            input_size=13,   # 13 house features
+            output_size=1,   # 1 price prediction
+            hidden_sizes=[8, 4]
+        )
+        
+        # Test with fake house features
+        fake_features = Tensor(np.random.randn(1, 13).astype(np.float32))
+        output = regressor(fake_features)
+        
+        assert output.shape == (1, 1)
+        # Tanh output should be between -1 and 1
+        assert np.all(output.data >= -1) and np.all(output.data <= 1)
+
+
+class TestNetworkIntegration:
+    """Checks that Sequential works with the tensor, layers and activations modules."""
+    
+    def test_network_with_tensor_operations(self):
+        """A batch stacked from two Tensors' data passes through the network."""
+        net = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()])
+        
+        # Stack the raw arrays of two single-row tensors into one (2, 3) batch
+        rows = [Tensor([[1.0, 2.0, 3.0]]), Tensor([[4.0, 5.0, 6.0]])]
+        batch = Tensor(np.vstack([row.data for row in rows]))
+        
+        result = net(batch)
+        
+        assert result.shape == (2, 2)
+    
+    def test_network_with_activations_module(self):
+        """ReLU and Sigmoid from the activations module work in a network."""
+        # Uses the imported activation classes directly rather than
+        # local re-implementations
+        net = Sequential([
+            Dense(2, 3),
+            ReLU(),  # From activations module
+            Dense(3, 1),
+            Sigmoid(),  # From activations module
+        ])
+        sample = Tensor([[-1.0, 1.0]])
+        
+        result = net(sample)
+        
+        # The final Sigmoid bounds every output to [0, 1]
+        values = result.data
+        assert np.all(values >= 0) and np.all(values <= 1)
+    
+    def test_network_with_layers_module(self):
+        """Dense from the layers module works in a network."""
+        net = Sequential([
+            Dense(3, 4),  # From layers module
+            ReLU(),
+            Dense(4, 2),  # From layers module
+            Sigmoid(),
+        ])
+        sample = Tensor([[1.0, 2.0, 3.0]])
+        
+        result = net(sample)
+        
+        # One sample in, two output features out
+        assert result.shape == (1, 2)
+
+
+if __name__ == "__main__":
+    # Executed as a script: run this file's tests through pytest, verbosely
+    pytest.main([__file__, "-v"]) 
\ No newline at end of file
diff --git a/tito/commands/info.py b/tito/commands/info.py
index b7462a19..21f25ee7 100644
--- a/tito/commands/info.py
+++ b/tito/commands/info.py
@@ -74,6 +74,8 @@ class InfoCommand(BaseCommand):
         modules = [
             ("Setup", "hello_tinytorch function", self.check_setup_status),
             ("Tensor", "basic tensor operations", self.check_tensor_status),
+            ("Layers", "neural network building blocks", self.check_layers_status),
+            ("Networks", "neural network architectures", self.check_networks_status),
             ("MLP", "multi-layer perceptron (manual)", self.check_mlp_status),
             ("CNN", "convolutional networks (basic)", self.check_cnn_status),
             ("Data", "data loading pipeline", self.check_data_status),
@@ -152,6 +154,32 @@ class InfoCommand(BaseCommand):
             return "✅ Implemented"
         except (ImportError, NotImplementedError):
             return "⏳ Not Started"
+    
+    def check_layers_status(self):
+        """Report whether a Dense layer followed by ReLU runs on a tiny input."""
+        try:
+            from tinytorch.core.layers import Dense
+            from tinytorch.core.activations import ReLU
+            from tinytorch.core.tensor import Tensor
+            dense = Dense(3, 4)
+            relu = ReLU()
+            relu(dense(Tensor([[1, 2, 3]])))
+        except (ImportError, NotImplementedError):
+            return "⏳ Not Started"
+        return "✅ Implemented"
+    
+    def check_networks_status(self):
+        """Report whether a small Sequential network can run a forward pass."""
+        try:
+            from tinytorch.core.networks import Sequential
+            from tinytorch.core.layers import Dense
+            from tinytorch.core.activations import ReLU, Sigmoid
+            from tinytorch.core.tensor import Tensor
+            model = Sequential([Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid()])
+            model(Tensor([[1, 2, 3]]))
+        except (ImportError, NotImplementedError):
+            return "⏳ Not Started"
+        return "✅ Implemented"
     def check_mlp_status(self):
         try:
             from tinytorch.core.modules import MLP
diff --git a/tito/commands/test.py b/tito/commands/test.py
index 74904a81..e33e803c 100644
--- a/tito/commands/test.py
+++ b/tito/commands/test.py
@@ -32,7 +32,7 @@ class TestCommand(BaseCommand):
 
     def run(self, args: Namespace) -> int:
         console = self.console
-        valid_modules = ["setup", "tensor", "activations", "layers", "cnn", "data", "training", 
+        valid_modules = ["setup", "tensor", "activations", "layers", "networks", "cnn", "data", "training", 
                          "profiling", "compression", "kernels", "benchmarking", "mlops"]
         
         if args.all: