From 3d81f76897b28e74ce54eeeedbe0be19c623e89a Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Sat, 12 Jul 2025 12:36:31 -0400 Subject: [PATCH] Clean up stale documentation - remove outdated workflow patterns - Remove 5 outdated development guides that contradicted clean NBGrader/nbdev architecture - Update all documentation to reflect assignments/ directory structure - Remove references to deprecated #| hide approach and old command patterns - Ensure clean separation: NBGrader for assignments, nbdev for package export - Update README, Student Guide, and Instructor Guide with current workflows --- README.md | 231 +-- assignments/source/00_setup/00_setup.ipynb | 252 +++ assignments/source/00_setup/setup_dev.ipynb | 434 +---- assignments/source/00_setup/setup_dev.py | 288 +-- assignments/source/01_tensor/01_tensor.ipynb | 1047 ---------- assignments/source/01_tensor/tensor_dev.ipynb | 1047 ---------- assignments/source/02_activations/README.md | 2 +- .../02_activations/activations_dev.ipynb | 1143 ----------- assignments/source/03_layers/layers_dev.ipynb | 797 -------- .../source/04_networks/networks_dev.ipynb | 1437 -------------- assignments/source/05_cnn/cnn_dev.ipynb | 816 -------- .../source/06_dataloader/dataloader_dev.ipynb | 1699 ----------------- docs/INSTRUCTOR_GUIDE.md | 16 +- docs/README.md | 4 +- docs/STUDENT_GUIDE.md | 10 +- docs/development/README.md | 81 +- docs/development/module-creation-checklist.md | 165 -- docs/development/module-development-guide.md | 318 --- docs/development/module-template.md | 55 - docs/development/nbgrader-integration.md | 386 ---- docs/development/quick-module-reference.md | 239 --- docs/pedagogy/testing-architecture.md | 2 +- tinytorch/_modidx.py | 4 + tinytorch/core/utils.py | 158 +- 24 files changed, 653 insertions(+), 9978 deletions(-) create mode 100644 assignments/source/00_setup/00_setup.ipynb delete mode 100644 assignments/source/01_tensor/01_tensor.ipynb delete mode 100644 
assignments/source/01_tensor/tensor_dev.ipynb delete mode 100644 assignments/source/02_activations/activations_dev.ipynb delete mode 100644 assignments/source/03_layers/layers_dev.ipynb delete mode 100644 assignments/source/04_networks/networks_dev.ipynb delete mode 100644 assignments/source/05_cnn/cnn_dev.ipynb delete mode 100644 assignments/source/06_dataloader/dataloader_dev.ipynb delete mode 100644 docs/development/module-creation-checklist.md delete mode 100644 docs/development/module-development-guide.md delete mode 100644 docs/development/module-template.md delete mode 100644 docs/development/nbgrader-integration.md delete mode 100644 docs/development/quick-module-reference.md diff --git a/README.md b/README.md index 98342b29..391f52d1 100644 --- a/README.md +++ b/README.md @@ -1,71 +1,41 @@ -# TinyπŸ”₯Torch: Build ML Systems from Scratch +# πŸ”₯ TinyTorch: Build ML Systems from Scratch -> A hands-on ML Systems course where students implement every component from scratch +**A complete Machine Learning Systems course where students build their own ML framework.** -[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/) -[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE) -[![nbdev](https://img.shields.io/badge/built%20with-nbdev-orange.svg)](https://nbdev.fast.ai/) +## 🎯 What You'll Build -> **Disclaimer**: TinyTorch is an educational framework developed independently and is not affiliated with or endorsed by Meta or the PyTorch project. +- **Complete ML Framework**: Build your own PyTorch-style framework from scratch +- **Real Applications**: Use your framework to classify CIFAR-10 images +- **Production Skills**: Learn ML systems engineering, not just algorithms +- **Immediate Feedback**: See your code working at every step -**TinyπŸ”₯Torch** is a complete ML Systems course where students build their own machine learning framework from scratch. 
Rather than just learning *about* ML systems, students implement every component and then use their own implementation to solve real problems. +## 🚀 Quick Start (2 minutes) -## 🚀 **Quick Start - Choose Your Path** - -### **👨‍🏫 For Instructors** -**[📖 Instructor Guide](docs/INSTRUCTOR_GUIDE.md)** - Complete teaching guide with verified modules, class structure, and commands -- 6+ weeks of proven curriculum content -- Verified module status and teaching sequence -- Class session structure and troubleshooting guide - -### **👨‍🎓 For Students** -**[🔥 Student Guide](docs/STUDENT_GUIDE.md)** - Complete learning path with clear workflow -- Step-by-step progress tracker -- 5-step daily workflow for each module -- Getting help and study tips - -### **🛠️ For Developers** -**[📚 Documentation](docs/)** - Complete documentation including pedagogy and development guides - -### **🎯 Python-First Development + NBGrader** -**Philosophy**: Raw Python files → Jupyter notebooks on demand → NBGrader compliance -- **Core Development**: Work in `modules/XX/XX_dev.py` (Python files) -- **Package Building**: `nbdev` exports to `tinytorch` package -- **Assignment Generation**: `jupytext` + `NBGrader` create student versions -- **Auto-Grading**: `pytest` integration for automated testing - -**Instructor Workflow**: +### **Students** ```bash -code modules/XX/XX_dev.py # Edit Python source -tito module export XX # Build package (nbdev) -tito nbgrader generate XX # Create assignment (Python→Jupyter→NBGrader) -tito nbgrader release XX # Deploy to students +git clone https://github.com/your-org/tinytorch.git +cd tinytorch # directory name matches the cloned repo URL +make install # Install dependencies +tito system doctor # Verify setup +cd assignments/source/00_setup # Start with setup +jupyter lab setup_dev.py # Open first assignment ``` -## 🎯 **What Students Build** +### **Instructors** +```bash +# System check +tito system info # Check course status +tito system doctor # Verify environment -By 
completing TinyTorch, students implement a complete ML framework: +# Assignment management +tito nbgrader generate 00_setup # Create student assignments +tito nbgrader release 00_setup # Release to students +tito nbgrader autograde 00_setup # Auto-grade submissions +``` -- βœ… **Activation functions** (ReLU, Sigmoid, Tanh) -- βœ… **Neural network layers** (Dense, Conv2D) -- βœ… **Network architectures** (Sequential, MLP) -- βœ… **Data loading** (CIFAR-10 pipeline) -- βœ… **Development workflow** (export, test, use) -- 🚧 **Tensor operations** (arithmetic, broadcasting) -- 🚧 **Automatic differentiation** (backpropagation) -- 🚧 **Training systems** (optimizers, loss functions) +## πŸ“š Course Structure -## πŸŽ“ **Learning Philosophy: Build β†’ Use β†’ Understand β†’ Repeat** - -Students experience the complete cycle: -1. **Build**: Implement `ReLU()` function from scratch -2. **Use**: Import `from tinytorch.core.activations import ReLU` with their own code -3. **Understand**: See how it works in real neural networks -4. 
**Repeat**: Each module builds on previous implementations - -## πŸ“Š **Current Status** (Ready for Classroom Use) - -### **βœ… Fully Working Modules** (6+ weeks of content) +### **Core Assignments** (6+ weeks of proven content) - **00_setup** (20/20 tests) - Development workflow & CLI tools - **02_activations** (24/24 tests) - ReLU, Sigmoid, Tanh functions - **03_layers** (17/22 tests) - Dense layers & neural building blocks @@ -73,103 +43,108 @@ Students experience the complete cycle: - **06_dataloader** (15/15 tests) - CIFAR-10 data loading - **05_cnn** (2/2 tests) - Convolution operations -### **🚧 In Development** +### **Advanced Features** (in development) - **01_tensor** (22/33 tests) - Tensor arithmetic -- **07-13** - Advanced features (autograd, training, MLOps) +- **07-13** - Autograd, optimizers, training, MLOps -## πŸš€ **Quick Commands** +## πŸ› οΈ Development Workflow -### **System Status** +### **NBGrader** (Assignment Creation & Testing) ```bash -tito system info # Check system and module status -tito system doctor # Verify environment setup -tito module status # View all module progress +tito nbgrader generate 00_setup # Create student assignments +tito nbgrader release 00_setup # Release to students +tito nbgrader collect 00_setup # Collect submissions +tito nbgrader autograde 00_setup # Auto-grade with pytest ``` -### **Student Workflow** +### **nbdev** (Package Export & Building) ```bash -cd modules/00_setup # Navigate to first module -jupyter lab setup_dev.py # Open development notebook -python -m pytest tests/ -v # Run tests -python bin/tito module export 00_setup # Export to package +tito module export 00_setup # Export to tinytorch package +tito module test 00_setup # Test package integration ``` -### **Verify Implementation** -```bash -# Use student's own implementations -python -c "from tinytorch.core.utils import hello_tinytorch; hello_tinytorch()" -python -c "from tinytorch.core.activations import ReLU; print(ReLU()([-1, 0, 1]))" +## 
πŸ“ˆ Student Success Path + +### **Build β†’ Use β†’ Understand β†’ Repeat** +1. **Build**: Implement `ReLU()` function from scratch +2. **Use**: `from tinytorch.core.activations import ReLU` - your own code! +3. **Understand**: See how it works in real neural networks +4. **Repeat**: Each assignment builds on previous work + +### **Example: First Assignment** +```python +# You implement this: +def hello_tinytorch(): + print("Welcome to TinyTorch!") + +# Then immediately use it: +from tinytorch.core.utils import hello_tinytorch +hello_tinytorch() # Your code working! ``` -## 🌟 **Why Build from Scratch?** +## πŸŽ“ Educational Philosophy -**Even in the age of AI-generated code, building systems from scratch remains educationally essential:** +### **Real Data, Real Systems** +- Work with CIFAR-10 (not toy datasets) +- Production-style code organization +- Performance and engineering considerations +- Immediate visual feedback -- **Understanding vs. Using**: AI shows *what* works, TinyTorch teaches *why* it works -- **Systems Literacy**: Debugging real ML requires understanding abstractions like autograd and data loaders -- **AI-Augmented Engineers**: The best engineers collaborate with AI tools, not rely on them blindly -- **Intentional Design**: Systems thinking about memory, performance, and architecture can't be outsourced +### **Build Everything from Scratch** +- No black boxes or "magic" functions +- Understanding through implementation +- Connect every concept to production systems +- See your code working immediately -## πŸ—οΈ **Repository Structure** +## πŸ“ Repository Structure ``` TinyTorch/ -β”œβ”€β”€ README.md # This file - main entry point -β”œβ”€β”€ docs/ -β”‚ β”œβ”€β”€ INSTRUCTOR_GUIDE.md # Complete teaching guide -β”‚ β”œβ”€β”€ STUDENT_GUIDE.md # Complete learning path -β”‚ └── [detailed docs] # Pedagogy and development guides -β”œβ”€β”€ modules/ -β”‚ β”œβ”€β”€ 00_setup/ # Development workflow -β”‚ β”œβ”€β”€ 01_tensor/ # Tensor operations -β”‚ 
β”œβ”€β”€ 02_activations/ # Activation functions -β”‚ β”œβ”€β”€ 03_layers/ # Neural network layers -β”‚ β”œβ”€β”€ 04_networks/ # Network architectures -β”‚ β”œβ”€β”€ 05_cnn/ # Convolution operations -β”‚ β”œβ”€β”€ 06_dataloader/ # Data loading pipeline -β”‚ └── 07-13/ # Advanced features -β”œβ”€β”€ tinytorch/ # The actual Python package -β”œβ”€β”€ bin/ # CLI tools (tito) -└── tests/ # Integration tests +β”œβ”€β”€ assignments/source/XX/ # Assignment source files +β”‚ β”œβ”€β”€ XX_dev.py # Development assignment +β”‚ └── tests/ # Assignment tests +β”œβ”€β”€ tinytorch/ # Your built framework +β”‚ └── core/ # Exported student code +β”œβ”€β”€ tito/ # CLI tools +└── docs/ # Documentation ``` -## πŸ“š **Educational Approach** +## πŸ”§ Technical Requirements -### **Real Data, Real Systems** -- Work with CIFAR-10 (10,000 real images) -- Production-style code organization -- Performance and engineering considerations +- **Python 3.8+** +- **Jupyter Lab** for development +- **PyTorch** for comparison and final projects +- **NBGrader** for assignment management +- **nbdev** for package building -### **Immediate Feedback** -- Tests provide instant verification -- Students see their code working quickly -- Progress is visible and measurable +## 🎯 Getting Started -### **Progressive Complexity** -- Start simple (activation functions) -- Build complexity gradually (layers β†’ networks β†’ training) -- Connect to real ML engineering practices +### **Students** +1. **System Check**: `tito system doctor` +2. **First Assignment**: `cd assignments/source/00_setup && jupyter lab setup_dev.py` +3. **Build & Test**: Follow the notebook, export when complete +4. **Use Your Code**: `from tinytorch.core.utils import hello_tinytorch` -## 🀝 **Contributing** +### **Instructors** +1. **Course Status**: `tito system info` +2. **Assignment Management**: `tito nbgrader generate 00_setup` +3. **Student Release**: `tito nbgrader release 00_setup` +4. 
**Auto-grading**: `tito nbgrader autograde 00_setup` -We welcome contributions! See our [development documentation](docs/development/) for guidelines on creating new modules or improving existing ones. +## πŸ“Š Success Metrics -## πŸ“„ **License** +**Students can currently:** +- Build and test multi-layer perceptrons +- Implement custom activation functions +- Load and process CIFAR-10 data +- Create basic convolution operations +- Export their code to a working package -Apache License 2.0 - see the [LICENSE](LICENSE) file for details. +**Verified workflows:** +- βœ… **Student Journey**: receive assignment β†’ implement β†’ export β†’ use +- βœ… **Instructor Journey**: create β†’ release β†’ collect β†’ grade +- βœ… **Package Integration**: All core imports work correctly --- -## πŸŽ‰ **Ready to Start?** - -### **Instructors** -1. Read the [πŸ“– Instructor Guide](docs/INSTRUCTOR_GUIDE.md) -2. Test your setup: `tito system doctor` -3. Start with: `cd modules/00_setup && jupyter lab setup_dev.py` - -### **Students** -1. Read the [πŸ”₯ Student Guide](docs/STUDENT_GUIDE.md) -2. Begin with: `cd modules/00_setup && jupyter lab setup_dev.py` -3. Follow the 5-step workflow for each module - -**πŸš€ TinyTorch is ready for classroom use with 6+ weeks of proven curriculum content!** +**πŸŽ‰ TinyTorch is ready for classroom use with 6+ weeks of proven curriculum content!** diff --git a/assignments/source/00_setup/00_setup.ipynb b/assignments/source/00_setup/00_setup.ipynb new file mode 100644 index 00000000..f2d4e84e --- /dev/null +++ b/assignments/source/00_setup/00_setup.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fc1bae4e", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Assignment 0: Setup - TinyTorch Development Environment (INSTRUCTOR VERSION)\n", + "\n", + "This is the instructor solution version showing how solutions are filled in." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "876aef2e", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp core.utils" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9d346d7", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "# Required imports for TinyTorch utilities\n", + "import sys\n", + "import platform\n", + "from datetime import datetime\n", + "import os\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "markdown", + "id": "687b365f", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Problem 1: Hello Function (10 points)\n", + "\n", + "Write a function that displays a welcome message for TinyTorch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8f5870e", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "hello_function", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "def hello_tinytorch():\n", + " \"\"\"\n", + " Display a welcome message for TinyTorch.\n", + " \n", + " This function should:\n", + " 1. Try to load ASCII art from 'tinytorch_flame.txt' if it exists\n", + " 2. If the file doesn't exist, display a simple text banner\n", + " 3. Print \"TinyTorch\" and \"Build ML Systems from Scratch!\"\n", + " 4. 
Handle any exceptions gracefully\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a6d8a5a", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "add_function", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "def add_numbers(a, b):\n", + " \"\"\"\n", + " Add two numbers together.\n", + " \n", + " Args:\n", + " a: First number (int or float)\n", + " b: Second number (int or float)\n", + " \n", + " Returns:\n", + " Sum of a and b\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "693d10ea", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "systeminfo_class", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "class SystemInfo:\n", + " \"\"\"\n", + " A class for collecting and displaying system information.\n", + " \"\"\"\n", + " \n", + " def __init__(self):\n", + " \"\"\"\n", + " Initialize the SystemInfo object.\n", + " Collect Python version, platform, and machine information.\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION\n", + " \n", + " def __str__(self):\n", + " \"\"\"\n", + " Return a formatted string representation of system information.\n", + " Format: \"Python X.Y.Z on Platform (Architecture)\"\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION\n", + " \n", + " def is_compatible(self):\n", + " \"\"\"\n", + " Check if the Python version is compatible (>= 
3.8).\n", + " Returns True if compatible, False otherwise.\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77e585a9", + "metadata": { + "nbgrader": { + "grade": false, + "grade_id": "developer_profile_class", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "class DeveloperProfile:\n", + " \"\"\"\n", + " A class representing a developer profile.\n", + " \"\"\"\n", + " \n", + " def __init__(self, name=\"Student\", email=\"student@example.com\", affiliation=\"TinyTorch Community\", specialization=\"ML Systems\"):\n", + " \"\"\"\n", + " Initialize a developer profile.\n", + " \n", + " Args:\n", + " name: Developer's name\n", + " email: Developer's email\n", + " affiliation: Developer's affiliation or organization\n", + " specialization: Developer's area of specialization\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION\n", + " \n", + " def __str__(self):\n", + " \"\"\"\n", + " Return a basic string representation of the developer.\n", + " Format: \"Name (email)\"\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION\n", + " \n", + " def get_signature(self):\n", + " \"\"\"\n", + " Return a formatted signature for the developer.\n", + " Should include name, affiliation, and specialization.\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION\n", + " \n", + " def get_profile_info(self):\n", + " \"\"\"\n", + " Return comprehensive profile information as a dictionary.\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION " + ] + } + ], + 
"metadata": { + "jupytext": { + "main_language": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/assignments/source/00_setup/setup_dev.ipynb b/assignments/source/00_setup/setup_dev.ipynb index d99de1bd..4de5c06b 100644 --- a/assignments/source/00_setup/setup_dev.ipynb +++ b/assignments/source/00_setup/setup_dev.ipynb @@ -2,86 +2,64 @@ "cells": [ { "cell_type": "markdown", - "id": "8dad0504", + "id": "fc1bae4e", "metadata": { "cell_marker": "\"\"\"" }, "source": [ - "# Module 0: Setup - TinyπŸ”₯Torch Development Workflow\n", + "# Assignment 0: Setup - TinyTorch Development Environment (INSTRUCTOR VERSION)\n", "\n", - "Welcome to TinyTorch! This module teaches you the development workflow you'll use throughout the course.\n", - "\n", - "## Learning Goals\n", - "- Understand the nbdev notebook-to-Python workflow\n", - "- Write your first TinyTorch code\n", - "- Run tests and use the CLI tools\n", - "- Get comfortable with the development rhythm\n", - "\n", - "## The TinyTorch Development Cycle\n", - "\n", - "1. **Write code** in this notebook using `#| export` \n", - "2. **Export code** with `python bin/tito.py sync --module setup`\n", - "3. **Run tests** with `python bin/tito.py test --module setup`\n", - "4. **Check progress** with `python bin/tito.py info`\n", - "\n", - "Let's get started!" + "This is the instructor solution version showing how solutions are filled in." 
] }, { "cell_type": "code", "execution_count": null, - "id": "d8986dab", + "id": "876aef2e", "metadata": {}, "outputs": [], "source": [ - "#| default_exp core.utils\n", - "\n", - "# Setup imports and environment\n", + "#| default_exp core.utils" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9d346d7", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "# Required imports for TinyTorch utilities\n", "import sys\n", "import platform\n", "from datetime import datetime\n", "import os\n", - "from pathlib import Path\n", - "\n", - "print(\"πŸ”₯ TinyTorch Development Environment\")\n", - "print(f\"Python {sys.version}\")\n", - "print(f\"Platform: {platform.system()} {platform.release()}\")\n", - "print(f\"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")" + "from pathlib import Path" ] }, { "cell_type": "markdown", - "id": "9389fc51", + "id": "687b365f", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 }, "source": [ - "## Step 1: Understanding the Module β†’ Package Structure\n", + "## Problem 1: Hello Function (10 points)\n", "\n", - "**πŸŽ“ Teaching vs. πŸ”§ Building**: This course has two sides:\n", - "- **Teaching side**: You work in `modules/setup/setup_dev.ipynb` (learning-focused)\n", - "- **Building side**: Your code exports to `tinytorch/core/utils.py` (production package)\n", - "\n", - "**Key Concept**: The `#| default_exp core.utils` directive at the top tells nbdev to export all `#| export` cells to `tinytorch/core/utils.py`.\n", - "\n", - "This separation allows us to:\n", - "- Organize learning by **concepts** (modules) \n", - "- Organize code by **function** (package structure)\n", - "- Build a real ML framework while learning systematically\n", - "\n", - "Let's write a simple \"Hello World\" function with the `#| export` directive:" + "Write a function that displays a welcome message for TinyTorch." 
] }, { "cell_type": "code", "execution_count": null, - "id": "17145cf3", + "id": "c8f5870e", "metadata": { "lines_to_next_cell": 1, "nbgrader": { "grade": false, - "grade_id": "hello-function", + "grade_id": "hello_function", "locked": false, "schema_version": 3, "solution": true, @@ -93,36 +71,29 @@ "#| export\n", "def hello_tinytorch():\n", " \"\"\"\n", - " A simple hello world function for TinyTorch.\n", + " Display a welcome message for TinyTorch.\n", " \n", - " TODO: Implement this function to display TinyTorch ASCII art and welcome message.\n", - " Load the flame art from tinytorch_flame.txt file with graceful fallback.\n", - " \n", - " HINTS:\n", - " 1. Try to load ASCII art from 'tinytorch_flame.txt' in current directory\n", - " 2. If file exists, read and print the content\n", - " 3. Add \"TinyπŸ”₯Torch\" and \"Build ML Systems from Scratch!\" messages\n", - " 4. If file doesn't exist, just print the emoji version\n", - " 5. Handle any exceptions gracefully\n", - " \n", - " EXAMPLE OUTPUT:\n", - " [ASCII art from file]\n", - " TinyπŸ”₯Torch\n", - " Build ML Systems from Scratch!\n", + " This function should:\n", + " 1. Try to load ASCII art from 'tinytorch_flame.txt' if it exists\n", + " 2. If the file doesn't exist, display a simple text banner\n", + " 3. Print \"TinyTorch\" and \"Build ML Systems from Scratch!\"\n", + " 4. 
Handle any exceptions gracefully\n", " \"\"\"\n", + " ### BEGIN SOLUTION\n", " # YOUR CODE HERE\n", - " raise NotImplementedError()" + " raise NotImplementedError()\n", + " ### END SOLUTION" ] }, { "cell_type": "code", "execution_count": null, - "id": "6793b66f", + "id": "3a6d8a5a", "metadata": { "lines_to_next_cell": 1, "nbgrader": { "grade": false, - "grade_id": "add-function", + "grade_id": "add_function", "locked": false, "schema_version": 3, "solution": true, @@ -136,111 +107,28 @@ " \"\"\"\n", " Add two numbers together.\n", " \n", - " TODO: Implement addition of two numbers.\n", - " This is the foundation of all mathematical operations in ML.\n", - " \n", " Args:\n", " a: First number (int or float)\n", " b: Second number (int or float)\n", " \n", " Returns:\n", " Sum of a and b\n", - " \n", - " EXAMPLE:\n", - " add_numbers(2, 3) should return 5\n", - " add_numbers(1.5, 2.5) should return 4.0\n", " \"\"\"\n", + " ### BEGIN SOLUTION\n", " # YOUR CODE HERE\n", - " raise NotImplementedError()" - ] - }, - { - "cell_type": "markdown", - "id": "6840983e", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Implementation\n", - "\n", - "Once you implement the functions above, run this cell to test them:" + " raise NotImplementedError()\n", + " ### END SOLUTION" ] }, { "cell_type": "code", "execution_count": null, - "id": "fb7631b3", - "metadata": { - "nbgrader": { - "grade": true, - "grade_id": "test-hello-function", - "locked": true, - "points": 3, - "schema_version": 3, - "solution": false, - "task": false - } - }, - "outputs": [], - "source": [ - "# Test hello_tinytorch function\n", - "print(\"Testing hello_tinytorch():\")\n", - "try:\n", - " hello_tinytorch()\n", - " print(\"βœ… hello_tinytorch() executed successfully!\")\n", - "except NotImplementedError:\n", - " print(\"❌ hello_tinytorch() not implemented yet\")\n", - " raise" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "60c14231", - "metadata": { - 
"nbgrader": { - "grade": true, - "grade_id": "test-add-function", - "locked": true, - "points": 2, - "schema_version": 3, - "solution": false, - "task": false - } - }, - "outputs": [], - "source": [ - "# Test add_numbers function\n", - "print(\"Testing add_numbers():\")\n", - "assert add_numbers(2, 3) == 5, \"add_numbers(2, 3) should return 5\"\n", - "assert add_numbers(0, 0) == 0, \"add_numbers(0, 0) should return 0\"\n", - "assert add_numbers(-1, 1) == 0, \"add_numbers(-1, 1) should return 0\"\n", - "assert abs(add_numbers(1.5, 2.5) - 4.0) < 1e-10, \"add_numbers(1.5, 2.5) should return 4.0\"\n", - "print(\"βœ… All addition tests passed!\")" - ] - }, - { - "cell_type": "markdown", - "id": "ec449d60", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: A Simple Class\n", - "\n", - "Let's create a simple class that will help us understand system information. This is still basic, but shows how to structure classes in TinyTorch." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43affa0b", + "id": "693d10ea", "metadata": { "lines_to_next_cell": 1, "nbgrader": { "grade": false, - "grade_id": "systeminfo-class", + "grade_id": "systeminfo_class", "locked": false, "schema_version": 3, "solution": true, @@ -252,168 +140,48 @@ "#| export\n", "class SystemInfo:\n", " \"\"\"\n", - " Simple system information class.\n", - " \n", - " TODO: Implement this class to collect and display system information.\n", - " \n", - " REQUIREMENTS:\n", - " 1. __init__: Collect Python version, platform, and machine information\n", - " 2. __str__: Return formatted system info string\n", - " 3. 
is_compatible: Check if Python version >= 3.8\n", - " \n", - " HINTS:\n", - " - Use sys.version_info for Python version\n", - " - Use platform.system() for platform name \n", - " - Use platform.machine() for machine architecture\n", - " - Store these as instance attributes in __init__\n", + " A class for collecting and displaying system information.\n", " \"\"\"\n", " \n", " def __init__(self):\n", " \"\"\"\n", - " Initialize system information collection.\n", - " \n", - " TODO: Collect Python version, platform, and machine information.\n", - " Store as instance attributes: self.python_version, self.platform, self.machine\n", + " Initialize the SystemInfo object.\n", + " Collect Python version, platform, and machine information.\n", " \"\"\"\n", + " ### BEGIN SOLUTION\n", " # YOUR CODE HERE\n", " raise NotImplementedError()\n", + " ### END SOLUTION\n", " \n", " def __str__(self):\n", " \"\"\"\n", - " Return human-readable system information.\n", - " \n", - " TODO: Format system info as a readable string.\n", - " FORMAT: \"Python X.Y on Platform (Architecture)\"\n", - " EXAMPLE: \"Python 3.9 on Darwin (arm64)\"\n", + " Return a formatted string representation of system information.\n", + " Format: \"Python X.Y.Z on Platform (Architecture)\"\n", " \"\"\"\n", + " ### BEGIN SOLUTION\n", " # YOUR CODE HERE\n", " raise NotImplementedError()\n", + " ### END SOLUTION\n", " \n", " def is_compatible(self):\n", " \"\"\"\n", - " Check if system meets minimum requirements.\n", - " \n", - " TODO: Check if Python version >= 3.8\n", - " Return True if compatible, False otherwise\n", + " Check if the Python version is compatible (>= 3.8).\n", + " Returns True if compatible, False otherwise.\n", " \"\"\"\n", + " ### BEGIN SOLUTION\n", " # YOUR CODE HERE\n", - " raise NotImplementedError()" - ] - }, - { - "cell_type": "markdown", - "id": "81445d5d", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your SystemInfo Class\n", - "\n", - "Once you implement 
the SystemInfo class above, run this cell to test it:" + " raise NotImplementedError()\n", + " ### END SOLUTION" ] }, { "cell_type": "code", "execution_count": null, - "id": "23bbf329", + "id": "77e585a9", "metadata": { - "nbgrader": { - "grade": true, - "grade_id": "test-systeminfo-creation", - "locked": true, - "points": 2, - "schema_version": 3, - "solution": false, - "task": false - } - }, - "outputs": [], - "source": [ - "# Test SystemInfo creation\n", - "print(\"Testing SystemInfo creation...\")\n", - "info = SystemInfo()\n", - "assert hasattr(info, 'python_version'), \"SystemInfo should have python_version attribute\"\n", - "assert hasattr(info, 'platform'), \"SystemInfo should have platform attribute\"\n", - "assert hasattr(info, 'machine'), \"SystemInfo should have machine attribute\"\n", - "print(\"βœ… SystemInfo creation test passed!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "621f5acf", - "metadata": { - "nbgrader": { - "grade": true, - "grade_id": "test-systeminfo-str", - "locked": true, - "points": 2, - "schema_version": 3, - "solution": false, - "task": false - } - }, - "outputs": [], - "source": [ - "# Test SystemInfo string representation\n", - "print(\"Testing SystemInfo string representation...\")\n", - "info = SystemInfo()\n", - "info_str = str(info)\n", - "assert isinstance(info_str, str), \"SystemInfo.__str__() should return a string\"\n", - "assert len(info_str) > 0, \"SystemInfo string should not be empty\"\n", - "assert 'Python' in info_str, \"SystemInfo string should contain 'Python'\"\n", - "print(f\"βœ… SystemInfo string: {info_str}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "da5e0d37", - "metadata": { - "nbgrader": { - "grade": true, - "grade_id": "test-systeminfo-compatibility", - "locked": true, - "points": 1, - "schema_version": 3, - "solution": false, - "task": false - } - }, - "outputs": [], - "source": [ - "# Test SystemInfo compatibility check\n", - "print(\"Testing 
SystemInfo compatibility...\")\n", - "info = SystemInfo()\n", - "compatible = info.is_compatible()\n", - "assert isinstance(compatible, bool), \"is_compatible() should return a boolean\"\n", - "# Since we're running this test, Python should be >= 3.8\n", - "assert compatible == True, \"Current Python version should be compatible (>= 3.8)\"\n", - "print(\"✅ SystemInfo compatibility test passed!\")" - ] - }, - { - "cell_type": "markdown", - "id": "55e72365", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 3: Developer Profile (Optional Challenge)\n", - "\n", - "For students who want an extra challenge, implement a DeveloperProfile class:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "75f3279f", - "metadata": { - "lines_to_next_cell": 1, "nbgrader": { "grade": false, - "grade_id": "developer-profile", + "grade_id": "developer_profile_class", "locked": false, "schema_version": 3, "solution": true, @@ -425,72 +193,52 @@ "#| export\n", "class DeveloperProfile:\n", " \"\"\"\n", - " Developer profile for personalizing TinyTorch experience.\n", - " \n", - " TODO: OPTIONAL CHALLENGE - Implement this class for extra credit!\n", - " \n", - " REQUIREMENTS:\n", - " 1. Store developer information (name, email, etc.)\n", - " 2. Load ASCII art from file with fallback\n", - " 3. Generate formatted profile display\n", - " 4. 
Create professional signature\n", - " \n", - " This is an advanced exercise - only attempt after completing the required parts!\n", + " A class representing a developer profile.\n", " \"\"\"\n", " \n", - " def __init__(self, name=\"Student\", email=\"student@example.com\"):\n", + " def __init__(self, name=\"Student\", email=\"student@example.com\", affiliation=\"TinyTorch Community\", specialization=\"ML Systems\"):\n", " \"\"\"\n", - " Initialize developer profile.\n", + " Initialize a developer profile.\n", " \n", - " TODO: Store developer information with defaults.\n", - " Feel free to customize with your own info!\n", + " Args:\n", + " name: Developer's name\n", + " email: Developer's email\n", + " affiliation: Developer's affiliation or organization\n", + " specialization: Developer's area of specialization\n", " \"\"\"\n", - " # YOUR CODE HERE (OPTIONAL)\n", - " self.name = name\n", - " self.email = email\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION\n", + " \n", + " def __str__(self):\n", + " \"\"\"\n", + " Return a basic string representation of the developer.\n", + " Format: \"Name (email)\"\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION\n", " \n", " def get_signature(self):\n", " \"\"\"\n", - " Get a short signature for code headers.\n", - " \n", - " TODO: Return a signature like \"Built by Name (email)\"\n", + " Return a formatted signature for the developer.\n", + " Should include name, affiliation, and specialization.\n", " \"\"\"\n", - " # YOUR CODE HERE (OPTIONAL)\n", - " return f\"Built by {self.name} ({self.email})\"" - ] - }, - { - "cell_type": "markdown", - "id": "6efefa44", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 🎯 Module Summary\n", - "\n", - "Congratulations! 
You've completed the TinyTorch setup module:\n", - "\n", - "### What You've Accomplished\n", - "✅ **Environment Setup**: Learned the development workflow \n", - "✅ **First Function**: Implemented hello_tinytorch() with file handling \n", - "✅ **Math Operations**: Built add_numbers() for ML foundations \n", - "✅ **Object-Oriented Programming**: Created SystemInfo class with properties \n", - "✅ **Testing**: Verified your implementations with automated tests \n", - "✅ **Package Export**: Used nbdev to build the tinytorch package \n", - "\n", - "### Key Concepts You've Learned\n", - "- **nbdev workflow**: From notebook to production package\n", - "- **File handling**: Reading ASCII art with graceful fallbacks\n", - "- **System information**: Collecting platform and version data\n", - "- **Object-oriented design**: Classes, properties, and methods\n", - "- **Error handling**: Using try/except and fallback strategies\n", - "\n", - "### Next Steps\n", - "1. **Export your code**: Run `python bin/tito.py sync --module setup`\n", - "2. **Run tests**: Use `python bin/tito.py test --module setup`\n", - "3. **Check your work**: Import your functions with `from tinytorch.core.utils import hello_tinytorch`\n", - "\n", - "**Ready for the next challenge?** Let's move on to building tensors!" 
+ " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION\n", + " \n", + " def get_profile_info(self):\n", + " \"\"\"\n", + " Return comprehensive profile information as a dictionary.\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # YOUR CODE HERE\n", + " raise NotImplementedError()\n", + " ### END SOLUTION " ] } ], diff --git a/assignments/source/00_setup/setup_dev.py b/assignments/source/00_setup/setup_dev.py index 13b48956..25bc0022 100644 --- a/assignments/source/00_setup/setup_dev.py +++ b/assignments/source/00_setup/setup_dev.py @@ -10,295 +10,149 @@ # %% [markdown] """ -# Module 0: Setup - Tiny🔥Torch Development Workflow +# Assignment 0: Setup - TinyTorch Development Environment (INSTRUCTOR VERSION) -Welcome to TinyTorch! This module teaches you the development workflow you'll use throughout the course. - -## Learning Goals -- Understand the nbdev notebook-to-Python workflow -- Write your first TinyTorch code -- Run tests and use the CLI tools -- Get comfortable with the development rhythm - -## The TinyTorch Development Cycle - -1. **Write code** in this notebook using `#| export` -2. **Export code** with `python bin/tito.py sync --module setup` -3. **Run tests** with `python bin/tito.py test --module setup` -4. **Check progress** with `python bin/tito.py info` - -Let's get started! 
""" # %% #| default_exp core.utils -# Setup imports and environment +# %% +#| export +# Required imports for TinyTorch utilities import sys import platform from datetime import datetime import os from pathlib import Path -print("🔥 TinyTorch Development Environment") -print(f"Python {sys.version}") -print(f"Platform: {platform.system()} {platform.release()}") -print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") - # %% [markdown] """ -## Step 1: Understanding the Module → Package Structure +## Problem 1: Hello Function (10 points) -**🎓 Teaching vs. 🔧 Building**: This course has two sides: -- **Teaching side**: You work in `modules/setup/setup_dev.ipynb` (learning-focused) -- **Building side**: Your code exports to `tinytorch/core/utils.py` (production package) - -**Key Concept**: The `#| default_exp core.utils` directive at the top tells nbdev to export all `#| export` cells to `tinytorch/core/utils.py`. - -This separation allows us to: -- Organize learning by **concepts** (modules) -- Organize code by **function** (package structure) -- Build a real ML framework while learning systematically - -Let's write a simple "Hello World" function with the `#| export` directive: +Write a function that displays a welcome message for TinyTorch. """ -# %% nbgrader={"grade": false, "grade_id": "hello-function", "locked": false, "schema_version": 3, "solution": true, "task": false} +# %% nbgrader={"grade": false, "grade_id": "hello_function", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export def hello_tinytorch(): """ - A simple hello world function for TinyTorch. + Display a welcome message for TinyTorch. - TODO: Implement this function to display TinyTorch ASCII art and welcome message. - Load the flame art from tinytorch_flame.txt file with graceful fallback. - - HINTS: - 1. Try to load ASCII art from 'tinytorch_flame.txt' in current directory - 2. If file exists, read and print the content - 3. 
Add "Tiny🔥Torch" and "Build ML Systems from Scratch!" messages - 4. If file doesn't exist, just print the emoji version - 5. Handle any exceptions gracefully - - EXAMPLE OUTPUT: - [ASCII art from file] - Tiny🔥Torch - Build ML Systems from Scratch! + This function should: + 1. Try to load ASCII art from 'tinytorch_flame.txt' if it exists + 2. If the file doesn't exist, display a simple text banner + 3. Print "TinyTorch" and "Build ML Systems from Scratch!" + 4. Handle any exceptions gracefully """ + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION -# %% nbgrader={"grade": false, "grade_id": "add-function", "locked": false, "schema_version": 3, "solution": true, "task": false} +# %% nbgrader={"grade": false, "grade_id": "add_function", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export def add_numbers(a, b): """ Add two numbers together. - TODO: Implement addition of two numbers. - This is the foundation of all mathematical operations in ML. 
- Args: a: First number (int or float) b: Second number (int or float) Returns: Sum of a and b - - EXAMPLE: - add_numbers(2, 3) should return 5 - add_numbers(1.5, 2.5) should return 4.0 """ + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION -# %% [markdown] -""" -### 🧪 Test Your Implementation - -Once you implement the functions above, run this cell to test them: -""" - -# %% nbgrader={"grade": true, "grade_id": "test-hello-function", "locked": true, "points": 3, "schema_version": 3, "solution": false, "task": false} -# Test hello_tinytorch function -print("Testing hello_tinytorch():") -try: - hello_tinytorch() - print("✅ hello_tinytorch() executed successfully!") -except NotImplementedError: - print("❌ hello_tinytorch() not implemented yet") - raise - -# %% nbgrader={"grade": true, "grade_id": "test-add-function", "locked": true, "points": 2, "schema_version": 3, "solution": false, "task": false} -# Test add_numbers function -print("Testing add_numbers():") -assert add_numbers(2, 3) == 5, "add_numbers(2, 3) should return 5" -assert add_numbers(0, 0) == 0, "add_numbers(0, 0) should return 0" -assert add_numbers(-1, 1) == 0, "add_numbers(-1, 1) should return 0" -assert abs(add_numbers(1.5, 2.5) - 4.0) < 1e-10, "add_numbers(1.5, 2.5) should return 4.0" -print("✅ All addition tests passed!") - -# %% [markdown] -""" -## Step 2: A Simple Class - -Let's create a simple class that will help us understand system information. This is still basic, but shows how to structure classes in TinyTorch. -""" - -# %% nbgrader={"grade": false, "grade_id": "systeminfo-class", "locked": false, "schema_version": 3, "solution": true, "task": false} +# %% nbgrader={"grade": false, "grade_id": "systeminfo_class", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export class SystemInfo: """ - Simple system information class. - - TODO: Implement this class to collect and display system information. - - REQUIREMENTS: - 1. 
__init__: Collect Python version, platform, and machine information - 2. __str__: Return formatted system info string - 3. is_compatible: Check if Python version >= 3.8 - - HINTS: - - Use sys.version_info for Python version - - Use platform.system() for platform name - - Use platform.machine() for machine architecture - - Store these as instance attributes in __init__ + A class for collecting and displaying system information. """ def __init__(self): """ - Initialize system information collection. - - TODO: Collect Python version, platform, and machine information. - Store as instance attributes: self.python_version, self.platform, self.machine + Initialize the SystemInfo object. + Collect Python version, platform, and machine information. """ + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION def __str__(self): """ - Return human-readable system information. - - TODO: Format system info as a readable string. - FORMAT: "Python X.Y on Platform (Architecture)" - EXAMPLE: "Python 3.9 on Darwin (arm64)" + Return a formatted string representation of system information. + Format: "Python X.Y.Z on Platform (Architecture)" """ + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION def is_compatible(self): """ - Check if system meets minimum requirements. - - TODO: Check if Python version >= 3.8 - Return True if compatible, False otherwise + Check if the Python version is compatible (>= 3.8). + Returns True if compatible, False otherwise. 
""" + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION -# %% [markdown] -""" -### 🧪 Test Your SystemInfo Class - -Once you implement the SystemInfo class above, run this cell to test it: -""" - -# %% nbgrader={"grade": true, "grade_id": "test-systeminfo-creation", "locked": true, "points": 2, "schema_version": 3, "solution": false, "task": false} -# Test SystemInfo creation -print("Testing SystemInfo creation...") -info = SystemInfo() -assert hasattr(info, 'python_version'), "SystemInfo should have python_version attribute" -assert hasattr(info, 'platform'), "SystemInfo should have platform attribute" -assert hasattr(info, 'machine'), "SystemInfo should have machine attribute" -print("✅ SystemInfo creation test passed!") - -# %% nbgrader={"grade": true, "grade_id": "test-systeminfo-str", "locked": true, "points": 2, "schema_version": 3, "solution": false, "task": false} -# Test SystemInfo string representation -print("Testing SystemInfo string representation...") -info = SystemInfo() -info_str = str(info) -assert isinstance(info_str, str), "SystemInfo.__str__() should return a string" -assert len(info_str) > 0, "SystemInfo string should not be empty" -assert 'Python' in info_str, "SystemInfo string should contain 'Python'" -print(f"✅ SystemInfo string: {info_str}") - -# %% nbgrader={"grade": true, "grade_id": "test-systeminfo-compatibility", "locked": true, "points": 1, "schema_version": 3, "solution": false, "task": false} -# Test SystemInfo compatibility check -print("Testing SystemInfo compatibility...") -info = SystemInfo() -compatible = info.is_compatible() -assert isinstance(compatible, bool), "is_compatible() should return a boolean" -# Since we're running this test, Python should be >= 3.8 -assert compatible == True, "Current Python version should be compatible (>= 3.8)" -print("✅ SystemInfo compatibility test passed!") - -# %% [markdown] -""" -## Step 3: Developer Profile (Optional Challenge) - -For students who 
want an extra challenge, implement a DeveloperProfile class: -""" - -# %% nbgrader={"grade": false, "grade_id": "developer-profile", "locked": false, "schema_version": 3, "solution": true, "task": false} +# %% nbgrader={"grade": false, "grade_id": "developer_profile_class", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export class DeveloperProfile: """ - Developer profile for personalizing TinyTorch experience. - - TODO: OPTIONAL CHALLENGE - Implement this class for extra credit! - - REQUIREMENTS: - 1. Store developer information (name, email, etc.) - 2. Load ASCII art from file with fallback - 3. Generate formatted profile display - 4. Create professional signature - - This is an advanced exercise - only attempt after completing the required parts! + A class representing a developer profile. """ - def __init__(self, name="Student", email="student@example.com"): + def __init__(self, name="Student", email="student@example.com", affiliation="TinyTorch Community", specialization="ML Systems"): """ - Initialize developer profile. + Initialize a developer profile. - TODO: Store developer information with defaults. - Feel free to customize with your own info! + Args: + name: Developer's name + email: Developer's email + affiliation: Developer's affiliation or organization + specialization: Developer's area of specialization """ - # YOUR CODE HERE (OPTIONAL) - self.name = name - self.email = email + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION + + def __str__(self): + """ + Return a basic string representation of the developer. + Format: "Name (email)" + """ + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION def get_signature(self): """ - Get a short signature for code headers. - - TODO: Return a signature like "Built by Name (email)" + Return a formatted signature for the developer. + Should include name, affiliation, and specialization. 
""" - # YOUR CODE HERE (OPTIONAL) - return f"Built by {self.name} ({self.email})" - -# %% [markdown] -""" -## 🎯 Module Summary - -Congratulations! You've completed the TinyTorch setup module: - -### What You've Accomplished -✅ **Environment Setup**: Learned the development workflow -✅ **First Function**: Implemented hello_tinytorch() with file handling -✅ **Math Operations**: Built add_numbers() for ML foundations -✅ **Object-Oriented Programming**: Created SystemInfo class with properties -✅ **Testing**: Verified your implementations with automated tests -✅ **Package Export**: Used nbdev to build the tinytorch package - -### Key Concepts You've Learned -- **nbdev workflow**: From notebook to production package -- **File handling**: Reading ASCII art with graceful fallbacks -- **System information**: Collecting platform and version data -- **Object-oriented design**: Classes, properties, and methods -- **Error handling**: Using try/except and fallback strategies - -### Next Steps -1. **Export your code**: Run `python bin/tito.py sync --module setup` -2. **Run tests**: Use `python bin/tito.py test --module setup` -3. **Check your work**: Import your functions with `from tinytorch.core.utils import hello_tinytorch` - -**Ready for the next challenge?** Let's move on to building tensors! -""" + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION + + def get_profile_info(self): + """ + Return comprehensive profile information as a dictionary. 
+ """ + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION \ No newline at end of file diff --git a/assignments/source/01_tensor/01_tensor.ipynb b/assignments/source/01_tensor/01_tensor.ipynb deleted file mode 100644 index 9122c966..00000000 --- a/assignments/source/01_tensor/01_tensor.ipynb +++ /dev/null @@ -1,1047 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6186cbf0", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module 1: Tensor - Core Data Structure\n", - "\n", - "Welcome to the Tensor module! This is where TinyTorch really begins. You'll implement the fundamental data structure that powers all ML systems.\n", - "\n", - "## Learning Goals\n", - "- Understand tensors as N-dimensional arrays with ML-specific operations\n", - "- Implement a complete Tensor class with arithmetic operations\n", - "- Handle shape management, data types, and memory layout\n", - "- Build the foundation for neural networks and automatic differentiation" - ] - }, - { - "cell_type": "markdown", - "id": "c88b6562", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## \ud83d\udce6 Where This Code Lives in the Final Package\n", - "\n", - "**Learning Side:** You work in `assignments/source/01_tensor/tensor_dev.py` \n", - "**Building Side:** Code exports to `tinytorch.core.tensor`\n", - "\n", - "```python\n", - "# Final package structure:\n", - "from tinytorch.core.tensor import Tensor\n", - "from tinytorch.core.layers import Dense, Conv2D\n", - "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", - "```\n", - "\n", - "**Why this matters:**\n", - "- **Learning:** Focused modules for deep understanding\n", - "- **Production:** Proper organization like PyTorch's `torch.tensor`\n", - "- **Consistency:** Core data structure lives in `core.tensor`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff79eeec", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp 
core.tensor\n", - "\n", - "# Setup and imports\n", - "import numpy as np\n", - "import sys\n", - "from typing import Union, List, Tuple, Optional, Any\n", - "\n", - "print(\"\ud83d\udd25 TinyTorch Tensor Module\")\n", - "print(f\"NumPy version: {np.__version__}\")\n", - "print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", - "print(\"Ready to build tensors!\")" - ] - }, - { - "cell_type": "markdown", - "id": "aacde3bd", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 1: What is a Tensor?\n", - "\n", - "### Definition\n", - "A **tensor** is an N-dimensional array with ML-specific operations. Think of it as a container that can hold data in multiple dimensions:\n", - "\n", - "- **Scalar** (0D): A single number - `5.0`\n", - "- **Vector** (1D): A list of numbers - `[1, 2, 3]` \n", - "- **Matrix** (2D): A 2D array - `[[1, 2], [3, 4]]`\n", - "- **Higher dimensions**: 3D, 4D, etc. for images, video, batches\n", - "\n", - "### Why Tensors Matter in ML\n", - "Tensors are the foundation of all machine learning because:\n", - "- **Neural networks** process tensors (images, text, audio)\n", - "- **Batch processing** requires multiple samples at once\n", - "- **GPU acceleration** works efficiently with tensors\n", - "- **Automatic differentiation** needs structured data\n", - "\n", - "### Real-World Examples\n", - "- **Image**: 3D tensor `(height, width, channels)` - `(224, 224, 3)` for RGB images\n", - "- **Batch of images**: 4D tensor `(batch_size, height, width, channels)` - `(32, 224, 224, 3)`\n", - "- **Text**: 2D tensor `(sequence_length, embedding_dim)` - `(100, 768)` for BERT embeddings\n", - "- **Audio**: 2D tensor `(time_steps, features)` - `(16000, 1)` for 1 second of audio\n", - "\n", - "### Why Not Just Use NumPy?\n", - "We will use NumPy internally, but our Tensor class adds:\n", - "- **ML-specific operations** (later: gradients, GPU support)\n", - "- **Consistent API** for neural 
networks\n", - "- **Type safety** and error checking\n", - "- **Integration** with the rest of TinyTorch\n", - "\n", - "### Visual Intuition\n", - "```\n", - "Scalar (0D): 5.0\n", - "Vector (1D): [1, 2, 3, 4]\n", - "Matrix (2D): [[1, 2, 3],\n", - " [4, 5, 6]]\n", - "3D Tensor: [[[1, 2], [3, 4]],\n", - " [[5, 6], [7, 8]]]\n", - "```\n", - "\n", - "Let's start building!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2dc8771d", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Tensor:\n", - " \"\"\"\n", - " TinyTorch Tensor: N-dimensional array with ML operations.\n", - " \n", - " The fundamental data structure for all TinyTorch operations.\n", - " Wraps NumPy arrays with ML-specific functionality.\n", - " \n", - " TODO: Implement the core Tensor class with data handling and properties.\n", - " \n", - " APPROACH:\n", - " 1. Store the input data as a NumPy array internally\n", - " 2. Handle different input types (scalars, lists, numpy arrays)\n", - " 3. Implement properties to access shape, size, and data type\n", - " 4. Create a clear string representation\n", - " \n", - " EXAMPLE:\n", - " Input: Tensor([1, 2, 3])\n", - " Expected: Tensor with shape (3,), size 3, dtype int32\n", - " \n", - " HINTS:\n", - " - Use NumPy's np.array() to convert inputs\n", - " - Handle dtype parameter for type conversion\n", - " - Store the array in a private attribute like self._data\n", - " - Properties should return information about the stored array\n", - " \"\"\"\n", - " \n", - " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", - " \"\"\"\n", - " Create a new tensor from data.\n", - " \n", - " Args:\n", - " data: Input data (scalar, list, or numpy array)\n", - " dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.\n", - " \n", - " TODO: Implement tensor creation with proper type handling.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. 
Check if data is a scalar (int/float) - convert to numpy array\n", - " 2. Check if data is a list - convert to numpy array \n", - " 3. Check if data is already a numpy array - use as-is\n", - " 4. Apply dtype conversion if specified\n", - " 5. Store the result in self._data\n", - " \n", - " EXAMPLE:\n", - " Tensor(5) \u2192 stores np.array(5)\n", - " Tensor([1, 2, 3]) \u2192 stores np.array([1, 2, 3])\n", - " Tensor(np.array([1, 2, 3])) \u2192 stores the array directly\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " @property\n", - " def data(self) -> np.ndarray:\n", - " \"\"\"\n", - " Access underlying numpy array.\n", - " \n", - " TODO: Return the stored numpy array.\n", - " \n", - " HINT: Return self._data (the array you stored in __init__)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " @property\n", - " def shape(self) -> Tuple[int, ...]:\n", - " \"\"\"\n", - " Get tensor shape.\n", - " \n", - " TODO: Return the shape of the stored numpy array.\n", - " \n", - " HINT: Use .shape attribute of the numpy array\n", - " EXAMPLE: Tensor([1, 2, 3]).shape should return (3,)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " @property\n", - " def size(self) -> int:\n", - " \"\"\"\n", - " Get total number of elements.\n", - " \n", - " TODO: Return the total number of elements in the tensor.\n", - " \n", - " HINT: Use .size attribute of the numpy array\n", - " EXAMPLE: Tensor([1, 2, 3]).size should return 3\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " @property\n", - " def dtype(self) -> np.dtype:\n", - " \"\"\"\n", - " Get data type as numpy dtype.\n", - " \n", - " TODO: Return the data type of the stored numpy array.\n", - " \n", - " HINT: Use .dtype attribute of the numpy array\n", - " EXAMPLE: Tensor([1, 2, 3]).dtype should return dtype('int32')\n", - 
" \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __repr__(self) -> str:\n", - " \"\"\"\n", - " String representation.\n", - " \n", - " TODO: Create a clear string representation of the tensor.\n", - " \n", - " APPROACH:\n", - " 1. Convert the numpy array to a list for readable output\n", - " 2. Include the shape and dtype information\n", - " 3. Format: \"Tensor([data], shape=shape, dtype=dtype)\"\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]) \u2192 \"Tensor([1, 2, 3], shape=(3,), dtype=int32)\"\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "707dd61c", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Tensor:\n", - " \"\"\"\n", - " TinyTorch Tensor: N-dimensional array with ML operations.\n", - " \n", - " The fundamental data structure for all TinyTorch operations.\n", - " Wraps NumPy arrays with ML-specific functionality.\n", - " \"\"\"\n", - " \n", - " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", - " \"\"\"\n", - " Create a new tensor from data.\n", - " \n", - " Args:\n", - " data: Input data (scalar, list, or numpy array)\n", - " dtype: Data type ('float32', 'int32', etc.). 
Defaults to auto-detect.\n", - " \"\"\"\n", - " # Convert input to numpy array\n", - " if isinstance(data, (int, float, np.number)):\n", - " # Handle Python and NumPy scalars\n", - " if dtype is None:\n", - " # Auto-detect type: int for integers, float32 for floats\n", - " if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):\n", - " dtype = 'int32'\n", - " else:\n", - " dtype = 'float32'\n", - " self._data = np.array(data, dtype=dtype)\n", - " elif isinstance(data, list):\n", - " # Let NumPy auto-detect type, then convert if needed\n", - " temp_array = np.array(data)\n", - " if dtype is None:\n", - " # Keep NumPy's auto-detected type, but prefer common ML types\n", - " if np.issubdtype(temp_array.dtype, np.integer):\n", - " dtype = 'int32'\n", - " elif np.issubdtype(temp_array.dtype, np.floating):\n", - " dtype = 'float32'\n", - " else:\n", - " dtype = temp_array.dtype\n", - " self._data = temp_array.astype(dtype)\n", - " elif isinstance(data, np.ndarray):\n", - " self._data = data.astype(dtype or data.dtype)\n", - " else:\n", - " raise TypeError(f\"Cannot create tensor from {type(data)}\")\n", - " \n", - " @property\n", - " def data(self) -> np.ndarray:\n", - " \"\"\"Access underlying numpy array.\"\"\"\n", - " return self._data\n", - " \n", - " @property\n", - " def shape(self) -> Tuple[int, ...]:\n", - " \"\"\"Get tensor shape.\"\"\"\n", - " return self._data.shape\n", - " \n", - " @property\n", - " def size(self) -> int:\n", - " \"\"\"Get total number of elements.\"\"\"\n", - " return self._data.size\n", - " \n", - " @property\n", - " def dtype(self) -> np.dtype:\n", - " \"\"\"Get data type as numpy dtype.\"\"\"\n", - " return self._data.dtype\n", - " \n", - " def __repr__(self) -> str:\n", - " \"\"\"String representation.\"\"\"\n", - " return f\"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})\"\n", - " \n", - " def add(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"\n", - " Add another 
tensor to this tensor.\n", - " \n", - " TODO: Implement tensor addition as a method.\n", - " \n", - " APPROACH:\n", - " 1. Use the add_tensors function you already implemented\n", - " 2. Or implement the addition directly using self._data + other._data\n", - " 3. Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]).add(Tensor([4, 5, 6])) \u2192 Tensor([5, 7, 9])\n", - " \n", - " HINTS:\n", - " - You can reuse add_tensors(self, other)\n", - " - Or implement directly: Tensor(self._data + other._data)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def multiply(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"\n", - " Multiply this tensor by another tensor.\n", - " \n", - " TODO: Implement tensor multiplication as a method.\n", - " \n", - " APPROACH:\n", - " 1. Use the multiply_tensors function you already implemented\n", - " 2. Or implement the multiplication directly using self._data * other._data\n", - " 3. 
Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]).multiply(Tensor([4, 5, 6])) \u2192 Tensor([4, 10, 18])\n", - " \n", - " HINTS:\n", - " - You can reuse multiply_tensors(self, other)\n", - " - Or implement directly: Tensor(self._data * other._data)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " # Arithmetic operators for natural syntax (a + b, a * b, etc.)\n", - " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Addition: tensor + other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data + other._data)\n", - " else: # scalar\n", - " return Tensor(self._data + other)\n", - " \n", - " def __radd__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse addition: scalar + tensor\"\"\"\n", - " return Tensor(other + self._data)\n", - " \n", - " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Subtraction: tensor - other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data - other._data)\n", - " else: # scalar\n", - " return Tensor(self._data - other)\n", - " \n", - " def __rsub__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse subtraction: scalar - tensor\"\"\"\n", - " return Tensor(other - self._data)\n", - " \n", - " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Multiplication: tensor * other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data * other._data)\n", - " else: # scalar\n", - " return Tensor(self._data * other)\n", - " \n", - " def __rmul__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse multiplication: scalar * tensor\"\"\"\n", - " return Tensor(other * self._data)\n", - " \n", - " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Division: tensor / other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " 
return Tensor(self._data / other._data)\n", - " else: # scalar\n", - " return Tensor(self._data / other)\n", - " \n", - " def __rtruediv__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse division: scalar / tensor\"\"\"\n", - " return Tensor(other / self._data)" - ] - }, - { - "cell_type": "markdown", - "id": "ea197d3d", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### \ud83e\uddea Test Your Tensor Class\n", - "\n", - "Once you implement the Tensor class above, run this cell to test it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8a11c8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Test basic tensor creation\n", - "print(\"Testing Tensor creation...\")\n", - "\n", - "try:\n", - " # Test scalar\n", - " t1 = Tensor(5)\n", - " print(f\"\u2705 Scalar: {t1} (shape: {t1.shape}, size: {t1.size})\")\n", - " \n", - " # Test vector\n", - " t2 = Tensor([1, 2, 3, 4])\n", - " print(f\"\u2705 Vector: {t2} (shape: {t2.shape}, size: {t2.size})\")\n", - " \n", - " # Test matrix\n", - " t3 = Tensor([[1, 2], [3, 4]])\n", - " print(f\"\u2705 Matrix: {t3} (shape: {t3.shape}, size: {t3.size})\")\n", - " \n", - " # Test numpy array\n", - " t4 = Tensor(np.array([1.0, 2.0, 3.0]))\n", - " print(f\"\u2705 Numpy: {t4} (shape: {t4.shape}, size: {t4.size})\")\n", - " \n", - " # Test dtype\n", - " t5 = Tensor([1, 2, 3], dtype='float32')\n", - " print(f\"\u2705 Dtype: {t5} (dtype: {t5.dtype})\")\n", - " \n", - " print(\"\\n\ud83c\udf89 All basic tests passed! Your Tensor class is working!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"\u274c Error: {e}\")\n", - " print(\"Make sure to implement all the required methods!\")" - ] - }, - { - "cell_type": "markdown", - "id": "a025408c", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: Tensor Arithmetic Operations\n", - "\n", - "Now let's add the ability to perform mathematical operations on tensors. 
This is where tensors become powerful for ML!\n", - "\n", - "### Why Arithmetic Matters\n", - "- **Neural networks** perform millions of arithmetic operations\n", - "- **Gradients** require addition, multiplication, and other operations\n", - "- **Batch processing** needs element-wise operations\n", - "- **GPU acceleration** works with parallel arithmetic\n", - "\n", - "### Types of Operations\n", - "1. **Element-wise**: Add, subtract, multiply, divide\n", - "2. **Broadcasting**: Operations between different shapes\n", - "3. **Matrix operations**: Matrix multiplication (later)\n", - "4. **Reduction**: Sum, mean, max, min (later)\n", - "\n", - "Let's start with the basics!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2b3a5c33", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def add_tensors(a: Tensor, b: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Add two tensors element-wise.\n", - " \n", - " TODO: Implement element-wise addition of two tensors.\n", - " \n", - " APPROACH:\n", - " 1. Extract the numpy arrays from both tensors\n", - " 2. Use NumPy's + operator for element-wise addition\n", - " 3. 
Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " add_tensors(Tensor([1, 2, 3]), Tensor([4, 5, 6])) \n", - " \u2192 Tensor([5, 7, 9])\n", - " \n", - " HINTS:\n", - " - Use a.data and b.data to get the numpy arrays\n", - " - NumPy handles broadcasting automatically\n", - " - Return Tensor(result) to wrap the result\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3a85505", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def add_tensors(a: Tensor, b: Tensor) -> Tensor:\n", - " \"\"\"Add two tensors element-wise.\"\"\"\n", - " return Tensor(a.data + b.data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34940b0b", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def multiply_tensors(a: Tensor, b: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Multiply two tensors element-wise.\n", - " \n", - " TODO: Implement element-wise multiplication of two tensors.\n", - " \n", - " APPROACH:\n", - " 1. Extract the numpy arrays from both tensors\n", - " 2. Use NumPy's * operator for element-wise multiplication\n", - " 3. 
Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " multiply_tensors(Tensor([1, 2, 3]), Tensor([4, 5, 6])) \n", - " \u2192 Tensor([4, 10, 18])\n", - " \n", - " HINTS:\n", - " - Use a.data and b.data to get the numpy arrays\n", - " - NumPy handles broadcasting automatically\n", - " - Return Tensor(result) to wrap the result\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa876776", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def multiply_tensors(a: Tensor, b: Tensor) -> Tensor:\n", - " \"\"\"Multiply two tensors element-wise.\"\"\"\n", - " return Tensor(a.data * b.data)" - ] - }, - { - "cell_type": "markdown", - "id": "20c7fdef", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### \ud83e\uddea Test Your Arithmetic Operations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1dd1a6b7", - "metadata": {}, - "outputs": [], - "source": [ - "# Test arithmetic operations\n", - "print(\"Testing tensor arithmetic...\")\n", - "\n", - "try:\n", - " # Test addition\n", - " a = Tensor([1, 2, 3])\n", - " b = Tensor([4, 5, 6])\n", - " c = add_tensors(a, b)\n", - " print(f\"\u2705 Addition: {a} + {b} = {c}\")\n", - " \n", - " # Test multiplication\n", - " d = multiply_tensors(a, b)\n", - " print(f\"\u2705 Multiplication: {a} * {b} = {d}\")\n", - " \n", - " # Test broadcasting (scalar + tensor)\n", - " scalar = Tensor(10)\n", - " e = add_tensors(scalar, a)\n", - " print(f\"\u2705 Broadcasting: {scalar} + {a} = {e}\")\n", - " \n", - " print(\"\\n\ud83c\udf89 All arithmetic tests passed!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"\u274c Error: {e}\")\n", - " print(\"Make sure to implement add_tensors and multiply_tensors!\")" - ] - }, - { - "cell_type": "markdown", - "id": "a88c025e", - "metadata": { - "cell_marker": "\"\"\"", - 
"lines_to_next_cell": 1 - }, - "source": [ - "## Step 3: Tensor Methods (Object-Oriented Approach)\n", - "\n", - "Now let's add methods to the Tensor class itself. This makes the API more intuitive and similar to PyTorch.\n", - "\n", - "### Why Methods Matter\n", - "- **Cleaner API**: `tensor.add(other)` instead of `add_tensors(tensor, other)`\n", - "- **Method chaining**: `tensor.add(other).multiply(scalar)`\n", - "- **Consistency**: Similar to PyTorch's tensor methods\n", - "- **Object-oriented**: Encapsulates operations with data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c61792b", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Tensor:\n", - " \"\"\"\n", - " TinyTorch Tensor: N-dimensional array with ML operations.\n", - " \n", - " The fundamental data structure for all TinyTorch operations.\n", - " Wraps NumPy arrays with ML-specific functionality.\n", - " \"\"\"\n", - " \n", - " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", - " \"\"\"\n", - " Create a new tensor from data.\n", - " \n", - " Args:\n", - " data: Input data (scalar, list, or numpy array)\n", - " dtype: Data type ('float32', 'int32', etc.). 
Defaults to auto-detect.\n", - " \"\"\"\n", - " # Convert input to numpy array\n", - " if isinstance(data, (int, float, np.number)):\n", - " # Handle Python and NumPy scalars\n", - " if dtype is None:\n", - " # Auto-detect type: int for integers, float32 for floats\n", - " if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):\n", - " dtype = 'int32'\n", - " else:\n", - " dtype = 'float32'\n", - " self._data = np.array(data, dtype=dtype)\n", - " elif isinstance(data, list):\n", - " # Let NumPy auto-detect type, then convert if needed\n", - " temp_array = np.array(data)\n", - " if dtype is None:\n", - " # Keep NumPy's auto-detected type, but prefer common ML types\n", - " if np.issubdtype(temp_array.dtype, np.integer):\n", - " dtype = 'int32'\n", - " elif np.issubdtype(temp_array.dtype, np.floating):\n", - " dtype = 'float32'\n", - " else:\n", - " dtype = temp_array.dtype\n", - " self._data = temp_array.astype(dtype)\n", - " elif isinstance(data, np.ndarray):\n", - " self._data = data.astype(dtype or data.dtype)\n", - " else:\n", - " raise TypeError(f\"Cannot create tensor from {type(data)}\")\n", - " \n", - " @property\n", - " def data(self) -> np.ndarray:\n", - " \"\"\"Access underlying numpy array.\"\"\"\n", - " return self._data\n", - " \n", - " @property\n", - " def shape(self) -> Tuple[int, ...]:\n", - " \"\"\"Get tensor shape.\"\"\"\n", - " return self._data.shape\n", - " \n", - " @property\n", - " def size(self) -> int:\n", - " \"\"\"Get total number of elements.\"\"\"\n", - " return self._data.size\n", - " \n", - " @property\n", - " def dtype(self) -> np.dtype:\n", - " \"\"\"Get data type as numpy dtype.\"\"\"\n", - " return self._data.dtype\n", - " \n", - " def __repr__(self) -> str:\n", - " \"\"\"String representation.\"\"\"\n", - " return f\"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})\"\n", - " \n", - " def add(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"\n", - " Add another 
tensor to this tensor.\n", - " \n", - " TODO: Implement tensor addition as a method.\n", - " \n", - " APPROACH:\n", - " 1. Use the add_tensors function you already implemented\n", - " 2. Or implement the addition directly using self._data + other._data\n", - " 3. Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]).add(Tensor([4, 5, 6])) → Tensor([5, 7, 9])\n", - " \n", - " HINTS:\n", - " - You can reuse add_tensors(self, other)\n", - " - Or implement directly: Tensor(self._data + other._data)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def multiply(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"\n", - " Multiply this tensor by another tensor.\n", - " \n", - " TODO: Implement tensor multiplication as a method.\n", - " \n", - " APPROACH:\n", - " 1. Use the multiply_tensors function you already implemented\n", - " 2. Or implement the multiplication directly using self._data * other._data\n", - " 3. 
Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]).multiply(Tensor([4, 5, 6])) \u2192 Tensor([4, 10, 18])\n", - " \n", - " HINTS:\n", - " - You can reuse multiply_tensors(self, other)\n", - " - Or implement directly: Tensor(self._data * other._data)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " # Arithmetic operators for natural syntax (a + b, a * b, etc.)\n", - " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Addition: tensor + other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data + other._data)\n", - " else: # scalar\n", - " return Tensor(self._data + other)\n", - " \n", - " def __radd__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse addition: scalar + tensor\"\"\"\n", - " return Tensor(other + self._data)\n", - " \n", - " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Subtraction: tensor - other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data - other._data)\n", - " else: # scalar\n", - " return Tensor(self._data - other)\n", - " \n", - " def __rsub__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse subtraction: scalar - tensor\"\"\"\n", - " return Tensor(other - self._data)\n", - " \n", - " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Multiplication: tensor * other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data * other._data)\n", - " else: # scalar\n", - " return Tensor(self._data * other)\n", - " \n", - " def __rmul__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse multiplication: scalar * tensor\"\"\"\n", - " return Tensor(other * self._data)\n", - " \n", - " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Division: tensor / other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " 
return Tensor(self._data / other._data)\n", - " else: # scalar\n", - " return Tensor(self._data / other)\n", - " \n", - " def __rtruediv__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse division: scalar / tensor\"\"\"\n", - " return Tensor(other / self._data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cf7db8ff", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Tensor:\n", - " \"\"\"\n", - " TinyTorch Tensor: N-dimensional array with ML operations.\n", - " \n", - " The fundamental data structure for all TinyTorch operations.\n", - " Wraps NumPy arrays with ML-specific functionality.\n", - " \"\"\"\n", - " \n", - " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", - " \"\"\"\n", - " Create a new tensor from data.\n", - " \n", - " Args:\n", - " data: Input data (scalar, list, or numpy array)\n", - " dtype: Data type ('float32', 'int32', etc.). 
Defaults to auto-detect.\n", - " \"\"\"\n", - " # Convert input to numpy array\n", - " if isinstance(data, (int, float, np.number)):\n", - " # Handle Python and NumPy scalars\n", - " if dtype is None:\n", - " # Auto-detect type: int for integers, float32 for floats\n", - " if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):\n", - " dtype = 'int32'\n", - " else:\n", - " dtype = 'float32'\n", - " self._data = np.array(data, dtype=dtype)\n", - " elif isinstance(data, list):\n", - " # Let NumPy auto-detect type, then convert if needed\n", - " temp_array = np.array(data)\n", - " if dtype is None:\n", - " # Keep NumPy's auto-detected type, but prefer common ML types\n", - " if np.issubdtype(temp_array.dtype, np.integer):\n", - " dtype = 'int32'\n", - " elif np.issubdtype(temp_array.dtype, np.floating):\n", - " dtype = 'float32'\n", - " else:\n", - " dtype = temp_array.dtype\n", - " self._data = temp_array.astype(dtype)\n", - " elif isinstance(data, np.ndarray):\n", - " self._data = data.astype(dtype or data.dtype)\n", - " else:\n", - " raise TypeError(f\"Cannot create tensor from {type(data)}\")\n", - " \n", - " @property\n", - " def data(self) -> np.ndarray:\n", - " \"\"\"Access underlying numpy array.\"\"\"\n", - " return self._data\n", - " \n", - " @property\n", - " def shape(self) -> Tuple[int, ...]:\n", - " \"\"\"Get tensor shape.\"\"\"\n", - " return self._data.shape\n", - " \n", - " @property\n", - " def size(self) -> int:\n", - " \"\"\"Get total number of elements.\"\"\"\n", - " return self._data.size\n", - " \n", - " @property\n", - " def dtype(self) -> np.dtype:\n", - " \"\"\"Get data type as numpy dtype.\"\"\"\n", - " return self._data.dtype\n", - " \n", - " def __repr__(self) -> str:\n", - " \"\"\"String representation.\"\"\"\n", - " return f\"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})\"\n", - " \n", - " def add(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"Add another tensor to 
this tensor.\"\"\"\n", - " return Tensor(self._data + other._data)\n", - " \n", - " def multiply(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"Multiply this tensor by another tensor.\"\"\"\n", - " return Tensor(self._data * other._data)\n", - " \n", - " # Arithmetic operators for natural syntax (a + b, a * b, etc.)\n", - " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Addition: tensor + other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data + other._data)\n", - " else: # scalar\n", - " return Tensor(self._data + other)\n", - " \n", - " def __radd__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse addition: scalar + tensor\"\"\"\n", - " return Tensor(other + self._data)\n", - " \n", - " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Subtraction: tensor - other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data - other._data)\n", - " else: # scalar\n", - " return Tensor(self._data - other)\n", - " \n", - " def __rsub__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse subtraction: scalar - tensor\"\"\"\n", - " return Tensor(other - self._data)\n", - " \n", - " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Multiplication: tensor * other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data * other._data)\n", - " else: # scalar\n", - " return Tensor(self._data * other)\n", - " \n", - " def __rmul__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse multiplication: scalar * tensor\"\"\"\n", - " return Tensor(other * self._data)\n", - " \n", - " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Division: tensor / other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data / other._data)\n", - " else: # scalar\n", - " return Tensor(self._data / other)\n", - " \n", - " def 
__rtruediv__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse division: scalar / tensor\"\"\"\n", - " return Tensor(other / self._data)" - ] - }, - { - "cell_type": "markdown", - "id": "53b9cc39", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### \ud83e\uddea Test Your Tensor Methods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7793077f", - "metadata": {}, - "outputs": [], - "source": [ - "# Test tensor methods\n", - "print(\"Testing tensor methods...\")\n", - "\n", - "try:\n", - " # Test method-based operations\n", - " a = Tensor([1, 2, 3])\n", - " b = Tensor([4, 5, 6])\n", - " \n", - " c = a.add(b)\n", - " print(f\"\u2705 Method addition: {a}.add({b}) = {c}\")\n", - " \n", - " d = a.multiply(b)\n", - " print(f\"\u2705 Method multiplication: {a}.multiply({b}) = {d}\")\n", - " \n", - " # Test method chaining\n", - " e = a.add(b).multiply(Tensor(2))\n", - " print(f\"\u2705 Method chaining: {a}.add({b}).multiply(2) = {e}\")\n", - " \n", - " print(\"\\n\ud83c\udf89 All method tests passed!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"\u274c Error: {e}\")\n", - " print(\"Make sure to implement the add and multiply methods!\")" - ] - }, - { - "cell_type": "markdown", - "id": "5dad35d0", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## \ud83c\udfaf Module Summary\n", - "\n", - "Congratulations! 
You've built the foundation of TinyTorch:\n", - "\n", - "### What You've Accomplished\n", - "\u2705 **Tensor Creation**: Handle scalars, lists, and numpy arrays \n", - "\u2705 **Properties**: Access shape, size, and data type \n", - "\u2705 **Arithmetic**: Element-wise addition and multiplication \n", - "\u2705 **Methods**: Object-oriented API for operations \n", - "\u2705 **Testing**: Immediate feedback on your implementation \n", - "\n", - "### Key Concepts You've Learned\n", - "- **Tensors** are N-dimensional arrays with ML operations\n", - "- **NumPy integration** provides efficient computation\n", - "- **Element-wise operations** work on corresponding elements\n", - "- **Broadcasting** automatically handles different shapes\n", - "- **Object-oriented design** makes APIs intuitive\n", - "\n", - "### What's Next\n", - "In the next modules, you'll build on this foundation:\n", - "- **Layers**: Transform tensors with weights and biases\n", - "- **Activations**: Add nonlinearity to your networks\n", - "- **Networks**: Compose layers into complete models\n", - "- **Training**: Learn parameters with gradients and optimization\n", - "\n", - "### Real-World Connection\n", - "Your Tensor class is now ready to:\n", - "- Store neural network weights and biases\n", - "- Process batches of data efficiently\n", - "- Handle different data types (images, text, audio)\n", - "- Integrate with the rest of the TinyTorch ecosystem\n", - "\n", - "**Ready for the next challenge?** Let's move on to building layers that can transform your tensors!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cdd7d21e", - "metadata": {}, - "outputs": [], - "source": [ - "# Final verification\n", - "print(\"\\n\" + \"=\"*50)\n", - "print(\"\ud83c\udf89 TENSOR MODULE COMPLETE!\")\n", - "print(\"=\"*50)\n", - "print(\"\u2705 Tensor creation and properties\")\n", - "print(\"\u2705 Arithmetic operations\")\n", - "print(\"\u2705 Method-based API\")\n", - "print(\"\u2705 Comprehensive testing\")\n", - "print(\"\\n\ud83d\ude80 Ready to build layers in the next module!\") " - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/assignments/source/01_tensor/tensor_dev.ipynb b/assignments/source/01_tensor/tensor_dev.ipynb deleted file mode 100644 index 1f442896..00000000 --- a/assignments/source/01_tensor/tensor_dev.ipynb +++ /dev/null @@ -1,1047 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6186cbf0", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module 1: Tensor - Core Data Structure\n", - "\n", - "Welcome to the Tensor module! This is where TinyTorch really begins. 
You'll implement the fundamental data structure that powers all ML systems.\n", - "\n", - "## Learning Goals\n", - "- Understand tensors as N-dimensional arrays with ML-specific operations\n", - "- Implement a complete Tensor class with arithmetic operations\n", - "- Handle shape management, data types, and memory layout\n", - "- Build the foundation for neural networks and automatic differentiation" - ] - }, - { - "cell_type": "markdown", - "id": "c88b6562", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 📦 Where This Code Lives in the Final Package\n", - "\n", - "**Learning Side:** You work in `assignments/source/01_tensor/tensor_dev.py` \n", - "**Building Side:** Code exports to `tinytorch.core.tensor`\n", - "\n", - "```python\n", - "# Final package structure:\n", - "from tinytorch.core.tensor import Tensor\n", - "from tinytorch.core.layers import Dense, Conv2D\n", - "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", - "```\n", - "\n", - "**Why this matters:**\n", - "- **Learning:** Focused modules for deep understanding\n", - "- **Production:** Proper organization like PyTorch's `torch.tensor`\n", - "- **Consistency:** Core data structure lives in `core.tensor`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff79eeec", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp core.tensor\n", - "\n", - "# Setup and imports\n", - "import numpy as np\n", - "import sys\n", - "from typing import Union, List, Tuple, Optional, Any\n", - "\n", - "print(\"🔥 TinyTorch Tensor Module\")\n", - "print(f\"NumPy version: {np.__version__}\")\n", - "print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", - "print(\"Ready to build tensors!\")" - ] - }, - { - "cell_type": "markdown", - "id": "aacde3bd", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 1: What is a Tensor?\n", - "\n", - "### Definition\n", - "A **tensor** is an 
N-dimensional array with ML-specific operations. Think of it as a container that can hold data in multiple dimensions:\n", - "\n", - "- **Scalar** (0D): A single number - `5.0`\n", - "- **Vector** (1D): A list of numbers - `[1, 2, 3]` \n", - "- **Matrix** (2D): A 2D array - `[[1, 2], [3, 4]]`\n", - "- **Higher dimensions**: 3D, 4D, etc. for images, video, batches\n", - "\n", - "### Why Tensors Matter in ML\n", - "Tensors are the foundation of all machine learning because:\n", - "- **Neural networks** process tensors (images, text, audio)\n", - "- **Batch processing** requires multiple samples at once\n", - "- **GPU acceleration** works efficiently with tensors\n", - "- **Automatic differentiation** needs structured data\n", - "\n", - "### Real-World Examples\n", - "- **Image**: 3D tensor `(height, width, channels)` - `(224, 224, 3)` for RGB images\n", - "- **Batch of images**: 4D tensor `(batch_size, height, width, channels)` - `(32, 224, 224, 3)`\n", - "- **Text**: 2D tensor `(sequence_length, embedding_dim)` - `(100, 768)` for BERT embeddings\n", - "- **Audio**: 2D tensor `(time_steps, features)` - `(16000, 1)` for 1 second of audio\n", - "\n", - "### Why Not Just Use NumPy?\n", - "We will use NumPy internally, but our Tensor class adds:\n", - "- **ML-specific operations** (later: gradients, GPU support)\n", - "- **Consistent API** for neural networks\n", - "- **Type safety** and error checking\n", - "- **Integration** with the rest of TinyTorch\n", - "\n", - "### Visual Intuition\n", - "```\n", - "Scalar (0D): 5.0\n", - "Vector (1D): [1, 2, 3, 4]\n", - "Matrix (2D): [[1, 2, 3],\n", - " [4, 5, 6]]\n", - "3D Tensor: [[[1, 2], [3, 4]],\n", - " [[5, 6], [7, 8]]]\n", - "```\n", - "\n", - "Let's start building!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2dc8771d", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Tensor:\n", - " \"\"\"\n", - " TinyTorch Tensor: N-dimensional array with ML operations.\n", - " \n", - " The fundamental data structure for all TinyTorch operations.\n", - " Wraps NumPy arrays with ML-specific functionality.\n", - " \n", - " TODO: Implement the core Tensor class with data handling and properties.\n", - " \n", - " APPROACH:\n", - " 1. Store the input data as a NumPy array internally\n", - " 2. Handle different input types (scalars, lists, numpy arrays)\n", - " 3. Implement properties to access shape, size, and data type\n", - " 4. Create a clear string representation\n", - " \n", - " EXAMPLE:\n", - " Input: Tensor([1, 2, 3])\n", - " Expected: Tensor with shape (3,), size 3, dtype int32\n", - " \n", - " HINTS:\n", - " - Use NumPy's np.array() to convert inputs\n", - " - Handle dtype parameter for type conversion\n", - " - Store the array in a private attribute like self._data\n", - " - Properties should return information about the stored array\n", - " \"\"\"\n", - " \n", - " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", - " \"\"\"\n", - " Create a new tensor from data.\n", - " \n", - " Args:\n", - " data: Input data (scalar, list, or numpy array)\n", - " dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.\n", - " \n", - " TODO: Implement tensor creation with proper type handling.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Check if data is a scalar (int/float) - convert to numpy array\n", - " 2. Check if data is a list - convert to numpy array \n", - " 3. Check if data is already a numpy array - use as-is\n", - " 4. Apply dtype conversion if specified\n", - " 5. 
Store the result in self._data\n", - " \n", - " EXAMPLE:\n", - " Tensor(5) → stores np.array(5)\n", - " Tensor([1, 2, 3]) → stores np.array([1, 2, 3])\n", - " Tensor(np.array([1, 2, 3])) → stores the array directly\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " @property\n", - " def data(self) -> np.ndarray:\n", - " \"\"\"\n", - " Access underlying numpy array.\n", - " \n", - " TODO: Return the stored numpy array.\n", - " \n", - " HINT: Return self._data (the array you stored in __init__)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " @property\n", - " def shape(self) -> Tuple[int, ...]:\n", - " \"\"\"\n", - " Get tensor shape.\n", - " \n", - " TODO: Return the shape of the stored numpy array.\n", - " \n", - " HINT: Use .shape attribute of the numpy array\n", - " EXAMPLE: Tensor([1, 2, 3]).shape should return (3,)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " @property\n", - " def size(self) -> int:\n", - " \"\"\"\n", - " Get total number of elements.\n", - " \n", - " TODO: Return the total number of elements in the tensor.\n", - " \n", - " HINT: Use .size attribute of the numpy array\n", - " EXAMPLE: Tensor([1, 2, 3]).size should return 3\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " @property\n", - " def dtype(self) -> np.dtype:\n", - " \"\"\"\n", - " Get data type as numpy dtype.\n", - " \n", - " TODO: Return the data type of the stored numpy array.\n", - " \n", - " HINT: Use .dtype attribute of the numpy array\n", - " EXAMPLE: Tensor([1, 2, 3]).dtype should return dtype('int32')\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __repr__(self) -> str:\n", - " \"\"\"\n", - " String representation.\n", - " \n", - " TODO: Create a clear string representation of the tensor.\n", - " 
\n", - " APPROACH:\n", - " 1. Convert the numpy array to a list for readable output\n", - " 2. Include the shape and dtype information\n", - " 3. Format: \"Tensor([data], shape=shape, dtype=dtype)\"\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]) → \"Tensor([1, 2, 3], shape=(3,), dtype=int32)\"\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "707dd61c", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Tensor:\n", - " \"\"\"\n", - " TinyTorch Tensor: N-dimensional array with ML operations.\n", - " \n", - " The fundamental data structure for all TinyTorch operations.\n", - " Wraps NumPy arrays with ML-specific functionality.\n", - " \"\"\"\n", - " \n", - " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", - " \"\"\"\n", - " Create a new tensor from data.\n", - " \n", - " Args:\n", - " data: Input data (scalar, list, or numpy array)\n", - " dtype: Data type ('float32', 'int32', etc.). 
Defaults to auto-detect.\n", - " \"\"\"\n", - " # Convert input to numpy array\n", - " if isinstance(data, (int, float, np.number)):\n", - " # Handle Python and NumPy scalars\n", - " if dtype is None:\n", - " # Auto-detect type: int for integers, float32 for floats\n", - " if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):\n", - " dtype = 'int32'\n", - " else:\n", - " dtype = 'float32'\n", - " self._data = np.array(data, dtype=dtype)\n", - " elif isinstance(data, list):\n", - " # Let NumPy auto-detect type, then convert if needed\n", - " temp_array = np.array(data)\n", - " if dtype is None:\n", - " # Keep NumPy's auto-detected type, but prefer common ML types\n", - " if np.issubdtype(temp_array.dtype, np.integer):\n", - " dtype = 'int32'\n", - " elif np.issubdtype(temp_array.dtype, np.floating):\n", - " dtype = 'float32'\n", - " else:\n", - " dtype = temp_array.dtype\n", - " self._data = temp_array.astype(dtype)\n", - " elif isinstance(data, np.ndarray):\n", - " self._data = data.astype(dtype or data.dtype)\n", - " else:\n", - " raise TypeError(f\"Cannot create tensor from {type(data)}\")\n", - " \n", - " @property\n", - " def data(self) -> np.ndarray:\n", - " \"\"\"Access underlying numpy array.\"\"\"\n", - " return self._data\n", - " \n", - " @property\n", - " def shape(self) -> Tuple[int, ...]:\n", - " \"\"\"Get tensor shape.\"\"\"\n", - " return self._data.shape\n", - " \n", - " @property\n", - " def size(self) -> int:\n", - " \"\"\"Get total number of elements.\"\"\"\n", - " return self._data.size\n", - " \n", - " @property\n", - " def dtype(self) -> np.dtype:\n", - " \"\"\"Get data type as numpy dtype.\"\"\"\n", - " return self._data.dtype\n", - " \n", - " def __repr__(self) -> str:\n", - " \"\"\"String representation.\"\"\"\n", - " return f\"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})\"\n", - " \n", - " def add(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"\n", - " Add another 
tensor to this tensor.\n", - " \n", - " TODO: Implement tensor addition as a method.\n", - " \n", - " APPROACH:\n", - " 1. Use the add_tensors function you already implemented\n", - " 2. Or implement the addition directly using self._data + other._data\n", - " 3. Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]).add(Tensor([4, 5, 6])) β†’ Tensor([5, 7, 9])\n", - " \n", - " HINTS:\n", - " - You can reuse add_tensors(self, other)\n", - " - Or implement directly: Tensor(self._data + other._data)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def multiply(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"\n", - " Multiply this tensor by another tensor.\n", - " \n", - " TODO: Implement tensor multiplication as a method.\n", - " \n", - " APPROACH:\n", - " 1. Use the multiply_tensors function you already implemented\n", - " 2. Or implement the multiplication directly using self._data * other._data\n", - " 3. 
Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]).multiply(Tensor([4, 5, 6])) β†’ Tensor([4, 10, 18])\n", - " \n", - " HINTS:\n", - " - You can reuse multiply_tensors(self, other)\n", - " - Or implement directly: Tensor(self._data * other._data)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " # Arithmetic operators for natural syntax (a + b, a * b, etc.)\n", - " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Addition: tensor + other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data + other._data)\n", - " else: # scalar\n", - " return Tensor(self._data + other)\n", - " \n", - " def __radd__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse addition: scalar + tensor\"\"\"\n", - " return Tensor(other + self._data)\n", - " \n", - " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Subtraction: tensor - other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data - other._data)\n", - " else: # scalar\n", - " return Tensor(self._data - other)\n", - " \n", - " def __rsub__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse subtraction: scalar - tensor\"\"\"\n", - " return Tensor(other - self._data)\n", - " \n", - " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Multiplication: tensor * other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data * other._data)\n", - " else: # scalar\n", - " return Tensor(self._data * other)\n", - " \n", - " def __rmul__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse multiplication: scalar * tensor\"\"\"\n", - " return Tensor(other * self._data)\n", - " \n", - " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Division: tensor / other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return 
Tensor(self._data / other._data)\n", - " else: # scalar\n", - " return Tensor(self._data / other)\n", - " \n", - " def __rtruediv__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse division: scalar / tensor\"\"\"\n", - " return Tensor(other / self._data)" - ] - }, - { - "cell_type": "markdown", - "id": "ea197d3d", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Tensor Class\n", - "\n", - "Once you implement the Tensor class above, run this cell to test it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8a11c8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Test basic tensor creation\n", - "print(\"Testing Tensor creation...\")\n", - "\n", - "try:\n", - " # Test scalar\n", - " t1 = Tensor(5)\n", - " print(f\"βœ… Scalar: {t1} (shape: {t1.shape}, size: {t1.size})\")\n", - " \n", - " # Test vector\n", - " t2 = Tensor([1, 2, 3, 4])\n", - " print(f\"βœ… Vector: {t2} (shape: {t2.shape}, size: {t2.size})\")\n", - " \n", - " # Test matrix\n", - " t3 = Tensor([[1, 2], [3, 4]])\n", - " print(f\"βœ… Matrix: {t3} (shape: {t3.shape}, size: {t3.size})\")\n", - " \n", - " # Test numpy array\n", - " t4 = Tensor(np.array([1.0, 2.0, 3.0]))\n", - " print(f\"βœ… Numpy: {t4} (shape: {t4.shape}, size: {t4.size})\")\n", - " \n", - " # Test dtype\n", - " t5 = Tensor([1, 2, 3], dtype='float32')\n", - " print(f\"βœ… Dtype: {t5} (dtype: {t5.dtype})\")\n", - " \n", - " print(\"\\nπŸŽ‰ All basic tests passed! Your Tensor class is working!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement all the required methods!\")" - ] - }, - { - "cell_type": "markdown", - "id": "a025408c", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: Tensor Arithmetic Operations\n", - "\n", - "Now let's add the ability to perform mathematical operations on tensors. 
This is where tensors become powerful for ML!\n", - "\n", - "### Why Arithmetic Matters\n", - "- **Neural networks** perform millions of arithmetic operations\n", - "- **Gradients** require addition, multiplication, and other operations\n", - "- **Batch processing** needs element-wise operations\n", - "- **GPU acceleration** works with parallel arithmetic\n", - "\n", - "### Types of Operations\n", - "1. **Element-wise**: Add, subtract, multiply, divide\n", - "2. **Broadcasting**: Operations between different shapes\n", - "3. **Matrix operations**: Matrix multiplication (later)\n", - "4. **Reduction**: Sum, mean, max, min (later)\n", - "\n", - "Let's start with the basics!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2b3a5c33", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def add_tensors(a: Tensor, b: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Add two tensors element-wise.\n", - " \n", - " TODO: Implement element-wise addition of two tensors.\n", - " \n", - " APPROACH:\n", - " 1. Extract the numpy arrays from both tensors\n", - " 2. Use NumPy's + operator for element-wise addition\n", - " 3. 
Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " add_tensors(Tensor([1, 2, 3]), Tensor([4, 5, 6])) \n", - " β†’ Tensor([5, 7, 9])\n", - " \n", - " HINTS:\n", - " - Use a.data and b.data to get the numpy arrays\n", - " - NumPy handles broadcasting automatically\n", - " - Return Tensor(result) to wrap the result\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3a85505", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def add_tensors(a: Tensor, b: Tensor) -> Tensor:\n", - " \"\"\"Add two tensors element-wise.\"\"\"\n", - " return Tensor(a.data + b.data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34940b0b", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def multiply_tensors(a: Tensor, b: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Multiply two tensors element-wise.\n", - " \n", - " TODO: Implement element-wise multiplication of two tensors.\n", - " \n", - " APPROACH:\n", - " 1. Extract the numpy arrays from both tensors\n", - " 2. Use NumPy's * operator for element-wise multiplication\n", - " 3. 
Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " multiply_tensors(Tensor([1, 2, 3]), Tensor([4, 5, 6])) \n", - " β†’ Tensor([4, 10, 18])\n", - " \n", - " HINTS:\n", - " - Use a.data and b.data to get the numpy arrays\n", - " - NumPy handles broadcasting automatically\n", - " - Return Tensor(result) to wrap the result\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa876776", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def multiply_tensors(a: Tensor, b: Tensor) -> Tensor:\n", - " \"\"\"Multiply two tensors element-wise.\"\"\"\n", - " return Tensor(a.data * b.data)" - ] - }, - { - "cell_type": "markdown", - "id": "20c7fdef", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Arithmetic Operations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1dd1a6b7", - "metadata": {}, - "outputs": [], - "source": [ - "# Test arithmetic operations\n", - "print(\"Testing tensor arithmetic...\")\n", - "\n", - "try:\n", - " # Test addition\n", - " a = Tensor([1, 2, 3])\n", - " b = Tensor([4, 5, 6])\n", - " c = add_tensors(a, b)\n", - " print(f\"βœ… Addition: {a} + {b} = {c}\")\n", - " \n", - " # Test multiplication\n", - " d = multiply_tensors(a, b)\n", - " print(f\"βœ… Multiplication: {a} * {b} = {d}\")\n", - " \n", - " # Test broadcasting (scalar + tensor)\n", - " scalar = Tensor(10)\n", - " e = add_tensors(scalar, a)\n", - " print(f\"βœ… Broadcasting: {scalar} + {a} = {e}\")\n", - " \n", - " print(\"\\nπŸŽ‰ All arithmetic tests passed!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement add_tensors and multiply_tensors!\")" - ] - }, - { - "cell_type": "markdown", - "id": "a88c025e", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": 
[ - "## Step 3: Tensor Methods (Object-Oriented Approach)\n", - "\n", - "Now let's add methods to the Tensor class itself. This makes the API more intuitive and similar to PyTorch.\n", - "\n", - "### Why Methods Matter\n", - "- **Cleaner API**: `tensor.add(other)` instead of `add_tensors(tensor, other)`\n", - "- **Method chaining**: `tensor.add(other).multiply(scalar)`\n", - "- **Consistency**: Similar to PyTorch's tensor methods\n", - "- **Object-oriented**: Encapsulates operations with data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c61792b", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Tensor:\n", - " \"\"\"\n", - " TinyTorch Tensor: N-dimensional array with ML operations.\n", - " \n", - " The fundamental data structure for all TinyTorch operations.\n", - " Wraps NumPy arrays with ML-specific functionality.\n", - " \"\"\"\n", - " \n", - " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", - " \"\"\"\n", - " Create a new tensor from data.\n", - " \n", - " Args:\n", - " data: Input data (scalar, list, or numpy array)\n", - " dtype: Data type ('float32', 'int32', etc.). 
Defaults to auto-detect.\n", - " \"\"\"\n", - " # Convert input to numpy array\n", - " if isinstance(data, (int, float, np.number)):\n", - " # Handle Python and NumPy scalars\n", - " if dtype is None:\n", - " # Auto-detect type: int for integers, float32 for floats\n", - " if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):\n", - " dtype = 'int32'\n", - " else:\n", - " dtype = 'float32'\n", - " self._data = np.array(data, dtype=dtype)\n", - " elif isinstance(data, list):\n", - " # Let NumPy auto-detect type, then convert if needed\n", - " temp_array = np.array(data)\n", - " if dtype is None:\n", - " # Keep NumPy's auto-detected type, but prefer common ML types\n", - " if np.issubdtype(temp_array.dtype, np.integer):\n", - " dtype = 'int32'\n", - " elif np.issubdtype(temp_array.dtype, np.floating):\n", - " dtype = 'float32'\n", - " else:\n", - " dtype = temp_array.dtype\n", - " self._data = temp_array.astype(dtype)\n", - " elif isinstance(data, np.ndarray):\n", - " self._data = data.astype(dtype or data.dtype)\n", - " else:\n", - " raise TypeError(f\"Cannot create tensor from {type(data)}\")\n", - " \n", - " @property\n", - " def data(self) -> np.ndarray:\n", - " \"\"\"Access underlying numpy array.\"\"\"\n", - " return self._data\n", - " \n", - " @property\n", - " def shape(self) -> Tuple[int, ...]:\n", - " \"\"\"Get tensor shape.\"\"\"\n", - " return self._data.shape\n", - " \n", - " @property\n", - " def size(self) -> int:\n", - " \"\"\"Get total number of elements.\"\"\"\n", - " return self._data.size\n", - " \n", - " @property\n", - " def dtype(self) -> np.dtype:\n", - " \"\"\"Get data type as numpy dtype.\"\"\"\n", - " return self._data.dtype\n", - " \n", - " def __repr__(self) -> str:\n", - " \"\"\"String representation.\"\"\"\n", - " return f\"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})\"\n", - " \n", - " def add(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"\n", - " Add another 
tensor to this tensor.\n", - " \n", - " TODO: Implement tensor addition as a method.\n", - " \n", - " APPROACH:\n", - " 1. Use the add_tensors function you already implemented\n", - " 2. Or implement the addition directly using self._data + other._data\n", - " 3. Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]).add(Tensor([4, 5, 6])) β†’ Tensor([5, 7, 9])\n", - " \n", - " HINTS:\n", - " - You can reuse add_tensors(self, other)\n", - " - Or implement directly: Tensor(self._data + other._data)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def multiply(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"\n", - " Multiply this tensor by another tensor.\n", - " \n", - " TODO: Implement tensor multiplication as a method.\n", - " \n", - " APPROACH:\n", - " 1. Use the multiply_tensors function you already implemented\n", - " 2. Or implement the multiplication directly using self._data * other._data\n", - " 3. 
Return a new Tensor with the result\n", - " \n", - " EXAMPLE:\n", - " Tensor([1, 2, 3]).multiply(Tensor([4, 5, 6])) β†’ Tensor([4, 10, 18])\n", - " \n", - " HINTS:\n", - " - You can reuse multiply_tensors(self, other)\n", - " - Or implement directly: Tensor(self._data * other._data)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " # Arithmetic operators for natural syntax (a + b, a * b, etc.)\n", - " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Addition: tensor + other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data + other._data)\n", - " else: # scalar\n", - " return Tensor(self._data + other)\n", - " \n", - " def __radd__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse addition: scalar + tensor\"\"\"\n", - " return Tensor(other + self._data)\n", - " \n", - " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Subtraction: tensor - other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data - other._data)\n", - " else: # scalar\n", - " return Tensor(self._data - other)\n", - " \n", - " def __rsub__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse subtraction: scalar - tensor\"\"\"\n", - " return Tensor(other - self._data)\n", - " \n", - " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Multiplication: tensor * other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data * other._data)\n", - " else: # scalar\n", - " return Tensor(self._data * other)\n", - " \n", - " def __rmul__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse multiplication: scalar * tensor\"\"\"\n", - " return Tensor(other * self._data)\n", - " \n", - " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Division: tensor / other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return 
Tensor(self._data / other._data)\n", - " else: # scalar\n", - " return Tensor(self._data / other)\n", - " \n", - " def __rtruediv__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse division: scalar / tensor\"\"\"\n", - " return Tensor(other / self._data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cf7db8ff", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Tensor:\n", - " \"\"\"\n", - " TinyTorch Tensor: N-dimensional array with ML operations.\n", - " \n", - " The fundamental data structure for all TinyTorch operations.\n", - " Wraps NumPy arrays with ML-specific functionality.\n", - " \"\"\"\n", - " \n", - " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", - " \"\"\"\n", - " Create a new tensor from data.\n", - " \n", - " Args:\n", - " data: Input data (scalar, list, or numpy array)\n", - " dtype: Data type ('float32', 'int32', etc.). 
Defaults to auto-detect.\n", - " \"\"\"\n", - " # Convert input to numpy array\n", - " if isinstance(data, (int, float, np.number)):\n", - " # Handle Python and NumPy scalars\n", - " if dtype is None:\n", - " # Auto-detect type: int for integers, float32 for floats\n", - " if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):\n", - " dtype = 'int32'\n", - " else:\n", - " dtype = 'float32'\n", - " self._data = np.array(data, dtype=dtype)\n", - " elif isinstance(data, list):\n", - " # Let NumPy auto-detect type, then convert if needed\n", - " temp_array = np.array(data)\n", - " if dtype is None:\n", - " # Keep NumPy's auto-detected type, but prefer common ML types\n", - " if np.issubdtype(temp_array.dtype, np.integer):\n", - " dtype = 'int32'\n", - " elif np.issubdtype(temp_array.dtype, np.floating):\n", - " dtype = 'float32'\n", - " else:\n", - " dtype = temp_array.dtype\n", - " self._data = temp_array.astype(dtype)\n", - " elif isinstance(data, np.ndarray):\n", - " self._data = data.astype(dtype or data.dtype)\n", - " else:\n", - " raise TypeError(f\"Cannot create tensor from {type(data)}\")\n", - " \n", - " @property\n", - " def data(self) -> np.ndarray:\n", - " \"\"\"Access underlying numpy array.\"\"\"\n", - " return self._data\n", - " \n", - " @property\n", - " def shape(self) -> Tuple[int, ...]:\n", - " \"\"\"Get tensor shape.\"\"\"\n", - " return self._data.shape\n", - " \n", - " @property\n", - " def size(self) -> int:\n", - " \"\"\"Get total number of elements.\"\"\"\n", - " return self._data.size\n", - " \n", - " @property\n", - " def dtype(self) -> np.dtype:\n", - " \"\"\"Get data type as numpy dtype.\"\"\"\n", - " return self._data.dtype\n", - " \n", - " def __repr__(self) -> str:\n", - " \"\"\"String representation.\"\"\"\n", - " return f\"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})\"\n", - " \n", - " def add(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"Add another tensor to 
this tensor.\"\"\"\n", - " return Tensor(self._data + other._data)\n", - " \n", - " def multiply(self, other: 'Tensor') -> 'Tensor':\n", - " \"\"\"Multiply this tensor by another tensor.\"\"\"\n", - " return Tensor(self._data * other._data)\n", - " \n", - " # Arithmetic operators for natural syntax (a + b, a * b, etc.)\n", - " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Addition: tensor + other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data + other._data)\n", - " else: # scalar\n", - " return Tensor(self._data + other)\n", - " \n", - " def __radd__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse addition: scalar + tensor\"\"\"\n", - " return Tensor(other + self._data)\n", - " \n", - " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Subtraction: tensor - other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data - other._data)\n", - " else: # scalar\n", - " return Tensor(self._data - other)\n", - " \n", - " def __rsub__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse subtraction: scalar - tensor\"\"\"\n", - " return Tensor(other - self._data)\n", - " \n", - " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Multiplication: tensor * other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data * other._data)\n", - " else: # scalar\n", - " return Tensor(self._data * other)\n", - " \n", - " def __rmul__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse multiplication: scalar * tensor\"\"\"\n", - " return Tensor(other * self._data)\n", - " \n", - " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", - " \"\"\"Division: tensor / other\"\"\"\n", - " if isinstance(other, Tensor):\n", - " return Tensor(self._data / other._data)\n", - " else: # scalar\n", - " return Tensor(self._data / other)\n", - " \n", - " def 
__rtruediv__(self, other: Union[int, float]) -> 'Tensor':\n", - " \"\"\"Reverse division: scalar / tensor\"\"\"\n", - " return Tensor(other / self._data)" - ] - }, - { - "cell_type": "markdown", - "id": "53b9cc39", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Tensor Methods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7793077f", - "metadata": {}, - "outputs": [], - "source": [ - "# Test tensor methods\n", - "print(\"Testing tensor methods...\")\n", - "\n", - "try:\n", - " # Test method-based operations\n", - " a = Tensor([1, 2, 3])\n", - " b = Tensor([4, 5, 6])\n", - " \n", - " c = a.add(b)\n", - " print(f\"βœ… Method addition: {a}.add({b}) = {c}\")\n", - " \n", - " d = a.multiply(b)\n", - " print(f\"βœ… Method multiplication: {a}.multiply({b}) = {d}\")\n", - " \n", - " # Test method chaining\n", - " e = a.add(b).multiply(Tensor(2))\n", - " print(f\"βœ… Method chaining: {a}.add({b}).multiply(2) = {e}\")\n", - " \n", - " print(\"\\nπŸŽ‰ All method tests passed!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the add and multiply methods!\")" - ] - }, - { - "cell_type": "markdown", - "id": "5dad35d0", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 🎯 Module Summary\n", - "\n", - "Congratulations! 
You've built the foundation of TinyTorch:\n", - "\n", - "### What You've Accomplished\n", - "βœ… **Tensor Creation**: Handle scalars, lists, and numpy arrays \n", - "βœ… **Properties**: Access shape, size, and data type \n", - "βœ… **Arithmetic**: Element-wise addition and multiplication \n", - "βœ… **Methods**: Object-oriented API for operations \n", - "βœ… **Testing**: Immediate feedback on your implementation \n", - "\n", - "### Key Concepts You've Learned\n", - "- **Tensors** are N-dimensional arrays with ML operations\n", - "- **NumPy integration** provides efficient computation\n", - "- **Element-wise operations** work on corresponding elements\n", - "- **Broadcasting** automatically handles different shapes\n", - "- **Object-oriented design** makes APIs intuitive\n", - "\n", - "### What's Next\n", - "In the next modules, you'll build on this foundation:\n", - "- **Layers**: Transform tensors with weights and biases\n", - "- **Activations**: Add nonlinearity to your networks\n", - "- **Networks**: Compose layers into complete models\n", - "- **Training**: Learn parameters with gradients and optimization\n", - "\n", - "### Real-World Connection\n", - "Your Tensor class is now ready to:\n", - "- Store neural network weights and biases\n", - "- Process batches of data efficiently\n", - "- Handle different data types (images, text, audio)\n", - "- Integrate with the rest of the TinyTorch ecosystem\n", - "\n", - "**Ready for the next challenge?** Let's move on to building layers that can transform your tensors!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cdd7d21e", - "metadata": {}, - "outputs": [], - "source": [ - "# Final verification\n", - "print(\"\\n\" + \"=\"*50)\n", - "print(\"πŸŽ‰ TENSOR MODULE COMPLETE!\")\n", - "print(\"=\"*50)\n", - "print(\"βœ… Tensor creation and properties\")\n", - "print(\"βœ… Arithmetic operations\")\n", - "print(\"βœ… Method-based API\")\n", - "print(\"βœ… Comprehensive testing\")\n", - "print(\"\\nπŸš€ Ready to build layers in the next module!\") " - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/assignments/source/02_activations/README.md b/assignments/source/02_activations/README.md index b1d6fc0f..ed43c9db 100644 --- a/assignments/source/02_activations/README.md +++ b/assignments/source/02_activations/README.md @@ -51,7 +51,7 @@ This module teaches you the mathematical foundations that make deep learning pos 1. **Open the development file**: ```bash python bin/tito.py jupyter - # Then open modules/activations/activations_dev.py + # Then open assignments/source/02_activations/activations_dev.py ``` 2. 
**Implement the functions**: diff --git a/assignments/source/02_activations/activations_dev.ipynb b/assignments/source/02_activations/activations_dev.ipynb deleted file mode 100644 index 1afb8092..00000000 --- a/assignments/source/02_activations/activations_dev.ipynb +++ /dev/null @@ -1,1143 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "5054d749", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module 3: Activation Functions - The Spark of Intelligence\n", - "\n", - "**Learning Goals:**\n", - "- Understand why activation functions are essential for neural networks\n", - "- Implement four fundamental activation functions from scratch\n", - "- Learn the mathematical properties and use cases of each activation\n", - "- Visualize activation function behavior and understand their impact\n", - "\n", - "**Why This Matters:**\n", - "Without activation functions, neural networks would just be linear transformations - no matter how many layers you stack, you'd only get linear relationships. Activation functions introduce the nonlinearity that allows neural networks to learn complex patterns and approximate any function.\n", - "\n", - "**Real-World Context:**\n", - "Every neural network you've heard of - from image recognition to language models - relies on activation functions. Understanding them deeply is crucial for designing effective architectures and debugging training issues." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2e2c3999", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp core.activations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3438973c", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "import math\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import os\n", - "import sys\n", - "from typing import Union, List\n", - "\n", - "# Import our Tensor class from the main package (rock solid foundation)\n", - "from tinytorch.core.tensor import Tensor" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9aaa0c53", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def _should_show_plots():\n", - " \"\"\"Check if we should show plots (disable during testing)\"\"\"\n", - " # Check multiple conditions that indicate we're in test mode\n", - " is_pytest = (\n", - " 'pytest' in sys.modules or\n", - " 'test' in sys.argv or\n", - " os.environ.get('PYTEST_CURRENT_TEST') is not None or\n", - " any('test' in arg for arg in sys.argv) or\n", - " any('pytest' in arg for arg in sys.argv)\n", - " )\n", - " \n", - " # Show plots in development mode (when not in test mode)\n", - " return not is_pytest" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6b4ab89", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def visualize_activation_function(activation_fn, name: str, x_range: tuple = (-5, 5), num_points: int = 100):\n", - " \"\"\"Visualize an activation function's behavior\"\"\"\n", - " if not _should_show_plots():\n", - " return\n", - " \n", - " try:\n", - " \n", - " # Generate input values\n", - " x_vals = np.linspace(x_range[0], x_range[1], num_points)\n", - " \n", - " # Apply activation function\n", - " y_vals = []\n", - " for x in 
x_vals:\n", - " input_tensor = Tensor([[x]])\n", - " output = activation_fn(input_tensor)\n", - " y_vals.append(output.data.item())\n", - " \n", - " # Create plot\n", - " plt.figure(figsize=(10, 6))\n", - " plt.plot(x_vals, y_vals, 'b-', linewidth=2, label=f'{name} Activation')\n", - " plt.grid(True, alpha=0.3)\n", - " plt.xlabel('Input (x)')\n", - " plt.ylabel(f'{name}(x)')\n", - " plt.title(f'{name} Activation Function')\n", - " plt.legend()\n", - " plt.show()\n", - " \n", - " except ImportError:\n", - " print(\" πŸ“Š Matplotlib not available - skipping visualization\")\n", - " except Exception as e:\n", - " print(f\" ⚠️ Visualization error: {e}\")\n", - "\n", - "def visualize_activation_on_data(activation_fn, name: str, data: Tensor):\n", - " \"\"\"Show activation function applied to sample data\"\"\"\n", - " if not _should_show_plots():\n", - " return\n", - " \n", - " try:\n", - " output = activation_fn(data)\n", - " print(f\" πŸ“Š {name} Example:\")\n", - " print(f\" Input: {data.data.flatten()}\")\n", - " print(f\" Output: {output.data.flatten()}\")\n", - " print(f\" Range: [{output.data.min():.3f}, {output.data.max():.3f}]\")\n", - " \n", - " except Exception as e:\n", - " print(f\" ⚠️ Data visualization error: {e}\")" - ] - }, - { - "cell_type": "markdown", - "id": "7da2a826", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 1: What is an Activation Function?\n", - "\n", - "### Definition\n", - "An **activation function** is a mathematical function that adds nonlinearity to neural networks. It transforms the output of a layer before passing it to the next layer.\n", - "\n", - "### Why Activation Functions Matter\n", - "**Without activation functions, neural networks are just linear transformations!**\n", - "\n", - "```\n", - "Linear β†’ Linear β†’ Linear = Still Linear\n", - "```\n", - "\n", - "No matter how many layers you stack, without activation functions, you can only learn linear relationships. 
Activation functions introduce the nonlinearity that allows neural networks to:\n", - "- Learn complex patterns\n", - "- Approximate any continuous function\n", - "- Solve non-linear problems\n", - "\n", - "### Visual Analogy\n", - "Think of activation functions as **decision makers** at each neuron:\n", - "- **ReLU**: \"If positive, pass it through; if negative, block it\"\n", - "- **Sigmoid**: \"Squash everything between 0 and 1\"\n", - "- **Tanh**: \"Squash everything between -1 and 1\"\n", - "- **Softmax**: \"Convert to probabilities that sum to 1\"\n", - "\n", - "### Connection to Previous Modules\n", - "In Module 2 (Layers), we learned how to transform data through linear operations (matrix multiplication + bias). Now we add the nonlinear activation functions that make neural networks powerful." - ] - }, - { - "cell_type": "markdown", - "id": "4517fc3d", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: ReLU - The Workhorse of Deep Learning\n", - "\n", - "### What is ReLU?\n", - "**ReLU (Rectified Linear Unit)** is the most popular activation function in deep learning.\n", - "\n", - "**Mathematical Definition:**\n", - "```\n", - "f(x) = max(0, x)\n", - "```\n", - "\n", - "**In Plain English:**\n", - "- If input is positive β†’ pass it through unchanged\n", - "- If input is negative β†’ output zero\n", - "\n", - "### Why ReLU is Popular\n", - "1. **Simple**: Easy to compute and understand\n", - "2. **Fast**: No expensive operations (no exponentials)\n", - "3. **Sparse**: Outputs many zeros, creating sparse representations\n", - "4. 
**Gradient-friendly**: Gradient is either 0 or 1 (no vanishing gradient for positive inputs)\n", - "\n", - "### Real-World Analogy\n", - "ReLU is like a **one-way valve** - it only lets positive \"pressure\" through, blocking negative values completely.\n", - "\n", - "### When to Use ReLU\n", - "- **Hidden layers** in most neural networks\n", - "- **Convolutional layers** in image processing\n", - "- **When you want sparse activations**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "05bf6005", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class ReLU:\n", - " \"\"\"\n", - " ReLU Activation Function: f(x) = max(0, x)\n", - " \n", - " The most popular activation function in deep learning.\n", - " Simple, fast, and effective for most applications.\n", - " \"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Apply ReLU activation: f(x) = max(0, x)\n", - " \n", - " TODO: Implement ReLU activation\n", - " \n", - " APPROACH:\n", - " 1. For each element in the input tensor, apply max(0, element)\n", - " 2. 
Return a new Tensor with the results\n", - " \n", - " EXAMPLE:\n", - " Input: Tensor([[-1, 0, 1, 2, -3]])\n", - " Expected: Tensor([[0, 0, 1, 2, 0]])\n", - " \n", - " HINTS:\n", - " - Use np.maximum(0, x.data) for element-wise max\n", - " - Remember to return a new Tensor object\n", - " - The shape should remain the same as input\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Allow calling the activation like a function: relu(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88e1afe8", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class ReLU:\n", - " \"\"\"ReLU Activation: f(x) = max(0, x)\"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " result = np.maximum(0, x.data)\n", - " return Tensor(result)\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "b323b04b", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your ReLU Implementation\n", - "\n", - "Let's test your ReLU implementation right away to make sure it's working correctly:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "242b52fd", - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " # Create ReLU activation\n", - " relu = ReLU()\n", - " \n", - " # Test 1: Basic functionality\n", - " print(\"πŸ”§ Testing ReLU Implementation\")\n", - " print(\"=\" * 40)\n", - " \n", - " # Test with mixed positive/negative values\n", - " test_input = Tensor([[-2, -1, 0, 1, 2]])\n", - " expected = Tensor([[0, 0, 0, 1, 2]])\n", - " \n", - " result = relu(test_input)\n", - " print(f\"Input: {test_input.data.flatten()}\")\n", - " print(f\"Output: {result.data.flatten()}\")\n", - " print(f\"Expected: 
{expected.data.flatten()}\")\n", - " \n", - " # Verify correctness\n", - " if np.allclose(result.data, expected.data):\n", - " print(\"βœ… Basic ReLU test passed!\")\n", - " else:\n", - " print(\"❌ Basic ReLU test failed!\")\n", - " print(\" Check your max(0, x) implementation\")\n", - " \n", - " # Test 2: Edge cases\n", - " edge_cases = Tensor([[-100, -0.1, 0, 0.1, 100]])\n", - " edge_result = relu(edge_cases)\n", - " expected_edge = np.array([[0, 0, 0, 0.1, 100]])\n", - " \n", - " print(f\"\\nEdge cases: {edge_cases.data.flatten()}\")\n", - " print(f\"Output: {edge_result.data.flatten()}\")\n", - " \n", - " if np.allclose(edge_result.data, expected_edge):\n", - " print(\"βœ… Edge case test passed!\")\n", - " else:\n", - " print(\"❌ Edge case test failed!\")\n", - " \n", - " # Test 3: Shape preservation\n", - " multi_dim = Tensor([[1, -1], [2, -2], [0, 3]])\n", - " multi_result = relu(multi_dim)\n", - " \n", - " if multi_result.data.shape == multi_dim.data.shape:\n", - " print(\"βœ… Shape preservation test passed!\")\n", - " else:\n", - " print(\"❌ Shape preservation test failed!\")\n", - " print(f\" Expected shape: {multi_dim.data.shape}, got: {multi_result.data.shape}\")\n", - " \n", - " print(\"βœ… ReLU tests complete!\")\n", - " \n", - "except NotImplementedError:\n", - " print(\"⚠️ ReLU not implemented yet - complete the forward method above!\")\n", - "except Exception as e:\n", - " print(f\"❌ Error in ReLU: {e}\")\n", - " print(\" Check your implementation in the forward method\")\n", - "\n", - "print() # Add spacing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55ab92c4", - "metadata": {}, - "outputs": [], - "source": [ - "# 🎨 ReLU Visualization (development only - not exported)\n", - "if _should_show_plots():\n", - " try:\n", - " relu = ReLU()\n", - " print(\"🎨 Visualizing ReLU behavior...\")\n", - " visualize_activation_function(relu, \"ReLU\", x_range=(-3, 3))\n", - " \n", - " # Show ReLU with real data\n", - " sample_data = 
Tensor([[-2.5, -1.0, -0.5, 0.0, 0.5, 1.0, 2.5]])\n", - " visualize_activation_on_data(relu, \"ReLU\", sample_data)\n", - " except:\n", - " pass # Skip if ReLU not implemented" - ] - }, - { - "cell_type": "markdown", - "id": "588a5bb0", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 3: Sigmoid - The Smooth Classifier\n", - "\n", - "### What is Sigmoid?\n", - "**Sigmoid** is a smooth, S-shaped activation function that squashes inputs to the range (0, 1).\n", - "\n", - "**Mathematical Definition:**\n", - "```\n", - "f(x) = 1 / (1 + e^(-x))\n", - "```\n", - "\n", - "**Key Properties:**\n", - "- **Range**: (0, 1) - never exactly 0 or 1\n", - "- **Smooth**: Differentiable everywhere\n", - "- **Monotonic**: Always increasing\n", - "- **Symmetric**: Around the point (0, 0.5)\n", - "\n", - "### Why Sigmoid is Useful\n", - "1. **Probability interpretation**: Output can be interpreted as probability\n", - "2. **Smooth gradients**: Nice for optimization\n", - "3. **Bounded output**: Prevents extreme values\n", - "\n", - "### Real-World Analogy\n", - "Sigmoid is like a **smooth dimmer switch** - it gradually transitions from \"off\" (near 0) to \"on\" (near 1), unlike ReLU's sharp cutoff.\n", - "\n", - "### When to Use Sigmoid\n", - "- **Binary classification** (output layer)\n", - "- **Gate mechanisms** (in LSTMs)\n", - "- **When you need probabilities**\n", - "\n", - "### Numerical Stability Note\n", - "For very large positive or negative inputs, sigmoid can cause numerical issues. We'll handle this with clipping." 
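The clipping fix is easiest to appreciate outside the class: a minimal standalone NumPy sketch (plain arrays rather than this module's `Tensor`) comparing the naive formula with the clipped one:

```python
import numpy as np

def sigmoid_naive(x):
    # np.exp(-x) overflows to inf for large negative x (e.g. x = -1000),
    # triggering a RuntimeWarning even though 1/inf still collapses to 0.0.
    return 1 / (1 + np.exp(-x))

def sigmoid_clipped(x):
    # Clipping to [-500, 500] keeps exp() inside float64 range
    # (np.exp overflows a little above 709) without changing the
    # output at float64 precision.
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

x = np.array([-1000.0, 0.0, 1000.0])
out = sigmoid_clipped(x)  # finite and warning-free: ~0.0, 0.5, 1.0
```

The clip bounds (-500, 500) are one conventional choice, not the only one; anything comfortably below the float64 overflow threshold of `exp` works.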
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d3fba9e9", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Sigmoid:\n", - " \"\"\"\n", - " Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x))\n", - " \n", - " Squashes inputs to the range (0, 1), useful for binary classification\n", - " and probability interpretation.\n", - " \"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x))\n", - " \n", - " TODO: Implement Sigmoid activation\n", - " \n", - " APPROACH:\n", - " 1. For numerical stability, clip x to reasonable range (e.g., -500 to 500)\n", - " 2. Compute 1 / (1 + exp(-x)) for each element\n", - " 3. Return a new Tensor with the results\n", - " \n", - " EXAMPLE:\n", - " Input: Tensor([[-2, -1, 0, 1, 2]])\n", - " Expected: Tensor([[0.119, 0.269, 0.5, 0.731, 0.881]]) (approximately)\n", - " \n", - " HINTS:\n", - " - Use np.clip(x.data, -500, 500) for numerical stability\n", - " - Use np.exp(-clipped_x) for the exponential\n", - " - Formula: 1 / (1 + np.exp(-clipped_x))\n", - " - Remember to return a new Tensor object\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Allow calling the activation like a function: sigmoid(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7012fe16", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Sigmoid:\n", - " \"\"\"Sigmoid Activation: f(x) = 1 / (1 + e^(-x))\"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " # Clip for numerical stability\n", - " clipped = np.clip(x.data, -500, 500)\n", - " result = 1 / (1 + np.exp(-clipped))\n", - " return Tensor(result)\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", 
- " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "22123d76", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Sigmoid Implementation\n", - "\n", - "Let's test your Sigmoid implementation to ensure it's working correctly:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29b81905", - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " # Create Sigmoid activation\n", - " sigmoid = Sigmoid()\n", - " \n", - " print(\"πŸ”§ Testing Sigmoid Implementation\")\n", - " print(\"=\" * 40)\n", - " \n", - " # Test 1: Basic functionality\n", - " test_input = Tensor([[-2, -1, 0, 1, 2]])\n", - " result = sigmoid(test_input)\n", - " \n", - " print(f\"Input: {test_input.data.flatten()}\")\n", - " print(f\"Output: {result.data.flatten()}\")\n", - " \n", - " # Check properties\n", - " # 1. All outputs should be between 0 and 1\n", - " if np.all(result.data >= 0) and np.all(result.data <= 1):\n", - " print(\"βœ… Range test passed: all outputs in (0, 1)\")\n", - " else:\n", - " print(\"❌ Range test failed: outputs should be in (0, 1)\")\n", - " \n", - " # 2. Sigmoid(0) should be 0.5\n", - " zero_input = Tensor([[0]])\n", - " zero_result = sigmoid(zero_input)\n", - " if abs(zero_result.data.item() - 0.5) < 1e-6:\n", - " print(\"βœ… Sigmoid(0) = 0.5 test passed!\")\n", - " else:\n", - " print(f\"❌ Sigmoid(0) should be 0.5, got {zero_result.data.item()}\")\n", - " \n", - " # 3. Test symmetry: sigmoid(-x) = 1 - sigmoid(x)\n", - " x_val = 2.0\n", - " pos_result = sigmoid(Tensor([[x_val]])).data.item()\n", - " neg_result = sigmoid(Tensor([[-x_val]])).data.item()\n", - " \n", - " if abs(pos_result + neg_result - 1.0) < 1e-6:\n", - " print(\"βœ… Symmetry test passed!\")\n", - " else:\n", - " print(f\"❌ Symmetry test failed: sigmoid({x_val}) + sigmoid({-x_val}) should equal 1\")\n", - " \n", - " # 4. 
Test numerical stability with extreme values\n", - " extreme_input = Tensor([[-1000, 1000]])\n", - " extreme_result = sigmoid(extreme_input)\n", - " \n", - " # Should not produce NaN or inf\n", - " if not np.any(np.isnan(extreme_result.data)) and not np.any(np.isinf(extreme_result.data)):\n", - " print(\"βœ… Numerical stability test passed!\")\n", - " else:\n", - " print(\"❌ Numerical stability test failed: extreme values produced NaN/inf\")\n", - " \n", - " print(\"βœ… Sigmoid tests complete!\")\n", - " \n", - " # 🎨 Visualize Sigmoid behavior (development only)\n", - " if _should_show_plots():\n", - " print(\"\\n🎨 Visualizing Sigmoid behavior...\")\n", - " visualize_activation_function(sigmoid, \"Sigmoid\", x_range=(-5, 5))\n", - " \n", - " # Show Sigmoid with real data\n", - " sample_data = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]])\n", - " visualize_activation_on_data(sigmoid, \"Sigmoid\", sample_data)\n", - " \n", - "except NotImplementedError:\n", - " print(\"⚠️ Sigmoid not implemented yet - complete the forward method above!\")\n", - "except Exception as e:\n", - " print(f\"❌ Error in Sigmoid: {e}\")\n", - " print(\" Check your implementation in the forward method\")\n", - "\n", - "print() # Add spacing" - ] - }, - { - "cell_type": "markdown", - "id": "2d0e0ed5", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 4: Tanh - The Centered Alternative\n", - "\n", - "### What is Tanh?\n", - "**Tanh (Hyperbolic Tangent)** is similar to Sigmoid but centered around zero, with range (-1, 1).\n", - "\n", - "**Mathematical Definition:**\n", - "```\n", - "f(x) = (e^x - e^(-x)) / (e^x + e^(-x))\n", - "```\n", - "\n", - "**Alternative form:**\n", - "```\n", - "f(x) = 2 * sigmoid(2x) - 1\n", - "```\n", - "\n", - "**Key Properties:**\n", - "- **Range**: (-1, 1) - symmetric around zero\n", - "- **Zero-centered**: Output has mean closer to zero\n", - "- **Smooth**: Differentiable everywhere\n", - "- **Stronger gradients**: Steeper 
than sigmoid\n", - "\n", - "### Why Tanh is Better Than Sigmoid\n", - "1. **Zero-centered**: Helps with gradient flow in deep networks\n", - "2. **Stronger gradients**: Faster convergence in some cases\n", - "3. **Symmetric**: Better for certain applications\n", - "\n", - "### Real-World Analogy\n", - "Tanh is like a **balanced scale** - it can tip strongly in either direction (-1 to +1) but defaults to neutral (0).\n", - "\n", - "### When to Use Tanh\n", - "- **Hidden layers** (alternative to ReLU)\n", - "- **Recurrent networks** (RNNs, LSTMs)\n", - "- **When you need zero-centered outputs**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5455e049", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Tanh:\n", - " \"\"\"\n", - " Tanh Activation Function: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))\n", - " \n", - " Zero-centered activation function with range (-1, 1).\n", - " Often preferred over Sigmoid for hidden layers.\n", - " \"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Apply Tanh activation: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))\n", - " \n", - " TODO: Implement Tanh activation\n", - " \n", - " APPROACH:\n", - " 1. Use numpy's built-in tanh function: np.tanh(x.data)\n", - " 2. Return a new Tensor with the results\n", - " \n", - " ALTERNATIVE APPROACH:\n", - " 1. Compute e^x and e^(-x)\n", - " 2. 
Use formula: (e^x - e^(-x)) / (e^x + e^(-x))\n", - " \n", - " EXAMPLE:\n", - " Input: Tensor([[-2, -1, 0, 1, 2]])\n", - " Expected: Tensor([[-0.964, -0.762, 0.0, 0.762, 0.964]]) (approximately)\n", - " \n", - " HINTS:\n", - " - np.tanh() is the simplest approach\n", - " - Output range is (-1, 1)\n", - " - tanh(0) = 0 (zero-centered)\n", - " - Remember to return a new Tensor object\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Allow calling the activation like a function: tanh(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2ec4c0e", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Tanh:\n", - " \"\"\"Tanh Activation: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))\"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " result = np.tanh(x.data)\n", - " return Tensor(result)\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "47020ae2", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Tanh Implementation\n", - "\n", - "Let's test your Tanh implementation to ensure it's working correctly:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20af2dbb", - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " # Create Tanh activation\n", - " tanh = Tanh()\n", - " \n", - " print(\"πŸ”§ Testing Tanh Implementation\")\n", - " print(\"=\" * 40)\n", - " \n", - " # Test 1: Basic functionality\n", - " test_input = Tensor([[-2, -1, 0, 1, 2]])\n", - " result = tanh(test_input)\n", - " \n", - " print(f\"Input: {test_input.data.flatten()}\")\n", - " print(f\"Output: {result.data.flatten()}\")\n", - " \n", - " # Check properties\n", - " # 1. 
All outputs should be between -1 and 1\n", - " if np.all(result.data >= -1) and np.all(result.data <= 1):\n", - " print(\"βœ… Range test passed: all outputs in (-1, 1)\")\n", - " else:\n", - " print(\"❌ Range test failed: outputs should be in (-1, 1)\")\n", - " \n", - " # 2. Tanh(0) should be 0\n", - " zero_input = Tensor([[0]])\n", - " zero_result = tanh(zero_input)\n", - " if abs(zero_result.data.item()) < 1e-6:\n", - " print(\"βœ… Tanh(0) = 0 test passed!\")\n", - " else:\n", - " print(f\"❌ Tanh(0) should be 0, got {zero_result.data.item()}\")\n", - " \n", - " # 3. Test antisymmetry: tanh(-x) = -tanh(x)\n", - " x_val = 1.5\n", - " pos_result = tanh(Tensor([[x_val]])).data.item()\n", - " neg_result = tanh(Tensor([[-x_val]])).data.item()\n", - " \n", - " if abs(pos_result + neg_result) < 1e-6:\n", - " print(\"βœ… Antisymmetry test passed!\")\n", - " else:\n", - " print(f\"❌ Antisymmetry test failed: tanh({x_val}) + tanh({-x_val}) should equal 0\")\n", - " \n", - " # 4. Test that tanh is stronger than sigmoid\n", - " # For the same input, |tanh(x)| should be > |sigmoid(x) - 0.5|\n", - " test_val = 1.0\n", - " tanh_result = abs(tanh(Tensor([[test_val]])).data.item())\n", - " sigmoid_result = abs(sigmoid(Tensor([[test_val]])).data.item() - 0.5)\n", - " \n", - " if tanh_result > sigmoid_result:\n", - " print(\"βœ… Stronger gradient test passed!\")\n", - " else:\n", - " print(\"❌ Tanh should have stronger gradients than sigmoid\")\n", - " \n", - " print(\"βœ… Tanh tests complete!\")\n", - " \n", - " # 🎨 Visualize Tanh behavior (development only)\n", - " if _should_show_plots():\n", - " print(\"\\n🎨 Visualizing Tanh behavior...\")\n", - " visualize_activation_function(tanh, \"Tanh\", x_range=(-3, 3))\n", - " \n", - " # Show Tanh with real data\n", - " sample_data = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])\n", - " visualize_activation_on_data(tanh, \"Tanh\", sample_data)\n", - " \n", - "except NotImplementedError:\n", - " print(\"⚠️ Tanh not implemented yet - complete the 
forward method above!\")\n", - "except Exception as e:\n", - " print(f\"❌ Error in Tanh: {e}\")\n", - " print(\" Check your implementation in the forward method\")\n", - "\n", - "print() # Add spacing" - ] - }, - { - "cell_type": "markdown", - "id": "5ac8d4b2", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 5: Softmax - The Probability Maker\n", - "\n", - "### What is Softmax?\n", - "**Softmax** converts a vector of real numbers into a probability distribution. It's essential for multi-class classification.\n", - "\n", - "**Mathematical Definition:**\n", - "```\n", - "f(x_i) = e^(x_i) / Ξ£(e^(x_j)) for all j\n", - "```\n", - "\n", - "**Key Properties:**\n", - "- **Probability distribution**: All outputs sum to 1\n", - "- **Non-negative**: All outputs β‰₯ 0\n", - "- **Differentiable**: Smooth for optimization\n", - "- **Relative**: Emphasizes the largest input\n", - "\n", - "### Why Softmax is Special\n", - "1. **Probability interpretation**: Perfect for classification\n", - "2. **Competitive**: Emphasizes the winner (largest input)\n", - "3. **Differentiable**: Works well with gradient descent\n", - "\n", - "### Real-World Analogy\n", - "Softmax is like **voting with enthusiasm** - not only does the most popular choice win, but the \"votes\" are weighted by how much more popular it is.\n", - "\n", - "### When to Use Softmax\n", - "- **Multi-class classification** (output layer)\n", - "- **Attention mechanisms** (in Transformers)\n", - "- **When you need probability distributions**\n", - "\n", - "### Numerical Stability Note\n", - "For numerical stability, we subtract the maximum value before computing exponentials." 
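The max-subtraction trick relies on softmax being shift-invariant: adding a constant to every logit multiplies numerator and denominator by the same factor, which cancels. A minimal standalone NumPy sketch (plain arrays rather than this module's `Tensor`):

```python
import numpy as np

def softmax_stable(x):
    # Subtracting the row max leaves the result mathematically unchanged,
    # but makes every exponent <= 0, so np.exp can never overflow.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_vals = np.exp(shifted)
    return exp_vals / np.sum(exp_vals, axis=1, keepdims=True)

big = softmax_stable(np.array([[1000.0, 1001.0, 1002.0]]))
small = softmax_stable(np.array([[0.0, 1.0, 2.0]]))
# big and small are the same distribution: the +1000 offset cancels,
# whereas a naive exp(1000) would have overflowed to inf.
```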
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "651a79de", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Softmax:\n", - " \"\"\"\n", - " Softmax Activation Function: f(x_i) = e^(x_i) / Ξ£(e^(x_j))\n", - " \n", - " Converts a vector of real numbers into a probability distribution.\n", - " Essential for multi-class classification.\n", - " \"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Apply Softmax activation: f(x_i) = e^(x_i) / Ξ£(e^(x_j))\n", - " \n", - " TODO: Implement Softmax activation\n", - " \n", - " APPROACH:\n", - " 1. For numerical stability, subtract the maximum value from each row\n", - " 2. Compute exponentials of the shifted values\n", - " 3. Divide each exponential by the sum of exponentials in its row\n", - " 4. Return a new Tensor with the results\n", - " \n", - " EXAMPLE:\n", - " Input: Tensor([[1, 2, 3]])\n", - " Expected: Tensor([[0.090, 0.245, 0.665]]) (approximately)\n", - " Sum should be 1.0\n", - " \n", - " HINTS:\n", - " - Use np.max(x.data, axis=1, keepdims=True) to find row maximums\n", - " - Subtract max from x.data for numerical stability\n", - " - Use np.exp() for exponentials\n", - " - Use np.sum(exp_vals, axis=1, keepdims=True) for row sums\n", - " - Remember to return a new Tensor object\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Allow calling the activation like a function: softmax(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "467911c0", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Softmax:\n", - " \"\"\"Softmax Activation: f(x_i) = e^(x_i) / Ξ£(e^(x_j))\"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " # Subtract max for numerical 
stability\n", - " shifted = x.data - np.max(x.data, axis=1, keepdims=True)\n", - " exp_vals = np.exp(shifted)\n", - " result = exp_vals / np.sum(exp_vals, axis=1, keepdims=True)\n", - " return Tensor(result)\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "943a9119", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Softmax Implementation\n", - "\n", - "Let's test your Softmax implementation to ensure it's working correctly:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01c98430", - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " # Create Softmax activation\n", - " softmax = Softmax()\n", - " \n", - " print(\"πŸ”§ Testing Softmax Implementation\")\n", - " print(\"=\" * 40)\n", - " \n", - " # Test 1: Basic functionality\n", - " test_input = Tensor([[1, 2, 3]])\n", - " result = softmax(test_input)\n", - " \n", - " print(f\"Input: {test_input.data.flatten()}\")\n", - " print(f\"Output: {result.data.flatten()}\")\n", - " \n", - " # Check properties\n", - " # 1. All outputs should be non-negative\n", - " if np.all(result.data >= 0):\n", - " print(\"βœ… Non-negative test passed!\")\n", - " else:\n", - " print(\"❌ Non-negative test failed: all outputs should be β‰₯ 0\")\n", - " \n", - " # 2. Sum should equal 1 (probability distribution)\n", - " row_sums = np.sum(result.data, axis=1)\n", - " if np.allclose(row_sums, 1.0):\n", - " print(\"βœ… Probability distribution test passed!\")\n", - " else:\n", - " print(f\"❌ Sum test failed: sum should be 1.0, got {row_sums}\")\n", - " \n", - " # 3. 
Test with multiple rows\n", - " multi_input = Tensor([[1, 2, 3], [0, 0, 0], [10, 20, 30]])\n", - " multi_result = softmax(multi_input)\n", - " multi_sums = np.sum(multi_result.data, axis=1)\n", - " \n", - " if np.allclose(multi_sums, 1.0):\n", - " print(\"βœ… Multi-row test passed!\")\n", - " else:\n", - " print(f\"❌ Multi-row test failed: all row sums should be 1.0, got {multi_sums}\")\n", - " \n", - " # 4. Test numerical stability\n", - " large_input = Tensor([[1000, 1001, 1002]])\n", - " large_result = softmax(large_input)\n", - " \n", - " # Should not produce NaN or inf\n", - " if not np.any(np.isnan(large_result.data)) and not np.any(np.isinf(large_result.data)):\n", - " print(\"βœ… Numerical stability test passed!\")\n", - " else:\n", - " print(\"❌ Numerical stability test failed: large values produced NaN/inf\")\n", - " \n", - " # 5. Test that largest input gets highest probability\n", - " test_logits = Tensor([[1, 5, 2]])\n", - " test_probs = softmax(test_logits)\n", - " max_idx = np.argmax(test_probs.data)\n", - " \n", - " if max_idx == 1: # Second element (index 1) should be largest\n", - " print(\"βœ… Max probability test passed!\")\n", - " else:\n", - " print(\"❌ Max probability test failed: largest input should get highest probability\")\n", - " \n", - " print(\"βœ… Softmax tests complete!\")\n", - " \n", - " # 🎨 Visualize Softmax behavior (development only)\n", - " if _should_show_plots():\n", - " print(\"\\n🎨 Visualizing Softmax behavior...\")\n", - " # Note: Softmax is different - it's a vector function, so we show it differently\n", - " sample_logits = Tensor([[1.0, 2.0, 3.0]]) # Simple 3-class example\n", - " softmax_output = softmax(sample_logits)\n", - " \n", - " print(f\" Example: logits {sample_logits.data.flatten()} β†’ probabilities {softmax_output.data.flatten()}\")\n", - " print(f\" Sum of probabilities: {softmax_output.data.sum():.6f} (should be 1.0)\")\n", - " \n", - " # Show how different input scales affect output\n", - " 
scale_examples = [\n", - " Tensor([[1.0, 2.0, 3.0]]), # Original\n", - " Tensor([[2.0, 4.0, 6.0]]), # Scaled up\n", - " Tensor([[0.1, 0.2, 0.3]]), # Scaled down\n", - " ]\n", - " \n", - " print(\"\\n πŸ“Š Scale sensitivity:\")\n", - " for i, example in enumerate(scale_examples):\n", - " output = softmax(example)\n", - " print(f\" Scale {i+1}: {example.data.flatten()} β†’ {output.data.flatten()}\")\n", - " \n", - "except NotImplementedError:\n", - " print(\"⚠️ Softmax not implemented yet - complete the forward method above!\")\n", - "except Exception as e:\n", - " print(f\"❌ Error in Softmax: {e}\")\n", - " print(\" Check your implementation in the forward method\")\n", - "\n", - "print() # Add spacing" - ] - }, - { - "cell_type": "markdown", - "id": "8f6d7da2", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 🎨 Comprehensive Activation Function Comparison\n", - "\n", - "Now that we've implemented all four activation functions, let's compare them side by side to understand their differences and use cases." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4079ab0f", - "metadata": {}, - "outputs": [], - "source": [ - "# Comprehensive comparison of all activation functions\n", - "print(\"🎨 Comprehensive Activation Function Comparison\")\n", - "print(\"=\" * 60)\n", - "\n", - "try:\n", - " # Create all activation functions\n", - " activations = {\n", - " 'ReLU': ReLU(),\n", - " 'Sigmoid': Sigmoid(),\n", - " 'Tanh': Tanh(),\n", - " 'Softmax': Softmax()\n", - " }\n", - " \n", - " # Test with sample data\n", - " test_data = Tensor([[-2, -1, 0, 1, 2]])\n", - " \n", - " print(\"πŸ“Š Activation Function Outputs:\")\n", - " print(f\"Input: {test_data.data.flatten()}\")\n", - " print(\"-\" * 40)\n", - " \n", - " for name, activation in activations.items():\n", - " try:\n", - " result = activation(test_data)\n", - " print(f\"{name:8}: {result.data.flatten()}\")\n", - " except Exception as e:\n", - " print(f\"{name:8}: Error - {e}\")\n", - " \n", - " print(\"\\nπŸ“ˆ Key Properties Summary:\")\n", - " print(\"-\" * 40)\n", - " print(\"ReLU : Range [0, ∞), sparse, fast\")\n", - " print(\"Sigmoid : Range (0, 1), smooth, probability-like\")\n", - " print(\"Tanh : Range (-1, 1), zero-centered, symmetric\")\n", - " print(\"Softmax : Probability distribution, sums to 1\")\n", - " \n", - " print(\"\\n🎯 When to Use Each:\")\n", - " print(\"-\" * 40)\n", - " print(\"ReLU : Hidden layers, CNNs, most deep networks\")\n", - " print(\"Sigmoid : Binary classification, gates, probabilities\")\n", - " print(\"Tanh : RNNs, when you need zero-centered output\")\n", - " print(\"Softmax : Multi-class classification, attention\")\n", - " \n", - " # Show comprehensive visualization if available\n", - " if _should_show_plots():\n", - " print(\"\\n🎨 Generating comprehensive comparison plot...\")\n", - " try:\n", - " import matplotlib.pyplot as plt\n", - " \n", - " fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n", - " fig.suptitle('Activation Function Comparison', fontsize=16)\n", 
- " \n", - " x_vals = np.linspace(-5, 5, 100)\n", - " \n", - " # Plot each activation function\n", - " for i, (name, activation) in enumerate(list(activations.items())[:3]): # Skip Softmax for now\n", - " row, col = i // 2, i % 2\n", - " ax = axes[row, col]\n", - " \n", - " y_vals = []\n", - " for x in x_vals:\n", - " try:\n", - " input_tensor = Tensor([[x]])\n", - " output = activation(input_tensor)\n", - " y_vals.append(output.data.item())\n", - " except:\n", - " y_vals.append(0)\n", - " \n", - " ax.plot(x_vals, y_vals, 'b-', linewidth=2)\n", - " ax.set_title(f'{name} Activation')\n", - " ax.grid(True, alpha=0.3)\n", - " ax.set_xlabel('Input (x)')\n", - " ax.set_ylabel(f'{name}(x)')\n", - " \n", - " # Special handling for Softmax\n", - " ax = axes[1, 1]\n", - " sample_inputs = np.array([[1, 2, 3], [0, 0, 0], [-1, 0, 1]])\n", - " softmax_results = []\n", - " \n", - " for inp in sample_inputs:\n", - " result = softmax(Tensor([inp]))\n", - " softmax_results.append(result.data.flatten())\n", - " \n", - " x_pos = np.arange(len(sample_inputs))\n", - " width = 0.25\n", - " \n", - " for i in range(3): # 3 classes\n", - " values = [result[i] for result in softmax_results]\n", - " ax.bar(x_pos + i * width, values, width, label=f'Class {i+1}')\n", - " \n", - " ax.set_title('Softmax Activation')\n", - " ax.set_xlabel('Input Examples')\n", - " ax.set_ylabel('Probability')\n", - " ax.set_xticks(x_pos + width)\n", - " ax.set_xticklabels(['[1,2,3]', '[0,0,0]', '[-1,0,1]'])\n", - " ax.legend()\n", - " \n", - " plt.tight_layout()\n", - " plt.show()\n", - " \n", - " except ImportError:\n", - " print(\" πŸ“Š Matplotlib not available - skipping comprehensive plot\")\n", - " except Exception as e:\n", - " print(f\" ⚠️ Comprehensive plot error: {e}\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error in comprehensive comparison: {e}\")\n", - "\n", - "print(\"\\n\" + \"=\" * 60)\n", - "print(\"πŸŽ‰ Congratulations! 
You've implemented all four activation functions!\")\n", - "print(\"You now understand the building blocks that make neural networks intelligent.\")\n", - "print(\"=\" * 60) " - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/assignments/source/03_layers/layers_dev.ipynb b/assignments/source/03_layers/layers_dev.ipynb deleted file mode 100644 index 88e3f07e..00000000 --- a/assignments/source/03_layers/layers_dev.ipynb +++ /dev/null @@ -1,797 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "2668bc45", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module 2: Layers - Neural Network Building Blocks\n", - "\n", - "Welcome to the Layers module! This is where neural networks begin. You'll implement the fundamental building blocks that transform tensors.\n", - "\n", - "## Learning Goals\n", - "- Understand layers as functions that transform tensors: `y = f(x)`\n", - "- Implement Dense layers with linear transformations: `y = Wx + b`\n", - "- Use activation functions from the activations module for nonlinearity\n", - "- See how neural networks are just function composition\n", - "- Build intuition before diving into training\n", - "\n", - "## Build β†’ Use β†’ Understand\n", - "1. **Build**: Dense layers using activation functions as building blocks\n", - "2. **Use**: Transform tensors and see immediate results\n", - "3. 
**Understand**: How neural networks transform information\n", - "\n", - "## Module Dependencies\n", - "This module builds on the **activations** module:\n", - "- **activations** β†’ **layers** β†’ **networks**\n", - "- Clean separation of concerns: math functions β†’ layer building blocks β†’ full networks" - ] - }, - { - "cell_type": "markdown", - "id": "530716e8", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## πŸ“¦ Where This Code Lives in the Final Package\n", - "\n", - "**Learning Side:** You work in `assignments/source/03_layers/layers_dev.py` \n", - "**Building Side:** Code exports to `tinytorch.core.layers`\n", - "\n", - "```python\n", - "# Final package structure:\n", - "from tinytorch.core.layers import Dense, Conv2D # All layers together!\n", - "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", - "from tinytorch.core.tensor import Tensor\n", - "```\n", - "\n", - "**Why this matters:**\n", - "- **Learning:** Focused modules for deep understanding\n", - "- **Production:** Proper organization like PyTorch's `torch.nn`\n", - "- **Consistency:** All layers (Dense, Conv2D) live together in `core.layers`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f63809e", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp core.layers\n", - "\n", - "# Setup and imports\n", - "import numpy as np\n", - "import sys\n", - "from typing import Union, Optional, Callable\n", - "import math" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "00a72b7c", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "import math\n", - "import sys\n", - "from typing import Union, Optional, Callable\n", - "\n", - "# Import from the main package (rock solid foundation)\n", - "from tinytorch.core.tensor import Tensor\n", - "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", - "\n", - "# print(\"πŸ”₯ TinyTorch Layers Module\")\n", - "# print(f\"NumPy 
version: {np.__version__}\")\n", - "# print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", - "# print(\"Ready to build neural network layers!\")" - ] - }, - { - "cell_type": "markdown", - "id": "a0ad08ea", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 1: What is a Layer?\n", - "\n", - "### Definition\n", - "A **layer** is a function that transforms tensors. Think of it as a mathematical operation that takes input data and produces output data:\n", - "\n", - "```\n", - "Input Tensor β†’ Layer β†’ Output Tensor\n", - "```\n", - "\n", - "### Why Layers Matter in Neural Networks\n", - "Layers are the fundamental building blocks of all neural networks because:\n", - "- **Modularity**: Each layer has a specific job (linear transformation, nonlinearity, etc.)\n", - "- **Composability**: Layers can be combined to create complex functions\n", - "- **Learnability**: Each layer has parameters that can be learned from data\n", - "- **Interpretability**: Different layers learn different features\n", - "\n", - "### The Fundamental Insight\n", - "**Neural networks are just function composition!**\n", - "```\n", - "x β†’ Layer1 β†’ Layer2 β†’ Layer3 β†’ y\n", - "```\n", - "\n", - "Each layer transforms the data, and the final output is the composition of all these transformations.\n", - "\n", - "### Real-World Examples\n", - "- **Dense Layer**: Learns linear relationships between features\n", - "- **Convolutional Layer**: Learns spatial patterns in images\n", - "- **Recurrent Layer**: Learns temporal patterns in sequences\n", - "- **Activation Layer**: Adds nonlinearity to make networks powerful\n", - "\n", - "### Visual Intuition\n", - "```\n", - "Input: [1, 2, 3] (3 features)\n", - "Dense Layer: y = Wx + b\n", - "Weights W: [[0.1, 0.2, 0.3],\n", - " [0.4, 0.5, 0.6]] (2Γ—3 matrix)\n", - "Bias b: [0.1, 0.2] (2 values)\n", - "Output: [0.1*1 + 0.2*2 + 0.3*3 + 0.1,\n", - " 0.4*1 + 0.5*2 + 0.6*3 + 0.2] = [1.4, 3.2]\n", - "```\n", 
- "\n", - "Let's start with the most important layer: **Dense** (also called Linear or Fully Connected)." - ] - }, - { - "cell_type": "markdown", - "id": "5d63d076", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: Understanding Matrix Multiplication\n", - "\n", - "Before we build layers, let's understand the core operation: **matrix multiplication**. This is what powers all neural network computations.\n", - "\n", - "### Why Matrix Multiplication Matters\n", - "- **Efficiency**: Process multiple inputs at once\n", - "- **Parallelization**: GPU acceleration works great with matrix operations\n", - "- **Batch processing**: Handle multiple samples simultaneously\n", - "- **Mathematical foundation**: Linear algebra is the language of neural networks\n", - "\n", - "### The Math Behind It\n", - "For matrices A (mΓ—n) and B (nΓ—p), the result C (mΓ—p) is:\n", - "```\n", - "C[i,j] = sum(A[i,k] * B[k,j] for k in range(n))\n", - "```\n", - "\n", - "### Visual Example\n", - "```\n", - "A = [[1, 2], B = [[5, 6],\n", - " [3, 4]] [7, 8]]\n", - "\n", - "C = A @ B = [[1*5 + 2*7, 1*6 + 2*8],\n", - " [3*5 + 4*7, 3*6 + 4*8]]\n", - " = [[19, 22],\n", - " [43, 50]]\n", - "```\n", - "\n", - "Let's implement this step by step!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "82cc8565", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray:\n", - " \"\"\"\n", - " Naive matrix multiplication using explicit for-loops.\n", - " \n", - " This helps you understand what matrix multiplication really does!\n", - " \n", - " Args:\n", - " A: Matrix of shape (m, n)\n", - " B: Matrix of shape (n, p)\n", - " \n", - " Returns:\n", - " Matrix of shape (m, p) where C[i,j] = sum(A[i,k] * B[k,j] for k in range(n))\n", - " \n", - " TODO: Implement matrix multiplication using three nested for-loops.\n", - " \n", - " APPROACH:\n", - " 1. Get the dimensions: m, n from A and n2, p from B\n", - " 2. Check that n == n2 (matrices must be compatible)\n", - " 3. Create output matrix C of shape (m, p) filled with zeros\n", - " 4. Use three nested loops:\n", - " - i loop: rows of A (0 to m-1)\n", - " - j loop: columns of B (0 to p-1) \n", - " - k loop: shared dimension (0 to n-1)\n", - " 5. 
For each (i,j), compute: C[i,j] += A[i,k] * B[k,j]\n", - " \n", - " EXAMPLE:\n", - " A = [[1, 2], B = [[5, 6],\n", - " [3, 4]] [7, 8]]\n", - " \n", - " C[0,0] = A[0,0]*B[0,0] + A[0,1]*B[1,0] = 1*5 + 2*7 = 19\n", - " C[0,1] = A[0,0]*B[0,1] + A[0,1]*B[1,1] = 1*6 + 2*8 = 22\n", - " C[1,0] = A[1,0]*B[0,0] + A[1,1]*B[1,0] = 3*5 + 4*7 = 43\n", - " C[1,1] = A[1,0]*B[0,1] + A[1,1]*B[1,1] = 3*6 + 4*8 = 50\n", - " \n", - " HINTS:\n", - " - Start with C = np.zeros((m, p))\n", - " - Use three nested for loops: for i in range(m): for j in range(p): for k in range(n):\n", - " - Accumulate the sum: C[i,j] += A[i,k] * B[k,j]\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea923f30", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray:\n", - " \"\"\"\n", - " Naive matrix multiplication using explicit for-loops.\n", - " \n", - " This helps you understand what matrix multiplication really does!\n", - " \"\"\"\n", - " m, n = A.shape\n", - " n2, p = B.shape\n", - " assert n == n2, f\"Matrix shapes don't match: A({m},{n}) @ B({n2},{p})\"\n", - " \n", - " C = np.zeros((m, p))\n", - " for i in range(m):\n", - " for j in range(p):\n", - " for k in range(n):\n", - " C[i, j] += A[i, k] * B[k, j]\n", - " return C" - ] - }, - { - "cell_type": "markdown", - "id": "60fb8544", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Matrix Multiplication" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "28898e45", - "metadata": {}, - "outputs": [], - "source": [ - "# Test matrix multiplication\n", - "print(\"Testing matrix multiplication...\")\n", - "\n", - "try:\n", - " # Test case 1: Simple 2x2 matrices\n", - " A = np.array([[1, 2], [3, 4]], dtype=np.float32)\n", - " B = np.array([[5, 6], [7, 8]], 
dtype=np.float32)\n", - " \n", - " result = matmul_naive(A, B)\n", - " expected = np.array([[19, 22], [43, 50]], dtype=np.float32)\n", - " \n", - " print(f\"βœ… Matrix A:\\n{A}\")\n", - " print(f\"βœ… Matrix B:\\n{B}\")\n", - " print(f\"βœ… Your result:\\n{result}\")\n", - " print(f\"βœ… Expected:\\n{expected}\")\n", - " \n", - " assert np.allclose(result, expected), \"❌ Result doesn't match expected!\"\n", - " print(\"πŸŽ‰ Matrix multiplication works!\")\n", - " \n", - " # Test case 2: Compare with NumPy\n", - " numpy_result = A @ B\n", - " assert np.allclose(result, numpy_result), \"❌ Doesn't match NumPy result!\"\n", - " print(\"βœ… Matches NumPy implementation!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement matmul_naive above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "d8176801", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 3: Building the Dense Layer\n", - "\n", - "Now let's build the **Dense layer**, the most fundamental building block of neural networks. 
A Dense layer performs a linear transformation: `y = Wx + b`\n", - "\n", - "### What is a Dense Layer?\n", - "- **Linear transformation**: `y = Wx + b`\n", - "- **W**: Weight matrix (learnable parameters)\n", - "- **x**: Input tensor\n", - "- **b**: Bias vector (learnable parameters)\n", - "- **y**: Output tensor\n", - "\n", - "### Why Dense Layers Matter\n", - "- **Universal approximation**: Can approximate any function with enough neurons\n", - "- **Feature learning**: Each neuron learns a different feature\n", - "- **Nonlinearity**: When combined with activation functions, becomes very powerful\n", - "- **Foundation**: All other layers build on this concept\n", - "\n", - "### The Math\n", - "For input x of shape (batch_size, input_size):\n", - "- **W**: Weight matrix of shape (input_size, output_size)\n", - "- **b**: Bias vector of shape (output_size)\n", - "- **y**: Output of shape (batch_size, output_size)\n", - "\n", - "### Visual Example\n", - "```\n", - "Input: x = [1, 2, 3] (3 features)\n", - "Weights: W = [[0.1, 0.2], Bias: b = [0.1, 0.2]\n", - " [0.3, 0.4],\n", - " [0.5, 0.6]]\n", - "\n", - "Step 1: Wx = [0.1*1 + 0.3*2 + 0.5*3, 0.2*1 + 0.4*2 + 0.6*3]\n", - " = [2.2, 3.2]\n", - "\n", - "Step 2: y = Wx + b = [2.2 + 0.1, 3.2 + 0.2] = [2.3, 3.4]\n", - "```\n", - "\n", - "Let's implement this!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4a916c67", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Dense:\n", - " \"\"\"\n", - " Dense (Linear) Layer: y = Wx + b\n", - " \n", - " The fundamental building block of neural networks.\n", - " Performs linear transformation: matrix multiplication + bias addition.\n", - " \n", - " Args:\n", - " input_size: Number of input features\n", - " output_size: Number of output features\n", - " use_bias: Whether to include bias term (default: True)\n", - " use_naive_matmul: Whether to use naive matrix multiplication (for learning)\n", - " \n", - " TODO: Implement the Dense layer with weight initialization and forward pass.\n", - " \n", - " APPROACH:\n", - " 1. Store layer parameters (input_size, output_size, use_bias, use_naive_matmul)\n", - " 2. Initialize weights with small random values (Xavier/Glorot initialization)\n", - " 3. Initialize bias to zeros (if use_bias=True)\n", - " 4. 
Implement forward pass using matrix multiplication and bias addition\n", - " \n", - " EXAMPLE:\n", - " layer = Dense(input_size=3, output_size=2)\n", - " x = Tensor([[1, 2, 3]]) # batch_size=1, input_size=3\n", - " y = layer(x) # shape: (1, 2)\n", - " \n", - " HINTS:\n", - " - Use np.random.randn() for random initialization\n", - " - Scale weights by sqrt(2/(input_size + output_size)) for Xavier init\n", - " - Store weights and bias as numpy arrays\n", - " - Use matmul_naive or @ operator based on use_naive_matmul flag\n", - " \"\"\"\n", - " \n", - " def __init__(self, input_size: int, output_size: int, use_bias: bool = True, \n", - " use_naive_matmul: bool = False):\n", - " \"\"\"\n", - " Initialize Dense layer with random weights.\n", - " \n", - " Args:\n", - " input_size: Number of input features\n", - " output_size: Number of output features\n", - " use_bias: Whether to include bias term\n", - " use_naive_matmul: Use naive matrix multiplication (for learning)\n", - " \n", - " TODO: \n", - " 1. Store layer parameters (input_size, output_size, use_bias, use_naive_matmul)\n", - " 2. Initialize weights with small random values\n", - " 3. Initialize bias to zeros (if use_bias=True)\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Store the parameters as instance variables\n", - " 2. Calculate scale factor for Xavier initialization: sqrt(2/(input_size + output_size))\n", - " 3. Initialize weights: np.random.randn(input_size, output_size) * scale\n", - " 4. If use_bias=True, initialize bias: np.zeros(output_size)\n", - " 5. 
If use_bias=False, set bias to None\n", - " \n", - " EXAMPLE:\n", - " Dense(3, 2) creates:\n", - " - weights: shape (3, 2) with small random values\n", - " - bias: shape (2,) with zeros\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Forward pass: y = Wx + b\n", - " \n", - " Args:\n", - " x: Input tensor of shape (batch_size, input_size)\n", - " \n", - " Returns:\n", - " Output tensor of shape (batch_size, output_size)\n", - " \n", - " TODO: Implement matrix multiplication and bias addition\n", - " - Use self.use_naive_matmul to choose between NumPy and naive implementation\n", - " - If use_naive_matmul=True, use matmul_naive(x.data, self.weights)\n", - " - If use_naive_matmul=False, use x.data @ self.weights\n", - " - Add bias if self.use_bias=True\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Perform matrix multiplication: Wx\n", - " - If use_naive_matmul: result = matmul_naive(x.data, self.weights)\n", - " - Else: result = x.data @ self.weights\n", - " 2. Add bias if use_bias: result += self.bias\n", - " 3. 
Return Tensor(result)\n", - " \n", - " EXAMPLE:\n", - " Input x: Tensor([[1, 2, 3]]) # shape (1, 3)\n", - " Weights: shape (3, 2)\n", - " Output: Tensor([[val1, val2]]) # shape (1, 2)\n", - " \n", - " HINTS:\n", - " - x.data gives you the numpy array\n", - " - self.weights is your weight matrix\n", - " - Use broadcasting for bias addition: result + self.bias\n", - " - Return Tensor(result) to wrap the result\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8570d026", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Dense:\n", - " \"\"\"\n", - " Dense (Linear) Layer: y = Wx + b\n", - " \n", - " The fundamental building block of neural networks.\n", - " Performs linear transformation: matrix multiplication + bias addition.\n", - " \"\"\"\n", - " \n", - " def __init__(self, input_size: int, output_size: int, use_bias: bool = True, \n", - " use_naive_matmul: bool = False):\n", - " \"\"\"\n", - " Initialize Dense layer with random weights.\n", - " \n", - " Args:\n", - " input_size: Number of input features\n", - " output_size: Number of output features\n", - " use_bias: Whether to include bias term\n", - " use_naive_matmul: Use naive matrix multiplication (for learning)\n", - " \"\"\"\n", - " # Store parameters\n", - " self.input_size = input_size\n", - " self.output_size = output_size\n", - " self.use_bias = use_bias\n", - " self.use_naive_matmul = use_naive_matmul\n", - " \n", - " # Xavier/Glorot initialization\n", - " scale = np.sqrt(2.0 / (input_size + output_size))\n", - " self.weights = np.random.randn(input_size, output_size).astype(np.float32) * scale\n", - " \n", - " # Initialize bias\n", - " if use_bias:\n", - " self.bias 
= np.zeros(output_size, dtype=np.float32)\n", - " else:\n", - " self.bias = None\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Forward pass: y = Wx + b\n", - " \n", - " Args:\n", - " x: Input tensor of shape (batch_size, input_size)\n", - " \n", - " Returns:\n", - " Output tensor of shape (batch_size, output_size)\n", - " \"\"\"\n", - " # Matrix multiplication\n", - " if self.use_naive_matmul:\n", - " result = matmul_naive(x.data, self.weights)\n", - " else:\n", - " result = x.data @ self.weights\n", - " \n", - " # Add bias\n", - " if self.use_bias:\n", - " result += self.bias\n", - " \n", - " return Tensor(result)\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "90197c65", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Dense Layer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9d9e4d64", - "metadata": {}, - "outputs": [], - "source": [ - "# Test Dense layer\n", - "print(\"Testing Dense layer...\")\n", - "\n", - "try:\n", - " # Test basic Dense layer\n", - " layer = Dense(input_size=3, output_size=2, use_bias=True)\n", - " x = Tensor([[1, 2, 3]]) # batch_size=1, input_size=3\n", - " \n", - " print(f\"βœ… Input shape: {x.shape}\")\n", - " print(f\"βœ… Layer weights shape: {layer.weights.shape}\")\n", - " print(f\"βœ… Layer bias shape: {layer.bias.shape}\")\n", - " \n", - " y = layer(x)\n", - " print(f\"βœ… Output shape: {y.shape}\")\n", - " print(f\"βœ… Output: {y}\")\n", - " \n", - " # Test without bias\n", - " layer_no_bias = Dense(input_size=2, output_size=1, use_bias=False)\n", - " x2 = Tensor([[1, 2]])\n", - " y2 = layer_no_bias(x2)\n", - " print(f\"βœ… No bias output: {y2}\")\n", - " \n", - " # Test naive matrix multiplication\n", - " layer_naive = Dense(input_size=2, output_size=2, 
use_naive_matmul=True)\n", - " x3 = Tensor([[1, 2]])\n", - " y3 = layer_naive(x3)\n", - " print(f\"βœ… Naive matmul output: {y3}\")\n", - " \n", - " print(\"\\nπŸŽ‰ All Dense layer tests passed!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the Dense layer above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "37532e4d", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 4: Composing Layers with Activations\n", - "\n", - "Now let's see how layers work together! A neural network is just layers composed with activation functions.\n", - "\n", - "### Why Layer Composition Matters\n", - "- **Nonlinearity**: Activation functions make networks powerful\n", - "- **Feature learning**: Each layer learns different levels of features\n", - "- **Universal approximation**: Can approximate any function\n", - "- **Modularity**: Easy to experiment with different architectures\n", - "\n", - "### The Pattern\n", - "```\n", - "Input β†’ Dense β†’ Activation β†’ Dense β†’ Activation β†’ Output\n", - "```\n", - "\n", - "### Real-World Example\n", - "```\n", - "Input: [1, 2, 3] (3 features)\n", - "Dense(3β†’2): [1.4, 2.8] (linear transformation)\n", - "ReLU: [1.4, 2.8] (nonlinearity)\n", - "Dense(2β†’1): [3.2] (final prediction)\n", - "```\n", - "\n", - "Let's build a simple network!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6e1d85c", - "metadata": {}, - "outputs": [], - "source": [ - "# Test layer composition\n", - "print(\"Testing layer composition...\")\n", - "\n", - "try:\n", - " # Create a simple network: Dense β†’ ReLU β†’ Dense\n", - " dense1 = Dense(input_size=3, output_size=2)\n", - " relu = ReLU()\n", - " dense2 = Dense(input_size=2, output_size=1)\n", - " \n", - " # Test input\n", - " x = Tensor([[1, 2, 3]])\n", - " print(f\"βœ… Input: {x}\")\n", - " \n", - " # Forward pass through the network\n", - " h1 = dense1(x)\n", - " print(f\"βœ… After Dense1: {h1}\")\n", - " \n", - " h2 = relu(h1)\n", - " print(f\"βœ… After ReLU: {h2}\")\n", - " \n", - " y = dense2(h2)\n", - " print(f\"βœ… Final output: {y}\")\n", - " \n", - " print(\"\\nπŸŽ‰ Layer composition works!\")\n", - " print(\"This is how neural networks work: layers + activations!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure all your layers and activations are working!\")" - ] - }, - { - "cell_type": "markdown", - "id": "5f2f8a48", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 5: Performance Comparison\n", - "\n", - "Let's compare our naive matrix multiplication with NumPy's optimized version to understand why optimization matters in ML.\n", - "\n", - "### Why Performance Matters\n", - "- **Training time**: Neural networks train for hours/days\n", - "- **Inference speed**: Real-time applications need fast predictions\n", - "- **GPU utilization**: Optimized operations use hardware efficiently\n", - "- **Scalability**: Large models need efficient implementations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6f490a2", - "metadata": {}, - "outputs": [], - "source": [ - "# Performance comparison\n", - "print(\"Comparing naive vs NumPy matrix multiplication...\")\n", - "\n", - "try:\n", - " import time\n", - " \n", - " # Create test matrices\n", - " 
A = np.random.randn(100, 100).astype(np.float32)\n", - " B = np.random.randn(100, 100).astype(np.float32)\n", - " \n", - " # Time naive implementation\n", - " start_time = time.time()\n", - " result_naive = matmul_naive(A, B)\n", - " naive_time = time.time() - start_time\n", - " \n", - " # Time NumPy implementation\n", - " start_time = time.time()\n", - " result_numpy = A @ B\n", - " numpy_time = time.time() - start_time\n", - " \n", - " print(f\"βœ… Naive time: {naive_time:.4f} seconds\")\n", - " print(f\"βœ… NumPy time: {numpy_time:.4f} seconds\")\n", - " print(f\"βœ… Speedup: {naive_time/numpy_time:.1f}x faster\")\n", - " \n", - " # Verify correctness\n", - " assert np.allclose(result_naive, result_numpy), \"Results don't match!\"\n", - " print(\"βœ… Results are identical!\")\n", - " \n", - " print(\"\\nπŸ’‘ This is why we use optimized libraries in production!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")" - ] - }, - { - "cell_type": "markdown", - "id": "35efc1ca", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 🎯 Module Summary\n", - "\n", - "Congratulations! 
You've built the foundation of neural network layers:\n", - "\n", - "### What You've Accomplished\n", - "βœ… **Matrix Multiplication**: Understanding the core operation \n", - "βœ… **Dense Layer**: Linear transformation with weights and bias \n", - "βœ… **Layer Composition**: Combining layers with activations \n", - "βœ… **Performance Awareness**: Understanding optimization importance \n", - "βœ… **Testing**: Immediate feedback on your implementations \n", - "\n", - "### Key Concepts You've Learned\n", - "- **Layers** are functions that transform tensors\n", - "- **Matrix multiplication** powers all neural network computations\n", - "- **Dense layers** perform linear transformations: `y = Wx + b`\n", - "- **Layer composition** creates complex functions from simple building blocks\n", - "- **Performance** matters for real-world ML applications\n", - "\n", - "### What's Next\n", - "In the next modules, you'll build on this foundation:\n", - "- **Networks**: Compose layers into complete models\n", - "- **Training**: Learn parameters with gradients and optimization\n", - "- **Convolutional layers**: Process spatial data like images\n", - "- **Recurrent layers**: Process sequential data like text\n", - "\n", - "### Real-World Connection\n", - "Your Dense layer is now ready to:\n", - "- Learn patterns in data through weight updates\n", - "- Transform features for classification and regression\n", - "- Serve as building blocks for complex architectures\n", - "- Integrate with the rest of the TinyTorch ecosystem\n", - "\n", - "**Ready for the next challenge?** Let's move on to building complete neural networks!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9c9187ca", - "metadata": {}, - "outputs": [], - "source": [ - "# Final verification\n", - "print(\"\\n\" + \"=\"*50)\n", - "print(\"πŸŽ‰ LAYERS MODULE COMPLETE!\")\n", - "print(\"=\"*50)\n", - "print(\"βœ… Matrix multiplication understanding\")\n", - "print(\"βœ… Dense layer implementation\")\n", - "print(\"βœ… Layer composition with activations\")\n", - "print(\"βœ… Performance awareness\")\n", - "print(\"βœ… Comprehensive testing\")\n", - "print(\"\\nπŸš€ Ready to build networks in the next module!\") " - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/assignments/source/04_networks/networks_dev.ipynb b/assignments/source/04_networks/networks_dev.ipynb deleted file mode 100644 index f348e3bb..00000000 --- a/assignments/source/04_networks/networks_dev.ipynb +++ /dev/null @@ -1,1437 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "355dc307", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module 3: Networks - Neural Network Architectures\n", - "\n", - "Welcome to the Networks module! This is where we compose layers into complete neural network architectures.\n", - "\n", - "## Learning Goals\n", - "- Understand networks as function composition: `f(x) = layer_n(...layer_2(layer_1(x)))`\n", - "- Build common architectures (MLP, CNN) from layers\n", - "- Visualize network structure and data flow\n", - "- See how architecture affects capability\n", - "- Master forward pass inference (no training yet!)\n", - "\n", - "## Build β†’ Use β†’ Understand\n", - "1. **Build**: Compose layers into complete networks\n", - "2. **Use**: Create different architectures and run inference\n", - "3. 
**Understand**: How architecture design affects network behavior\n", - "\n", - "## Module Dependencies\n", - "This module builds on previous modules:\n", - "- **tensor** β†’ **activations** β†’ **layers** β†’ **networks**\n", - "- Clean composition: math functions β†’ building blocks β†’ complete systems" - ] - }, - { - "cell_type": "markdown", - "id": "cf724917", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## πŸ“¦ Where This Code Lives in the Final Package\n", - "\n", - "**Learning Side:** You work in `assignments/source/04_networks/networks_dev.py` \n", - "**Building Side:** Code exports to `tinytorch.core.networks`\n", - "\n", - "```python\n", - "# Final package structure:\n", - "from tinytorch.core.networks import Sequential, MLP\n", - "from tinytorch.core.layers import Dense, Conv2D\n", - "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", - "from tinytorch.core.tensor import Tensor\n", - "```\n", - "\n", - "**Why this matters:**\n", - "- **Learning:** Focused modules for deep understanding\n", - "- **Production:** Proper organization like PyTorch's `torch.nn`\n", - "- **Consistency:** All network architectures live together in `core.networks`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79460d45", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp core.networks\n", - "\n", - "# Setup and imports\n", - "import numpy as np\n", - "import sys\n", - "from typing import List, Union, Optional, Callable\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib.patches as patches\n", - "from matplotlib.patches import FancyBboxPatch, ConnectionPatch\n", - "import seaborn as sns\n", - "\n", - "# Import all the building blocks we need\n", - "from tinytorch.core.tensor import Tensor\n", - "from tinytorch.core.layers import Dense\n", - "from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax\n", - "\n", - "print(\"πŸ”₯ TinyTorch Networks Module\")\n", - "print(f\"NumPy version: 
{np.__version__}\")\n", - "print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", - "print(\"Ready to build neural network architectures!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2190e04d", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "import sys\n", - "from typing import List, Union, Optional, Callable\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib.patches as patches\n", - "from matplotlib.patches import FancyBboxPatch, ConnectionPatch\n", - "import seaborn as sns\n", - "\n", - "# Import our building blocks\n", - "from tinytorch.core.tensor import Tensor\n", - "from tinytorch.core.layers import Dense\n", - "from tinytorch.core.activations import ReLU, Sigmoid, Tanh" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c03a46b9", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def _should_show_plots():\n", - " \"\"\"Check if we should show plots (disable during testing)\"\"\"\n", - " return 'pytest' not in sys.modules and 'test' not in sys.argv" - ] - }, - { - "cell_type": "markdown", - "id": "58e30d14", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 1: What is a Network?\n", - "\n", - "### Definition\n", - "A **network** is a composition of layers that transforms input data into output predictions. 
Think of it as a pipeline of transformations:\n", - "\n", - "```\n", - "Input β†’ Layer1 β†’ Layer2 β†’ Layer3 β†’ Output\n", - "```\n", - "\n", - "### Why Networks Matter\n", - "- **Function composition**: Complex behavior from simple building blocks\n", - "- **Learnable parameters**: Each layer has weights that can be learned\n", - "- **Architecture design**: Different layouts solve different problems\n", - "- **Real-world applications**: Classification, regression, generation, etc.\n", - "\n", - "### The Fundamental Insight\n", - "**Neural networks are just function composition!**\n", - "- Each layer is a function: `f_i(x)`\n", - "- The network is: `f(x) = f_n(...f_2(f_1(x)))`\n", - "- Complex behavior emerges from simple building blocks\n", - "\n", - "### Real-World Examples\n", - "- **MLP (Multi-Layer Perceptron)**: Classic feedforward network\n", - "- **CNN (Convolutional Neural Network)**: For image processing\n", - "- **RNN (Recurrent Neural Network)**: For sequential data\n", - "- **Transformer**: For attention-based processing\n", - "\n", - "### Visual Intuition\n", - "```\n", - "Input: [1, 2, 3] (3 features)\n", - "Layer1: [1.4, 2.8] (linear transformation)\n", - "Layer2: [1.4, 2.8] (nonlinearity)\n", - "Layer3: [0.7] (final prediction)\n", - "```\n", - "\n", - "### The Math Behind It\n", - "For a network with layers `f_1, f_2, ..., f_n`:\n", - "```\n", - "f(x) = f_n(f_{n-1}(...f_2(f_1(x))))\n", - "```\n", - "\n", - "Each layer transforms the data, and the final output is the composition of all these transformations.\n", - "\n", - "Let's start by building the most fundamental network: **Sequential**." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8de00b9b", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Sequential:\n", - " \"\"\"\n", - " Sequential Network: Composes layers in sequence\n", - " \n", - " The most fundamental network architecture.\n", - " Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x)))\n", - " \n", - " Args:\n", - " layers: List of layers to compose\n", - " \n", - " TODO: Implement the Sequential network with forward pass.\n", - " \n", - " APPROACH:\n", - " 1. Store the list of layers as an instance variable\n", - " 2. Implement forward pass that applies each layer in sequence\n", - " 3. Make the network callable for easy use\n", - " \n", - " EXAMPLE:\n", - " network = Sequential([\n", - " Dense(3, 4),\n", - " ReLU(),\n", - " Dense(4, 2),\n", - " Sigmoid()\n", - " ])\n", - " x = Tensor([[1, 2, 3]])\n", - " y = network(x) # Forward pass through all layers\n", - " \n", - " HINTS:\n", - " - Store layers in self.layers\n", - " - Use a for loop to apply each layer in order\n", - " - Each layer's output becomes the next layer's input\n", - " - Return the final output\n", - " \"\"\"\n", - " \n", - " def __init__(self, layers: List):\n", - " \"\"\"\n", - " Initialize Sequential network with layers.\n", - " \n", - " Args:\n", - " layers: List of layers to compose in order\n", - " \n", - " TODO: Store the layers and implement forward pass\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Store the layers list as self.layers\n", - " 2. 
This creates the network architecture\n", - " \n", - " EXAMPLE:\n", - " Sequential([Dense(3,4), ReLU(), Dense(4,2)])\n", - " creates a 3-layer network: Dense β†’ ReLU β†’ Dense\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Forward pass through all layers in sequence.\n", - " \n", - " Args:\n", - " x: Input tensor\n", - " \n", - " Returns:\n", - " Output tensor after passing through all layers\n", - " \n", - " TODO: Implement sequential forward pass through all layers\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Start with the input tensor: current = x\n", - " 2. Loop through each layer in self.layers\n", - " 3. Apply each layer: current = layer(current)\n", - " 4. Return the final output\n", - " \n", - " EXAMPLE:\n", - " Input: Tensor([[1, 2, 3]])\n", - " Layer1 (Dense): Tensor([[1.4, 2.8]])\n", - " Layer2 (ReLU): Tensor([[1.4, 2.8]])\n", - " Layer3 (Dense): Tensor([[0.7]])\n", - " Output: Tensor([[0.7]])\n", - " \n", - " HINTS:\n", - " - Use a for loop: for layer in self.layers:\n", - " - Apply each layer: current = layer(current)\n", - " - The output of one layer becomes input to the next\n", - " - Return the final result\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Make network callable: network(x) same as network.forward(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e9f65af", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Sequential:\n", - " \"\"\"\n", - " Sequential Network: Composes layers in sequence\n", - " \n", - " The most fundamental network architecture.\n", - " Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x)))\n", - " \"\"\"\n", - " \n", - " def __init__(self, layers: 
List):\n", - " \"\"\"Initialize Sequential network with layers.\"\"\"\n", - " self.layers = layers\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"Forward pass through all layers in sequence.\"\"\"\n", - " # Apply each layer in order\n", - " for layer in self.layers:\n", - " x = layer(x)\n", - " return x\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Make network callable: network(x) same as network.forward(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "88b54128", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Sequential Network" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b814f23", - "metadata": {}, - "outputs": [], - "source": [ - "# Test the Sequential network\n", - "print(\"Testing Sequential network...\")\n", - "\n", - "try:\n", - " # Create a simple 2-layer network: 3 β†’ 4 β†’ 2\n", - " network = Sequential([\n", - " Dense(input_size=3, output_size=4),\n", - " ReLU(),\n", - " Dense(input_size=4, output_size=2),\n", - " Sigmoid()\n", - " ])\n", - " \n", - " print(f\"βœ… Network created with {len(network.layers)} layers\")\n", - " \n", - " # Test with sample data\n", - " x = Tensor([[1.0, 2.0, 3.0]])\n", - " print(f\"βœ… Input: {x}\")\n", - " \n", - " # Forward pass\n", - " y = network(x)\n", - " print(f\"βœ… Output: {y}\")\n", - " print(f\"βœ… Output shape: {y.shape}\")\n", - " \n", - " # Verify the network works\n", - " assert y.shape == (1, 2), f\"❌ Expected shape (1, 2), got {y.shape}\"\n", - " assert np.all(y.data >= 0) and np.all(y.data <= 1), \"❌ Sigmoid output should be between 0 and 1\"\n", - " print(\"πŸŽ‰ Sequential network works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the Sequential network above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "28eb9398", - "metadata": { - "cell_marker": "\"\"\"", - 
"lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: Understanding Network Architecture\n", - "\n", - "Now let's explore how different network architectures affect the network's capabilities.\n", - "\n", - "### What is Network Architecture?\n", - "**Architecture** refers to how layers are arranged and connected. It determines:\n", - "- **Capacity**: How complex patterns the network can learn\n", - "- **Efficiency**: How many parameters and computations needed\n", - "- **Specialization**: What types of problems it's good at\n", - "\n", - "### Common Architectures\n", - "\n", - "#### 1. **MLP (Multi-Layer Perceptron)**\n", - "```\n", - "Input β†’ Dense β†’ ReLU β†’ Dense β†’ ReLU β†’ Dense β†’ Output\n", - "```\n", - "- **Use case**: General-purpose learning\n", - "- **Strengths**: Universal approximation, simple to understand\n", - "- **Weaknesses**: Doesn't exploit spatial structure\n", - "\n", - "#### 2. **CNN (Convolutional Neural Network)**\n", - "```\n", - "Input β†’ Conv2D β†’ ReLU β†’ Conv2D β†’ ReLU β†’ Dense β†’ Output\n", - "```\n", - "- **Use case**: Image processing, spatial data\n", - "- **Strengths**: Parameter sharing, translation invariance\n", - "- **Weaknesses**: Fixed spatial structure\n", - "\n", - "#### 3. **Deep Network**\n", - "```\n", - "Input β†’ Dense β†’ ReLU β†’ Dense β†’ ReLU β†’ Dense β†’ ReLU β†’ Dense β†’ Output\n", - "```\n", - "- **Use case**: Complex pattern recognition\n", - "- **Strengths**: High capacity, can learn complex functions\n", - "- **Weaknesses**: More parameters, harder to train\n", - "\n", - "Let's build some common architectures!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae4fe584", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int, \n", - " activation=ReLU, output_activation=Sigmoid) -> Sequential:\n", - " \"\"\"\n", - " Create a Multi-Layer Perceptron (MLP) network.\n", - " \n", - " Args:\n", - " input_size: Number of input features\n", - " hidden_sizes: List of hidden layer sizes\n", - " output_size: Number of output features\n", - " activation: Activation function for hidden layers (default: ReLU)\n", - " output_activation: Activation function for output layer (default: Sigmoid)\n", - " \n", - " Returns:\n", - " Sequential network with MLP architecture\n", - " \n", - " TODO: Implement MLP creation with alternating Dense and activation layers.\n", - " \n", - " APPROACH:\n", - " 1. Start with an empty list of layers\n", - " 2. Add the first Dense layer: input_size β†’ first hidden size\n", - " 3. For each hidden layer:\n", - " - Add activation function\n", - " - Add Dense layer connecting to next hidden size\n", - " 4. Add final activation function\n", - " 5. Add final Dense layer: last hidden size β†’ output_size\n", - " 6. Add output activation function\n", - " 7. 
Return Sequential(layers)\n", - " \n", - " EXAMPLE:\n", - " create_mlp(3, [4, 2], 1) creates:\n", - " Dense(3β†’4) β†’ ReLU β†’ Dense(4β†’2) β†’ ReLU β†’ Dense(2β†’1) β†’ Sigmoid\n", - " \n", - " HINTS:\n", - " - Start with layers = []\n", - " - Add Dense layers with appropriate input/output sizes\n", - " - Add activation functions between Dense layers\n", - " - Don't forget the final output activation\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3df597d8", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int, \n", - " activation=ReLU, output_activation=Sigmoid) -> Sequential:\n", - " \"\"\"Create a Multi-Layer Perceptron (MLP) network.\"\"\"\n", - " layers = []\n", - " \n", - " # Add first layer\n", - " current_size = input_size\n", - " for hidden_size in hidden_sizes:\n", - " layers.append(Dense(input_size=current_size, output_size=hidden_size))\n", - " layers.append(activation())\n", - " current_size = hidden_size\n", - " \n", - " # Add output layer\n", - " layers.append(Dense(input_size=current_size, output_size=output_size))\n", - " layers.append(output_activation())\n", - " \n", - " return Sequential(layers)" - ] - }, - { - "cell_type": "markdown", - "id": "f053d4a8", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your MLP Creation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "efec756b", - "metadata": {}, - "outputs": [], - "source": [ - "# Test MLP creation\n", - "print(\"Testing MLP creation...\")\n", - "\n", - "try:\n", - " # Create different MLP architectures\n", - " mlp1 = create_mlp(input_size=3, hidden_sizes=[4], output_size=1)\n", - " mlp2 = create_mlp(input_size=5, hidden_sizes=[8, 4], output_size=2)\n", - " mlp3 = create_mlp(input_size=2, 
hidden_sizes=[10, 6, 3], output_size=1, activation=Tanh)\n", - " \n", - " print(f\"βœ… MLP1: {len(mlp1.layers)} layers\")\n", - " print(f\"βœ… MLP2: {len(mlp2.layers)} layers\")\n", - " print(f\"βœ… MLP3: {len(mlp3.layers)} layers\")\n", - " \n", - " # Test forward pass\n", - " x = Tensor([[1.0, 2.0, 3.0]])\n", - " y1 = mlp1(x)\n", - " print(f\"βœ… MLP1 output: {y1}\")\n", - " \n", - " x2 = Tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])\n", - " y2 = mlp2(x2)\n", - " print(f\"βœ… MLP2 output: {y2}\")\n", - " \n", - " print(\"πŸŽ‰ MLP creation works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement create_mlp above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "9d1c34b6", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 3: Network Visualization and Analysis\n", - "\n", - "Let's create tools to visualize and analyze network architectures. This helps us understand what our networks are doing.\n", - "\n", - "### Why Visualization Matters\n", - "- **Architecture understanding**: See how data flows through the network\n", - "- **Debugging**: Identify bottlenecks and issues\n", - "- **Design**: Compare different architectures\n", - "- **Communication**: Explain networks to others\n", - "\n", - "### What We'll Build\n", - "1. **Architecture visualization**: Show layer connections\n", - "2. **Data flow visualization**: See how data transforms\n", - "3. **Network comparison**: Compare different architectures\n", - "4. 
**Behavior analysis**: Understand network capabilities" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a74a3b28", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def visualize_network_architecture(network: Sequential, title: str = \"Network Architecture\"):\n", - " \"\"\"\n", - " Visualize the architecture of a Sequential network.\n", - " \n", - " Args:\n", - " network: Sequential network to visualize\n", - " title: Title for the plot\n", - " \n", - " TODO: Create a visualization showing the network structure.\n", - " \n", - " APPROACH:\n", - " 1. Create a matplotlib figure\n", - " 2. For each layer, draw a box showing its type and size\n", - " 3. Connect the boxes with arrows showing data flow\n", - " 4. Add labels and formatting\n", - " \n", - " EXAMPLE:\n", - " Input β†’ Dense(3β†’4) β†’ ReLU β†’ Dense(4β†’2) β†’ Sigmoid β†’ Output\n", - " \n", - " HINTS:\n", - " - Use plt.subplots() to create the figure\n", - " - Use plt.text() to add layer labels\n", - " - Use plt.arrow() to show connections\n", - " - Add proper spacing and formatting\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1274dbc", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def visualize_network_architecture(network: Sequential, title: str = \"Network Architecture\"):\n", - " \"\"\"Visualize the architecture of a Sequential network.\"\"\"\n", - " if not _should_show_plots():\n", - " print(\"πŸ“Š Visualization disabled during testing\")\n", - " return\n", - " \n", - " fig, ax = plt.subplots(1, 1, figsize=(12, 6))\n", - " \n", - " # Calculate positions\n", - " num_layers = len(network.layers)\n", - " x_positions = np.linspace(0, 10, num_layers + 2)\n", - " \n", - " # Draw input\n", - " ax.text(x_positions[0], 0, 'Input', ha='center', va='center', 
\n", - " bbox=dict(boxstyle='round,pad=0.3', facecolor='lightblue'))\n", - " \n", - " # Draw layers\n", - " for i, layer in enumerate(network.layers):\n", - " layer_name = type(layer).__name__\n", - " ax.text(x_positions[i+1], 0, layer_name, ha='center', va='center',\n", - " bbox=dict(boxstyle='round,pad=0.3', facecolor='lightgreen'))\n", - " \n", - " # Draw arrow into this layer\n", - " ax.arrow(x_positions[i], 0, 0.8, 0, head_width=0.1, head_length=0.1, \n", - " fc='black', ec='black')\n", - " \n", - " # Arrow from the last layer to the output\n", - " ax.arrow(x_positions[-2], 0, 0.8, 0, head_width=0.1, head_length=0.1,\n", - " fc='black', ec='black')\n", - " \n", - " # Draw output\n", - " ax.text(x_positions[-1], 0, 'Output', ha='center', va='center',\n", - " bbox=dict(boxstyle='round,pad=0.3', facecolor='lightcoral'))\n", - " \n", - " ax.set_xlim(-0.5, 10.5)\n", - " ax.set_ylim(-0.5, 0.5)\n", - " ax.set_title(title)\n", - " ax.axis('off')\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "286f403e", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Network Visualization" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2630d356", - "metadata": {}, - "outputs": [], - "source": [ - "# Test network visualization\n", - "print(\"Testing network visualization...\")\n", - "\n", - "try:\n", - " # Create a test network\n", - " test_network = Sequential([\n", - " Dense(input_size=3, output_size=4),\n", - " ReLU(),\n", - " Dense(input_size=4, output_size=2),\n", - " Sigmoid()\n", - " ])\n", - " \n", - " # Visualize the network\n", - " if _should_show_plots():\n", - " visualize_network_architecture(test_network, \"Test Network Architecture\")\n", - " print(\"βœ… Network visualization created!\")\n", - " else:\n", - " print(\"βœ… Network visualization skipped during testing\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement visualize_network_architecture above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "d1b3aaee", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 4: 
Data Flow Analysis\n", - "\n", - "Let's create tools to analyze how data flows through the network. This helps us understand what each layer is doing.\n", - "\n", - "### Why Data Flow Analysis Matters\n", - "- **Debugging**: See where data gets corrupted\n", - "- **Optimization**: Identify bottlenecks\n", - "- **Understanding**: Learn what each layer learns\n", - "- **Design**: Choose appropriate layer sizes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7bc5136d", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = \"Data Flow Through Network\"):\n", - " \"\"\"\n", - " Visualize how data flows through the network.\n", - " \n", - " Args:\n", - " network: Sequential network to analyze\n", - " input_data: Input tensor to trace through the network\n", - " title: Title for the plot\n", - " \n", - " TODO: Create a visualization showing how data transforms through each layer.\n", - " \n", - " APPROACH:\n", - " 1. Trace the input through each layer\n", - " 2. Record the output of each layer\n", - " 3. Create a visualization showing the transformations\n", - " 4. 
Add statistics (mean, std, range) for each layer\n", - " \n", - " EXAMPLE:\n", - " Input: [1, 2, 3] β†’ Layer1: [1.4, 2.8] β†’ Layer2: [1.4, 2.8] β†’ Output: [0.7]\n", - " \n", - " HINTS:\n", - " - Use a for loop to apply each layer\n", - " - Store intermediate outputs\n", - " - Use plt.subplot() to create multiple subplots\n", - " - Show statistics for each layer output\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c318ea50", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = \"Data Flow Through Network\"):\n", - " \"\"\"Visualize how data flows through the network.\"\"\"\n", - " if not _should_show_plots():\n", - " print(\"πŸ“Š Visualization disabled during testing\")\n", - " return\n", - " \n", - " # Trace data through network\n", - " current_data = input_data\n", - " layer_outputs = [current_data.data.flatten()]\n", - " layer_names = ['Input']\n", - " \n", - " for layer in network.layers:\n", - " current_data = layer(current_data)\n", - " layer_outputs.append(current_data.data.flatten())\n", - " layer_names.append(type(layer).__name__)\n", - " \n", - " # Create visualization\n", - " fig, axes = plt.subplots(2, len(layer_outputs), figsize=(15, 8))\n", - " \n", - " for i, (output, name) in enumerate(zip(layer_outputs, layer_names)):\n", - " # Histogram\n", - " axes[0, i].hist(output, bins=20, alpha=0.7)\n", - " axes[0, i].set_title(f'{name}\\nShape: {output.shape}')\n", - " axes[0, i].set_xlabel('Value')\n", - " axes[0, i].set_ylabel('Frequency')\n", - " \n", - " # Statistics\n", - " stats_text = f'Mean: {np.mean(output):.3f}\\nStd: {np.std(output):.3f}\\nRange: [{np.min(output):.3f}, {np.max(output):.3f}]'\n", - " axes[1, i].text(0.1, 0.5, stats_text, transform=axes[1, i].transAxes, \n", - " 
verticalalignment='center', fontsize=10)\n", - " axes[1, i].set_title(f'{name} Statistics')\n", - " axes[1, i].axis('off')\n", - " \n", - " plt.suptitle(title)\n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "bba1f652", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Data Flow Visualization" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af4ed8de", - "metadata": {}, - "outputs": [], - "source": [ - "# Test data flow visualization\n", - "print(\"Testing data flow visualization...\")\n", - "\n", - "try:\n", - " # Create a test network\n", - " test_network = Sequential([\n", - " Dense(input_size=3, output_size=4),\n", - " ReLU(),\n", - " Dense(input_size=4, output_size=2),\n", - " Sigmoid()\n", - " ])\n", - " \n", - " # Test input\n", - " test_input = Tensor([[1.0, 2.0, 3.0]])\n", - " \n", - " # Visualize data flow\n", - " if _should_show_plots():\n", - " visualize_data_flow(test_network, test_input, \"Test Network Data Flow\")\n", - " print(\"βœ… Data flow visualization created!\")\n", - " else:\n", - " print(\"βœ… Data flow visualization skipped during testing\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement visualize_data_flow above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "02308b13", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 5: Network Comparison and Analysis\n", - "\n", - "Let's create tools to compare different network architectures and understand their capabilities.\n", - "\n", - "### Why Network Comparison Matters\n", - "- **Architecture selection**: Choose the right network for your problem\n", - "- **Performance analysis**: Understand trade-offs between different designs\n", - "- **Design insights**: Learn what makes networks effective\n", - "- **Research**: Compare new architectures to baselines" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "id": "4c3634ab", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def compare_networks(networks: List[Sequential], network_names: List[str], \n", - " input_data: Tensor, title: str = \"Network Comparison\"):\n", - " \"\"\"\n", - " Compare multiple networks on the same input.\n", - " \n", - " Args:\n", - " networks: List of Sequential networks to compare\n", - " network_names: Names for each network\n", - " input_data: Input tensor to test all networks\n", - " title: Title for the plot\n", - " \n", - " TODO: Create a comparison visualization showing how different networks process the same input.\n", - " \n", - " APPROACH:\n", - " 1. Run the same input through each network\n", - " 2. Collect the outputs and intermediate results\n", - " 3. Create a visualization comparing the results\n", - " 4. Show statistics and differences\n", - " \n", - " EXAMPLE:\n", - " Compare MLP vs Deep Network vs Wide Network on same input\n", - " \n", - " HINTS:\n", - " - Use a for loop to test each network\n", - " - Store outputs and any relevant statistics\n", - " - Use plt.subplot() to create comparison plots\n", - " - Show both outputs and intermediate layer results\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ce1d5a21", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def compare_networks(networks: List[Sequential], network_names: List[str], \n", - " input_data: Tensor, title: str = \"Network Comparison\"):\n", - " \"\"\"Compare multiple networks on the same input.\"\"\"\n", - " if not _should_show_plots():\n", - " print(\"πŸ“Š Visualization disabled during testing\")\n", - " return\n", - " \n", - " # Test all networks\n", - " outputs = []\n", - " for network in networks:\n", - " output = network(input_data)\n", - " 
outputs.append(output.data.flatten())\n", - " \n", - " # Create comparison plot\n", - " fig, axes = plt.subplots(2, len(networks), figsize=(15, 8))\n", - " \n", - " for i, (output, name) in enumerate(zip(outputs, network_names)):\n", - " # Output distribution\n", - " axes[0, i].hist(output, bins=20, alpha=0.7)\n", - " axes[0, i].set_title(f'{name}\\nOutput Distribution')\n", - " axes[0, i].set_xlabel('Value')\n", - " axes[0, i].set_ylabel('Frequency')\n", - " \n", - " # Statistics\n", - " stats_text = f'Mean: {np.mean(output):.3f}\\nStd: {np.std(output):.3f}\\nRange: [{np.min(output):.3f}, {np.max(output):.3f}]\\nSize: {len(output)}'\n", - " axes[1, i].text(0.1, 0.5, stats_text, transform=axes[1, i].transAxes, \n", - " verticalalignment='center', fontsize=10)\n", - " axes[1, i].set_title(f'{name} Statistics')\n", - " axes[1, i].axis('off')\n", - " \n", - " plt.suptitle(title)\n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "d16eb163", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Network Comparison" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab17ac91", - "metadata": {}, - "outputs": [], - "source": [ - "# Test network comparison\n", - "print(\"Testing network comparison...\")\n", - "\n", - "try:\n", - " # Create different networks\n", - " network1 = create_mlp(input_size=3, hidden_sizes=[4], output_size=1)\n", - " network2 = create_mlp(input_size=3, hidden_sizes=[8, 4], output_size=1)\n", - " network3 = create_mlp(input_size=3, hidden_sizes=[2], output_size=1, activation=Tanh)\n", - " \n", - " networks = [network1, network2, network3]\n", - " names = [\"Small MLP\", \"Deep MLP\", \"Tanh MLP\"]\n", - " \n", - " # Test input\n", - " test_input = Tensor([[1.0, 2.0, 3.0]])\n", - " \n", - " # Compare networks\n", - " if _should_show_plots():\n", - " compare_networks(networks, names, test_input, \"Network Architecture Comparison\")\n", - " print(\"βœ… Network 
comparison created!\")\n", - " else:\n", - " print(\"βœ… Network comparison skipped during testing\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement compare_networks above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "c61fc030", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 6: Practical Network Architectures\n", - "\n", - "Now let's create some practical network architectures for common machine learning tasks.\n", - "\n", - "### Common Network Types\n", - "\n", - "#### 1. **Classification Networks**\n", - "- **Binary classification**: Output single probability\n", - "- **Multi-class classification**: Output probability distribution\n", - "- **Use cases**: Image classification, spam detection, sentiment analysis\n", - "\n", - "#### 2. **Regression Networks**\n", - "- **Single output**: Predict continuous value\n", - "- **Multiple outputs**: Predict multiple values\n", - "- **Use cases**: Price prediction, temperature forecasting, demand estimation\n", - "\n", - "#### 3. 
**Feature Extraction Networks**\n", - "- **Encoder networks**: Compress data into features\n", - "- **Use cases**: Dimensionality reduction, feature learning, representation learning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f117af1e", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def create_classification_network(input_size: int, num_classes: int, \n", - " hidden_sizes: List[int] = None) -> Sequential:\n", - " \"\"\"\n", - " Create a network for classification tasks.\n", - " \n", - " Args:\n", - " input_size: Number of input features\n", - " num_classes: Number of output classes\n", - " hidden_sizes: List of hidden layer sizes (default: [input_size * 2])\n", - " \n", - " Returns:\n", - " Sequential network for classification\n", - " \n", - " TODO: Implement classification network creation.\n", - " \n", - " APPROACH:\n", - " 1. Use default hidden sizes if none provided\n", - " 2. Create MLP with appropriate architecture\n", - " 3. Use Sigmoid for binary classification (num_classes=1)\n", - " 4. 
Use appropriate activation for multi-class\n", - " \n", - " EXAMPLE:\n", - " create_classification_network(10, 3) creates:\n", - " Dense(10β†’20) β†’ ReLU β†’ Dense(20β†’3) β†’ Softmax\n", - " \n", - " HINTS:\n", - " - Use create_mlp() function\n", - " - Choose appropriate output activation based on num_classes\n", - " - For binary classification (num_classes=1), use Sigmoid\n", - " - For multi-class, use Softmax to get a probability distribution\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "867fa5d4", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def create_classification_network(input_size: int, num_classes: int, \n", - " hidden_sizes: List[int] = None) -> Sequential:\n", - " \"\"\"Create a network for classification tasks.\"\"\"\n", - " if hidden_sizes is None:\n", - " hidden_sizes = [input_size * 2] # Default: one hidden layer of size input_size * 2\n", - " \n", - " # Choose appropriate output activation\n", - " output_activation = Sigmoid if num_classes == 1 else Softmax\n", - " \n", - " return create_mlp(input_size, hidden_sizes, num_classes, \n", - " activation=ReLU, output_activation=output_activation)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8888dc0c", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def create_regression_network(input_size: int, output_size: int = 1,\n", - " hidden_sizes: List[int] = None) -> Sequential:\n", - " \"\"\"\n", - " Create a network for regression tasks.\n", - " \n", - " Args:\n", - " input_size: Number of input features\n", - " output_size: Number of output values (default: 1)\n", - " hidden_sizes: List of hidden layer sizes (default: [input_size * 2])\n", - " \n", - " Returns:\n", - " Sequential network for regression\n", - " \n", - " TODO: Implement regression network creation.\n", - 
" APPROACH:\n", - " 1. Use default hidden sizes if none provided\n", - " 2. Create MLP with appropriate architecture\n", - " 3. Use no activation on output layer (linear output)\n", - " \n", - " EXAMPLE:\n", - " create_regression_network(5, 1) creates:\n", - " Dense(5β†’10) β†’ ReLU β†’ Dense(10β†’1) (no activation)\n", - " \n", - " HINTS:\n", - " - create_mlp() always appends an output activation, so build the layer list yourself\n", - " - For regression, we want linear outputs (no activation)\n", - " - Alternate Dense and ReLU, then end with a plain Dense layer\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "052bb51a", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def create_regression_network(input_size: int, output_size: int = 1,\n", - " hidden_sizes: List[int] = None) -> Sequential:\n", - " \"\"\"Create a network for regression tasks.\"\"\"\n", - " if hidden_sizes is None:\n", - " hidden_sizes = [input_size * 2] # Default: one hidden layer of size input_size * 2\n", - " \n", - " # Build the layers directly so the output layer stays linear (no activation)\n", - " layers = []\n", - " current_size = input_size\n", - " for hidden_size in hidden_sizes:\n", - " layers.append(Dense(input_size=current_size, output_size=hidden_size))\n", - " layers.append(ReLU())\n", - " current_size = hidden_size\n", - " layers.append(Dense(input_size=current_size, output_size=output_size))\n", - " return Sequential(layers)" - ] - }, - { - "cell_type": "markdown", - "id": "5dd183e8", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Practical Networks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0cf0dc20", - "metadata": {}, - "outputs": [], - "source": [ - "# Test practical networks\n", - "print(\"Testing practical networks...\")\n", - "\n", - "try:\n", - " # Test classification network\n", - " class_net = create_classification_network(input_size=5, num_classes=1)\n", - " x_class = Tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])\n", - " y_class = class_net(x_class)\n", - " print(f\"βœ… Classification output: {y_class}\")\n", - " print(f\"βœ… Output range: 
[{np.min(y_class.data):.3f}, {np.max(y_class.data):.3f}]\")\n", - " \n", - " # Test regression network\n", - " reg_net = create_regression_network(input_size=3, output_size=1)\n", - " x_reg = Tensor([[1.0, 2.0, 3.0]])\n", - " y_reg = reg_net(x_reg)\n", - " print(f\"βœ… Regression output: {y_reg}\")\n", - " print(f\"βœ… Output range: [{np.min(y_reg.data):.3f}, {np.max(y_reg.data):.3f}]\")\n", - " \n", - " print(\"πŸŽ‰ Practical networks work!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the network creation functions above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "da4b34d4", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 7: Network Behavior Analysis\n", - "\n", - "Let's create tools to analyze how networks behave with different inputs and understand their capabilities.\n", - "\n", - "### Why Behavior Analysis Matters\n", - "- **Understanding**: Learn what patterns networks can learn\n", - "- **Debugging**: Identify when networks fail\n", - "- **Design**: Choose appropriate architectures\n", - "- **Validation**: Ensure networks work as expected" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f9cbf0f3", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def analyze_network_behavior(network: Sequential, input_data: Tensor, \n", - " title: str = \"Network Behavior Analysis\"):\n", - " \"\"\"\n", - " Analyze how a network behaves with different inputs.\n", - " \n", - " Args:\n", - " network: Sequential network to analyze\n", - " input_data: Input tensor to test\n", - " title: Title for the plot\n", - " \n", - " TODO: Create an analysis showing network behavior and capabilities.\n", - " \n", - " APPROACH:\n", - " 1. Test the network with the given input\n", - " 2. Analyze the output characteristics\n", - " 3. Test with variations of the input\n", - " 4. 
Create visualizations showing behavior patterns\n", - " \n", - " EXAMPLE:\n", - " Test network with original input and noisy versions\n", - " Show how output changes with input variations\n", - " \n", - " HINTS:\n", - " - Test the original input\n", - " - Create variations (noise, scaling, etc.)\n", - " - Compare outputs across variations\n", - " - Show statistics and patterns\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f002ab23", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def analyze_network_behavior(network: Sequential, input_data: Tensor, \n", - " title: str = \"Network Behavior Analysis\"):\n", - " \"\"\"Analyze how a network behaves with different inputs.\"\"\"\n", - " if not _should_show_plots():\n", - " print(\"πŸ“Š Visualization disabled during testing\")\n", - " return\n", - " \n", - " # Test original input\n", - " original_output = network(input_data)\n", - " \n", - " # Create variations\n", - " noise_levels = [0.0, 0.1, 0.2, 0.5]\n", - " outputs = []\n", - " \n", - " for noise in noise_levels:\n", - " noisy_input = Tensor(input_data.data + noise * np.random.randn(*input_data.data.shape))\n", - " output = network(noisy_input)\n", - " outputs.append(output.data.flatten())\n", - " \n", - " # Create analysis plot\n", - " fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n", - " \n", - " # Original output\n", - " axes[0, 0].hist(outputs[0], bins=20, alpha=0.7)\n", - " axes[0, 0].set_title('Original Input Output')\n", - " axes[0, 0].set_xlabel('Value')\n", - " axes[0, 0].set_ylabel('Frequency')\n", - " \n", - " # Output stability\n", - " output_means = [np.mean(out) for out in outputs]\n", - " output_stds = [np.std(out) for out in outputs]\n", - " axes[0, 1].plot(noise_levels, output_means, 'bo-', label='Mean')\n", - " axes[0, 1].fill_between(noise_levels, \n", - " [m-s for m, s 
in zip(output_means, output_stds)],\n", - " [m+s for m, s in zip(output_means, output_stds)], \n", - " alpha=0.3, label='Β±1 Std')\n", - " axes[0, 1].set_xlabel('Noise Level')\n", - " axes[0, 1].set_ylabel('Output Value')\n", - " axes[0, 1].set_title('Output Stability')\n", - " axes[0, 1].legend()\n", - " \n", - " # Output distribution comparison\n", - " for i, (output, noise) in enumerate(zip(outputs, noise_levels)):\n", - " axes[1, 0].hist(output, bins=20, alpha=0.5, label=f'Noise={noise}')\n", - " axes[1, 0].set_xlabel('Output Value')\n", - " axes[1, 0].set_ylabel('Frequency')\n", - " axes[1, 0].set_title('Output Distribution Comparison')\n", - " axes[1, 0].legend()\n", - " \n", - " # Statistics\n", - " stats_text = f'Original Mean: {np.mean(outputs[0]):.3f}\\nOriginal Std: {np.std(outputs[0]):.3f}\\nOutput Range: [{np.min(outputs[0]):.3f}, {np.max(outputs[0]):.3f}]'\n", - " axes[1, 1].text(0.1, 0.5, stats_text, transform=axes[1, 1].transAxes, \n", - " verticalalignment='center', fontsize=10)\n", - " axes[1, 1].set_title('Network Statistics')\n", - " axes[1, 1].axis('off')\n", - " \n", - " plt.suptitle(title)\n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "58c4d2fe", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Network Behavior Analysis" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4241defa", - "metadata": {}, - "outputs": [], - "source": [ - "# Test network behavior analysis\n", - "print(\"Testing network behavior analysis...\")\n", - "\n", - "try:\n", - " # Create a test network\n", - " test_network = create_classification_network(input_size=3, num_classes=1)\n", - " test_input = Tensor([[1.0, 2.0, 3.0]])\n", - " \n", - " # Analyze behavior\n", - " if _should_show_plots():\n", - " analyze_network_behavior(test_network, test_input, \"Test Network Behavior\")\n", - " print(\"βœ… Network behavior analysis created!\")\n", - " else:\n", - " print(\"βœ… 
Network behavior analysis skipped during testing\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement analyze_network_behavior above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "5e6395d0", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 🎯 Module Summary\n", - "\n", - "Congratulations! You've built the foundation of neural network architectures:\n", - "\n", - "### What You've Accomplished\n", - "βœ… **Sequential Networks**: Composing layers into complete architectures \n", - "βœ… **MLP Creation**: Building multi-layer perceptrons \n", - "βœ… **Network Visualization**: Understanding architecture and data flow \n", - "βœ… **Network Comparison**: Analyzing different architectures \n", - "βœ… **Practical Networks**: Classification and regression networks \n", - "βœ… **Behavior Analysis**: Understanding network capabilities \n", - "\n", - "### Key Concepts You've Learned\n", - "- **Networks** are compositions of layers that transform data\n", - "- **Architecture design** determines network capabilities\n", - "- **Sequential networks** are the most fundamental building block\n", - "- **Different architectures** solve different problems\n", - "- **Visualization tools** help understand network behavior\n", - "\n", - "### What's Next\n", - "In the next modules, you'll build on this foundation:\n", - "- **Autograd**: Enable automatic differentiation for training\n", - "- **Training**: Learn parameters using gradients and optimizers\n", - "- **Loss Functions**: Define objectives for learning\n", - "- **Applications**: Solve real problems with neural networks\n", - "\n", - "### Real-World Connection\n", - "Your network architectures are now ready to:\n", - "- Compose layers into complete neural networks\n", - "- Create specialized architectures for different tasks\n", - "- Analyze and understand network behavior\n", - "- Integrate with the rest of the TinyTorch ecosystem\n", - "\n", - 
"**Ready for the next challenge?** Let's move on to automatic differentiation to enable training!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "090bbc0d", - "metadata": {}, - "outputs": [], - "source": [ - "# Final verification\n", - "print(\"\\n\" + \"=\"*50)\n", - "print(\"πŸŽ‰ NETWORKS MODULE COMPLETE!\")\n", - "print(\"=\"*50)\n", - "print(\"βœ… Sequential network implementation\")\n", - "print(\"βœ… MLP creation and architecture design\")\n", - "print(\"βœ… Network visualization and analysis\")\n", - "print(\"βœ… Network comparison tools\")\n", - "print(\"βœ… Practical classification and regression networks\")\n", - "print(\"βœ… Network behavior analysis\")\n", - "print(\"\\nπŸš€ Ready to enable training with autograd in the next module!\") " - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/assignments/source/05_cnn/cnn_dev.ipynb b/assignments/source/05_cnn/cnn_dev.ipynb deleted file mode 100644 index 562cd32e..00000000 --- a/assignments/source/05_cnn/cnn_dev.ipynb +++ /dev/null @@ -1,816 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "955f6ea8", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module X: CNN - Convolutional Neural Networks\n", - "\n", - "Welcome to the CNN module! Here you'll implement the core building block of modern computer vision: the convolutional layer.\n", - "\n", - "## Learning Goals\n", - "- Understand the convolution operation (sliding window, local connectivity, weight sharing)\n", - "- Implement Conv2D with explicit for-loops\n", - "- Visualize how convolution builds feature maps\n", - "- Compose Conv2D with other layers to build a simple ConvNet\n", - "- (Stretch) Explore stride, padding, pooling, and multi-channel input\n", - "\n", - "## Build β†’ Use β†’ Understand\n", - "1. **Build**: Conv2D layer using sliding window convolution\n", - "2. 
**Use**: Transform images and see feature maps\n", - "3. **Understand**: How CNNs learn spatial patterns" - ] - }, - { - "cell_type": "markdown", - "id": "60a60cd1", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## πŸ“¦ Where This Code Lives in the Final Package\n", - "\n", - "**Learning Side:** You work in `assignments/source/05_cnn/cnn_dev.py` \n", - "**Building Side:** Code exports to `tinytorch.core.layers`\n", - "\n", - "```python\n", - "# Final package structure:\n", - "from tinytorch.core.layers import Dense, Conv2D # Both layers together!\n", - "from tinytorch.core.activations import ReLU\n", - "from tinytorch.core.tensor import Tensor\n", - "```\n", - "\n", - "**Why this matters:**\n", - "- **Learning:** Focused modules for deep understanding\n", - "- **Production:** Proper organization like PyTorch's `torch.nn`\n", - "- **Consistency:** All layers (Dense, Conv2D) live together in `core.layers`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0294e6a", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp core.cnn" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e1c6590", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "from typing import List, Tuple, Optional\n", - "from tinytorch.core.tensor import Tensor\n", - "\n", - "# Setup and imports (for development)\n", - "import matplotlib.pyplot as plt\n", - "from tinytorch.core.layers import Dense\n", - "from tinytorch.core.activations import ReLU" - ] - }, - { - "cell_type": "markdown", - "id": "fed284f4", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 1: What is Convolution?\n", - "\n", - "### Definition\n", - "A **convolutional layer** applies a small filter (kernel) across the input, producing a feature map. 
This operation captures local patterns and is the foundation of modern vision models.\n", - "\n", - "### Why Convolution Matters in Computer Vision\n", - "- **Local connectivity**: Each output value depends only on a small region of the input\n", - "- **Weight sharing**: The same filter is applied everywhere (translation invariance)\n", - "- **Spatial hierarchy**: Multiple layers build increasingly complex features\n", - "- **Parameter efficiency**: Far fewer parameters than fully connected layers\n", - "\n", - "### The Fundamental Insight\n", - "**Convolution is pattern matching!** The kernel learns to detect specific patterns:\n", - "- **Edge detectors**: Find boundaries between objects\n", - "- **Texture detectors**: Recognize surface patterns\n", - "- **Shape detectors**: Identify geometric forms\n", - "- **Feature detectors**: Combine simple patterns into complex features\n", - "\n", - "### Real-World Examples\n", - "- **Image processing**: Detect edges, blur, sharpen\n", - "- **Computer vision**: Recognize objects, faces, text\n", - "- **Medical imaging**: Detect tumors, analyze scans\n", - "- "Autonomous driving**: Identify traffic signs, pedestrians\n", - "\n", - "### Visual Intuition\n", - "```\n", - "Input Image: Kernel: Output Feature Map:\n", - "[1, 2, 3] [1, 0] [1*1+2*0+4*0+5*(-1), 2*1+3*0+5*0+6*(-1)]\n", - "[4, 5, 6] [0, -1] [4*1+5*0+7*0+8*(-1), 5*1+6*0+8*0+9*(-1)]\n", - "[7, 8, 9]\n", - "```\n", - "\n", - "The kernel slides across the input, computing dot products at each position.\n", - "\n", - "### The Math Behind It\n", - "For input I (H×W) and kernel K (kH×kW), the output O (out_H×out_W) is:\n", - "```\n", - "O[i,j] = sum(I[i+di, j+dj] * K[di, dj] for di in range(kH), dj in range(kW))\n", - "```\n", - "\n", - "Let's implement this step by step!"
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64e2c944", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def conv2d_naive(input: np.ndarray, kernel: np.ndarray) -> np.ndarray:\n", - " \"\"\"\n", - " Naive 2D convolution (single channel, no stride, no padding).\n", - " \n", - " Args:\n", - " input: 2D input array (H, W)\n", - " kernel: 2D filter (kH, kW)\n", - " Returns:\n", - " 2D output array (H-kH+1, W-kW+1)\n", - " \n", - " TODO: Implement the sliding window convolution using for-loops.\n", - " \n", - " APPROACH:\n", - " 1. Get input dimensions: H, W = input.shape\n", - " 2. Get kernel dimensions: kH, kW = kernel.shape\n", - " 3. Calculate output dimensions: out_H = H - kH + 1, out_W = W - kW + 1\n", - " 4. Create output array: np.zeros((out_H, out_W))\n", - " 5. Use nested loops to slide the kernel:\n", - " - i loop: output rows (0 to out_H-1)\n", - " - j loop: output columns (0 to out_W-1)\n", - " - di loop: kernel rows (0 to kH-1)\n", - " - dj loop: kernel columns (0 to kW-1)\n", - " 6. 
For each (i,j), compute: output[i,j] += input[i+di, j+dj] * kernel[di, dj]\n", - " \n", - " EXAMPLE:\n", - " Input: [[1, 2, 3], Kernel: [[1, 0],\n", - " [4, 5, 6], [0, -1]]\n", - " [7, 8, 9]]\n", - " \n", - " Output[0,0] = 1*1 + 2*0 + 4*0 + 5*(-1) = 1 - 5 = -4\n", - " Output[0,1] = 2*1 + 3*0 + 5*0 + 6*(-1) = 2 - 6 = -4\n", - " Output[1,0] = 4*1 + 5*0 + 7*0 + 8*(-1) = 4 - 8 = -4\n", - " Output[1,1] = 5*1 + 6*0 + 8*0 + 9*(-1) = 5 - 9 = -4\n", - " \n", - " HINTS:\n", - " - Start with output = np.zeros((out_H, out_W))\n", - " - Use four nested loops: for i in range(out_H): for j in range(out_W): for di in range(kH): for dj in range(kW):\n", - " - Accumulate the sum: output[i,j] += input[i+di, j+dj] * kernel[di, dj]\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff21cee2", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def conv2d_naive(input: np.ndarray, kernel: np.ndarray) -> np.ndarray:\n", - " H, W = input.shape\n", - " kH, kW = kernel.shape\n", - " out_H, out_W = H - kH + 1, W - kW + 1\n", - " output = np.zeros((out_H, out_W), dtype=input.dtype)\n", - " for i in range(out_H):\n", - " for j in range(out_W):\n", - " for di in range(kH):\n", - " for dj in range(kW):\n", - " output[i, j] += input[i + di, j + dj] * kernel[di, dj]\n", - " return output" - ] - }, - { - "cell_type": "markdown", - "id": "2d3d2a95", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Conv2D Implementation\n", - "\n", - "Try your function on this simple example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ac0cac68", - "metadata": {}, - "outputs": [], - "source": [ - "# Test case for conv2d_naive\n", - "input = np.array([\n", - " [1, 2, 3],\n", - " [4, 5, 6],\n", - " [7, 8, 9]\n", - "], dtype=np.float32)\n", - "kernel = np.array([\n", - " [1, 0],\n", - " [0, 
-1]\n", - "], dtype=np.float32)\n", - "\n", - "expected = np.array([\n", - " [1*1+2*0+4*0+5*(-1), 2*1+3*0+5*0+6*(-1)],\n", - " [4*1+5*0+7*0+8*(-1), 5*1+6*0+8*0+9*(-1)]\n", - "], dtype=np.float32)\n", - "\n", - "try:\n", - " output = conv2d_naive(input, kernel)\n", - " print(\"βœ… Input:\\n\", input)\n", - " print(\"βœ… Kernel:\\n\", kernel)\n", - " print(\"βœ… Your output:\\n\", output)\n", - " print(\"βœ… Expected:\\n\", expected)\n", - " assert np.allclose(output, expected), \"❌ Output does not match expected!\"\n", - " print(\"πŸŽ‰ conv2d_naive works!\")\n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement conv2d_naive above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "c0771c94", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 2: Understanding What Convolution Does\n", - "\n", - "Let's visualize how different kernels detect different patterns:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c9e63d70", - "metadata": {}, - "outputs": [], - "source": [ - "# Visualize different convolution kernels\n", - "print(\"Visualizing different convolution kernels...\")\n", - "\n", - "try:\n", - " # Test different kernels\n", - " test_input = np.array([\n", - " [1, 1, 1, 0, 0],\n", - " [1, 1, 1, 0, 0],\n", - " [1, 1, 1, 0, 0],\n", - " [0, 0, 0, 0, 0],\n", - " [0, 0, 0, 0, 0]\n", - " ], dtype=np.float32)\n", - " \n", - " # Edge detection kernel (horizontal)\n", - " edge_kernel = np.array([\n", - " [1, 1, 1],\n", - " [0, 0, 0],\n", - " [-1, -1, -1]\n", - " ], dtype=np.float32)\n", - " \n", - " # Sharpening kernel\n", - " sharpen_kernel = np.array([\n", - " [0, -1, 0],\n", - " [-1, 5, -1],\n", - " [0, -1, 0]\n", - " ], dtype=np.float32)\n", - " \n", - " # Test edge detection\n", - " edge_output = conv2d_naive(test_input, edge_kernel)\n", - " print(\"βœ… Edge detection kernel:\")\n", - " print(\" Detects horizontal edges (boundaries between light and dark)\")\n", - " print(\" 
Output:\\n\", edge_output)\n", - " \n", - " # Test sharpening\n", - " sharpen_output = conv2d_naive(test_input, sharpen_kernel)\n", - " print(\"βœ… Sharpening kernel:\")\n", - " print(\" Enhances edges and details\")\n", - " print(\" Output:\\n\", sharpen_output)\n", - " \n", - " print(\"\\nπŸ’‘ Different kernels detect different patterns!\")\n", - " print(\" Neural networks learn these kernels automatically!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")" - ] - }, - { - "cell_type": "markdown", - "id": "ef10d9f8", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 3: Conv2D Layer Class\n", - "\n", - "Now let's wrap your convolution function in a layer class for use in networks. This makes it consistent with other layers like Dense.\n", - "\n", - "### Why Layer Classes Matter\n", - "- **Consistent API**: Same interface as Dense layers\n", - "- **Learnable parameters**: Kernels can be learned from data\n", - "- **Composability**: Can be combined with other layers\n", - "- **Integration**: Works seamlessly with the rest of TinyTorch\n", - "\n", - "### The Pattern\n", - "```\n", - "Input Tensor β†’ Conv2D β†’ Output Tensor\n", - "```\n", - "\n", - "Just like Dense layers, but with spatial operations instead of linear transformations." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ae72cc4", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Conv2D:\n", - " \"\"\"\n", - " 2D Convolutional Layer (single channel, single filter, no stride/pad).\n", - " \n", - " Args:\n", - " kernel_size: (kH, kW) - size of the convolution kernel\n", - " \n", - " TODO: Initialize a random kernel and implement the forward pass using conv2d_naive.\n", - " \n", - " APPROACH:\n", - " 1. Store kernel_size as instance variable\n", - " 2. Initialize random kernel with small values\n", - " 3. 
Implement forward pass using conv2d_naive function\n", - " 4. Return Tensor wrapped around the result\n", - " \n", - " EXAMPLE:\n", - " layer = Conv2D(kernel_size=(2, 2))\n", - " x = Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) # shape (3, 3)\n", - " y = layer(x) # shape (2, 2)\n", - " \n", - " HINTS:\n", - " - Store kernel_size as (kH, kW)\n", - " - Initialize kernel with np.random.randn(kH, kW) * 0.1 (small values)\n", - " - Use conv2d_naive(x.data, self.kernel) in forward pass\n", - " - Return Tensor(result) to wrap the result\n", - " \"\"\"\n", - " def __init__(self, kernel_size: Tuple[int, int]):\n", - " \"\"\"\n", - " Initialize Conv2D layer with random kernel.\n", - " \n", - " Args:\n", - " kernel_size: (kH, kW) - size of the convolution kernel\n", - " \n", - " TODO: \n", - " 1. Store kernel_size as instance variable\n", - " 2. Initialize random kernel with small values\n", - " 3. Scale kernel values to prevent large outputs\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Store kernel_size as self.kernel_size\n", - " 2. Unpack kernel_size into kH, kW\n", - " 3. Initialize kernel: np.random.randn(kH, kW) * 0.1\n", - " 4. Convert to float32 for consistency\n", - " \n", - " EXAMPLE:\n", - " Conv2D((2, 2)) creates:\n", - " - kernel: shape (2, 2) with small random values\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Forward pass: apply convolution to input.\n", - " \n", - " Args:\n", - " x: Input tensor of shape (H, W)\n", - " \n", - " Returns:\n", - " Output tensor of shape (H-kH+1, W-kW+1)\n", - " \n", - " TODO: Implement convolution using conv2d_naive function.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Use conv2d_naive(x.data, self.kernel)\n", - " 2. 
Return Tensor(result)\n", - " \n", - " EXAMPLE:\n", - " Input x: Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) # shape (3, 3)\n", - " Kernel: shape (2, 2)\n", - " Output: Tensor([[val1, val2], [val3, val4]]) # shape (2, 2)\n", - " \n", - " HINTS:\n", - " - x.data gives you the numpy array\n", - " - self.kernel is your learned kernel\n", - " - Use conv2d_naive(x.data, self.kernel)\n", - " - Return Tensor(result) to wrap the result\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "48e50d1b", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Conv2D:\n", - " def __init__(self, kernel_size: Tuple[int, int]):\n", - " self.kernel_size = kernel_size\n", - " kH, kW = kernel_size\n", - " # Initialize with small random values\n", - " self.kernel = np.random.randn(kH, kW).astype(np.float32) * 0.1\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " return Tensor(conv2d_naive(x.data, self.kernel))\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "4c29a5dd", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Conv2D Layer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab358d43", - "metadata": {}, - "outputs": [], - "source": [ - "# Test Conv2D layer\n", - "print(\"Testing Conv2D layer...\")\n", - "\n", - "try:\n", - " # Test basic Conv2D layer\n", - " conv = Conv2D(kernel_size=(2, 2))\n", - " x = Tensor(np.array([\n", - " [1, 2, 3],\n", - " [4, 5, 6],\n", - " [7, 8, 9]\n", - " ], dtype=np.float32))\n", - " \n", - " print(f\"βœ… Input shape: {x.shape}\")\n", - " print(f\"βœ… Kernel shape: 
{conv.kernel.shape}\")\n", - " print(f\"βœ… Kernel values:\\n{conv.kernel}\")\n", - " \n", - " y = conv(x)\n", - " print(f\"βœ… Output shape: {y.shape}\")\n", - " print(f\"βœ… Output: {y}\")\n", - " \n", - " # Test with different kernel size\n", - " conv2 = Conv2D(kernel_size=(3, 3))\n", - " y2 = conv2(x)\n", - " print(f\"βœ… 3x3 kernel output shape: {y2.shape}\")\n", - " \n", - " print(\"\\nπŸŽ‰ Conv2D layer works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the Conv2D layer above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "9700071c", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 4: Building a Simple ConvNet\n", - "\n", - "Now let's compose Conv2D layers with other layers to build a complete convolutional neural network!\n", - "\n", - "### Why ConvNets Matter\n", - "- **Spatial hierarchy**: Each layer learns increasingly complex features\n", - "- **Parameter sharing**: Same kernel applied everywhere (efficiency)\n", - "- **Translation invariance**: Can recognize objects regardless of position\n", - "- **Real-world success**: Power most modern computer vision systems\n", - "\n", - "### The Architecture\n", - "```\n", - "Input Image β†’ Conv2D β†’ ReLU β†’ Flatten β†’ Dense β†’ Output\n", - "```\n", - "\n", - "This simple architecture can learn to recognize patterns in images!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "abbf0682", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def flatten(x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Flatten a 2D tensor to 1D (for connecting to Dense).\n", - " \n", - " TODO: Implement flattening operation.\n", - " \n", - " APPROACH:\n", - " 1. Get the numpy array from the tensor\n", - " 2. Use .flatten() to convert to 1D\n", - " 3. Add batch dimension with [None, :]\n", - " 4. 
Return Tensor wrapped around the result\n", - " \n", - " EXAMPLE:\n", - " Input: Tensor([[1, 2], [3, 4]]) # shape (2, 2)\n", - " Output: Tensor([[1, 2, 3, 4]]) # shape (1, 4)\n", - " \n", - " HINTS:\n", - " - Use x.data.flatten() to get 1D array\n", - " - Add batch dimension: result[None, :]\n", - " - Return Tensor(result)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b107a4c", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def flatten(x: Tensor) -> Tensor:\n", - " \"\"\"Flatten a 2D tensor to 1D (for connecting to Dense).\"\"\"\n", - " return Tensor(x.data.flatten()[None, :])" - ] - }, - { - "cell_type": "markdown", - "id": "7729a18f", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Flatten Function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "075746b0", - "metadata": {}, - "outputs": [], - "source": [ - "# Test flatten function\n", - "print(\"Testing flatten function...\")\n", - "\n", - "try:\n", - " # Test flattening\n", - " x = Tensor([[1, 2, 3], [4, 5, 6]]) # shape (2, 3)\n", - " flattened = flatten(x)\n", - " \n", - " print(f\"βœ… Input shape: {x.shape}\")\n", - " print(f\"βœ… Flattened shape: {flattened.shape}\")\n", - " print(f\"βœ… Flattened values: {flattened}\")\n", - " \n", - " # Verify the flattening worked correctly\n", - " expected = np.array([[1, 2, 3, 4, 5, 6]])\n", - " assert np.allclose(flattened.data, expected), \"❌ Flattening incorrect!\"\n", - " print(\"βœ… Flattening works correctly!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the flatten function above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "506b9eb7", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 5: Composing a Complete ConvNet\n", - "\n", - 
"Now let's build a simple convolutional neural network that can process images!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "952a65f9", - "metadata": {}, - "outputs": [], - "source": [ - "# Compose a simple ConvNet\n", - "print(\"Building a simple ConvNet...\")\n", - "\n", - "try:\n", - " # Create network components\n", - " conv = Conv2D((2, 2))\n", - " relu = ReLU()\n", - " dense = Dense(input_size=4, output_size=1) # 4 features from 2x2 output\n", - " \n", - " # Test input (small 3x3 \"image\")\n", - " x = Tensor(np.random.randn(3, 3).astype(np.float32))\n", - " print(f\"βœ… Input shape: {x.shape}\")\n", - " print(f\"βœ… Input: {x}\")\n", - " \n", - " # Forward pass through the network\n", - " conv_out = conv(x)\n", - " print(f\"βœ… After Conv2D: {conv_out}\")\n", - " \n", - " relu_out = relu(conv_out)\n", - " print(f\"βœ… After ReLU: {relu_out}\")\n", - " \n", - " flattened = flatten(relu_out)\n", - " print(f\"βœ… After flatten: {flattened}\")\n", - " \n", - " final_out = dense(flattened)\n", - " print(f\"βœ… Final output: {final_out}\")\n", - " \n", - " print(\"\\nπŸŽ‰ Simple ConvNet works!\")\n", - " print(\"This network can learn to recognize patterns in images!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Check your Conv2D, flatten, and Dense implementations!\")" - ] - }, - { - "cell_type": "markdown", - "id": "92177b19", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 6: Understanding the Power of Convolution\n", - "\n", - "Let's see how convolution captures different types of patterns:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "255f7e00", - "metadata": {}, - "outputs": [], - "source": [ - "# Demonstrate pattern detection\n", - "print(\"Demonstrating pattern detection...\")\n", - "\n", - "try:\n", - " # Create a simple \"image\" with a pattern\n", - " image = np.array([\n", - " [0, 0, 0, 0, 0],\n", - " [0, 1, 1, 1, 0],\n", - " [0, 
1, 1, 1, 0],\n", - " [0, 1, 1, 1, 0],\n", - " [0, 0, 0, 0, 0]\n", - " ], dtype=np.float32)\n", - " \n", - " # Different kernels detect different patterns\n", - " edge_kernel = np.array([\n", - " [1, 1, 1],\n", - " [1, -8, 1],\n", - " [1, 1, 1]\n", - " ], dtype=np.float32)\n", - " \n", - " blur_kernel = np.array([\n", - " [1/9, 1/9, 1/9],\n", - " [1/9, 1/9, 1/9],\n", - " [1/9, 1/9, 1/9]\n", - " ], dtype=np.float32)\n", - " \n", - " # Test edge detection\n", - " edge_result = conv2d_naive(image, edge_kernel)\n", - " print(\"βœ… Edge detection:\")\n", - " print(\" Detects boundaries around the white square\")\n", - " print(\" Result:\\n\", edge_result)\n", - " \n", - " # Test blurring\n", - " blur_result = conv2d_naive(image, blur_kernel)\n", - " print(\"βœ… Blurring:\")\n", - " print(\" Smooths the image\")\n", - " print(\" Result:\\n\", blur_result)\n", - " \n", - " print(\"\\nπŸ’‘ Different kernels = different feature detectors!\")\n", - " print(\" Neural networks learn these automatically from data!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")" - ] - }, - { - "cell_type": "markdown", - "id": "47b66d7c", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 🎯 Module Summary\n", - "\n", - "Congratulations! 
You've built the foundation of convolutional neural networks:\n", - "\n", - "### What You've Accomplished\n", - "✅ **Convolution Operation**: Understanding the sliding window mechanism \n", - "✅ **Conv2D Layer**: Learnable convolutional layer implementation \n", - "✅ **Pattern Detection**: Visualizing how kernels detect different features \n", - "✅ **ConvNet Architecture**: Composing Conv2D with other layers \n", - "✅ **Real-world Applications**: Understanding computer vision applications \n", - "\n", - "### Key Concepts You've Learned\n", - "- **Convolution** is pattern matching with sliding windows\n", - "- **Local connectivity** means each output depends on a small input region\n", - "- **Weight sharing** makes CNNs parameter-efficient\n", - "- **Spatial hierarchy** builds complex features from simple patterns\n", - "- **Translation invariance** allows recognition regardless of position\n", - "\n", - "### What's Next\n", - "In the next modules, you'll build on this foundation:\n", - "- **Advanced CNN features**: Stride, padding, pooling\n", - "- **Multi-channel convolution**: RGB images, multiple filters\n", - "- **Training**: Learning kernels from data\n", - "- **Real applications**: Image classification, object detection\n", - "\n", - "### Real-World Connection\n", - "Your Conv2D layer is now ready to:\n", - "- Learn edge detectors, texture recognizers, and shape detectors\n", - "- Process real images for computer vision tasks\n", - "- Integrate with the rest of the TinyTorch ecosystem\n", - "- Scale to complex architectures like ResNet, VGG, etc.\n", - "\n", - "**Ready for the next challenge?** Let's move on to training these networks!"
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f9cbe7e", - "metadata": {}, - "outputs": [], - "source": [ - "# Final verification\n", - "print(\"\\n\" + \"=\"*50)\n", - "print(\"πŸŽ‰ CNN MODULE COMPLETE!\")\n", - "print(\"=\"*50)\n", - "print(\"βœ… Convolution operation understanding\")\n", - "print(\"βœ… Conv2D layer implementation\")\n", - "print(\"βœ… Pattern detection visualization\")\n", - "print(\"βœ… ConvNet architecture composition\")\n", - "print(\"βœ… Real-world computer vision context\")\n", - "print(\"\\nπŸš€ Ready to train networks in the next module!\") " - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/assignments/source/06_dataloader/dataloader_dev.ipynb b/assignments/source/06_dataloader/dataloader_dev.ipynb deleted file mode 100644 index 0aa3de0f..00000000 --- a/assignments/source/06_dataloader/dataloader_dev.ipynb +++ /dev/null @@ -1,1699 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "ed2e9bc6", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module 4: Data - Data Loading and Preprocessing\n", - "\n", - "Welcome to the Data module! This is where you'll learn how to efficiently load, process, and manage data for machine learning systems.\n", - "\n", - "## Learning Goals\n", - "- Understand data pipelines as the foundation of ML systems\n", - "- Implement efficient data loading with memory management\n", - "- Build reusable dataset abstractions for different data types\n", - "- Master batching strategies and I/O optimization\n", - "- Learn systems thinking for data engineering\n", - "\n", - "## Build β†’ Use β†’ Understand\n", - "1. **Build**: Create dataset classes and data loaders\n", - "2. **Use**: Load real datasets and train models\n", - "3. 
**Understand**: How data engineering affects system performance\n", - "\n", - "## Module Dependencies\n", - "This module builds on previous modules:\n", - "- **tensor** → **activations** → **layers** → **networks** → **data**\n", - "- Data feeds into training: data → autograd → training" - ] - }, - { - "cell_type": "markdown", - "id": "31e92474", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 📦 Where This Code Lives in the Final Package\n", - "\n", - "**Learning Side:** You work in `assignments/source/06_dataloader/dataloader_dev.py` \n", - "**Building Side:** Code exports to `tinytorch.core.dataloader`\n", - "\n", - "```python\n", - "# Final package structure:\n", - "from tinytorch.core.dataloader import Dataset, DataLoader, CIFAR10Dataset\n", - "from tinytorch.core.tensor import Tensor\n", - "from tinytorch.core.networks import Sequential\n", - "```\n", - "\n", - "**Why this matters:**\n", - "- **Learning:** Focused modules for deep understanding\n", - "- **Production:** Proper organization like PyTorch's `torch.utils.data`\n", - "- **Consistency:** All data loading utilities live together in `core.dataloader`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a97c43aa", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp core.dataloader\n", - "\n", - "# Setup and imports\n", - "import numpy as np\n", - "import sys\n", - "import os\n", - "import pickle\n", - "import struct\n", - "from typing import List, Tuple, Optional, Union, Iterator\n", - "import matplotlib.pyplot as plt\n", - "import urllib.request\n", - "import tarfile\n", - "\n", - "# Import our building blocks\n", - "from tinytorch.core.tensor import Tensor\n", - "\n", - "print(\"🔥 TinyTorch Data Module\")\n", - "print(f\"NumPy version: {np.__version__}\")\n", - "print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", - "print(\"Ready to build data pipelines!\")" - ] - }, - { - "cell_type": "code", -
"execution_count": null, - "id": "67607d09", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "import sys\n", - "import os\n", - "import pickle\n", - "import struct\n", - "from typing import List, Tuple, Optional, Union, Iterator\n", - "import matplotlib.pyplot as plt\n", - "import urllib.request\n", - "import tarfile\n", - "\n", - "# Import our building blocks\n", - "from tinytorch.core.tensor import Tensor" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f5d8605d", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def _should_show_plots():\n", - " \"\"\"Check if we should show plots (disable during testing)\"\"\"\n", - " return 'pytest' not in sys.modules and 'test' not in sys.argv" - ] - }, - { - "cell_type": "markdown", - "id": "c4bc9c75", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 1: What is Data Engineering?\n", - "\n", - "### Definition\n", - "**Data engineering** is the foundation of all machine learning systems. 
It involves loading, processing, and managing data efficiently so that models can learn from it.\n", - "\n", - "### Why Data Engineering Matters\n", - "- **Data is the fuel**: Without proper data pipelines, nothing else works\n", - "- **I/O bottlenecks**: Data loading is often the biggest performance bottleneck\n", - "- **Memory management**: How you handle data affects everything else\n", - "- **Production reality**: Data pipelines are critical in real ML systems\n", - "\n", - "### The Fundamental Insight\n", - "**Data engineering is about managing the flow of information through your system:**\n", - "```\n", - "Raw Data β†’ Load β†’ Preprocess β†’ Batch β†’ Feed to Model\n", - "```\n", - "\n", - "### Real-World Examples\n", - "- **Image datasets**: CIFAR-10, ImageNet, MNIST\n", - "- **Text datasets**: Wikipedia, books, social media\n", - "- **Tabular data**: CSV files, databases, spreadsheets\n", - "- **Audio data**: Speech recordings, music files\n", - "\n", - "### Systems Thinking\n", - "- **Memory efficiency**: Handle datasets larger than RAM\n", - "- **I/O optimization**: Read from disk efficiently\n", - "- **Batching strategies**: Trade-offs between memory and speed\n", - "- **Caching**: When to cache vs recompute\n", - "\n", - "### Visual Intuition\n", - "```\n", - "Raw Files: [image1.jpg, image2.jpg, image3.jpg, ...]\n", - "Load: [Tensor(32x32x3), Tensor(32x32x3), Tensor(32x32x3), ...]\n", - "Batch: [Tensor(32, 32, 32, 3)] # 32 images at once\n", - "Model: Process batch efficiently\n", - "```\n", - "\n", - "Let's start by building the most fundamental component: **Dataset**." 
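The four-stage pipeline above can be sketched end to end in plain NumPy. A minimal illustration, not part of the module's API; the array shapes and the 255-scaling are assumptions chosen to mirror the CIFAR-10-style flow described here:

```python
import numpy as np

# Raw Data -> Load -> Preprocess -> Batch (illustrative shapes)
raw_files = [np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
             for _ in range(64)]                        # stand-ins for files on disk

loaded = [img.astype(np.float32) for img in raw_files]  # Load: bytes -> floats
scaled = [img / 255.0 for img in loaded]                # Preprocess: [0, 255] -> [0, 1]

batch = np.stack(scaled[:32], axis=0)                   # Batch: 32 images at once
print(batch.shape)  # (32, 32, 32, 3)
```

Note how batching turns a Python list of per-sample arrays into one contiguous array the model can process in a single pass; that is the shape change shown in the "Visual Intuition" diagram.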
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3cf8aa0", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Dataset:\n", - " \"\"\"\n", - " Base Dataset class: Abstract interface for all datasets.\n", - " \n", - " The fundamental abstraction for data loading in TinyTorch.\n", - " Students implement concrete datasets by inheriting from this class.\n", - " \n", - " TODO: Implement the base Dataset class with required methods.\n", - " \n", - " APPROACH:\n", - " 1. Define the interface that all datasets must implement\n", - " 2. Include methods for getting individual samples and dataset size\n", - " 3. Make it easy to extend for different data types\n", - " \n", - " EXAMPLE:\n", - " dataset = CIFAR10Dataset(\"data/cifar10/\")\n", - " sample, label = dataset[0] # Get first sample\n", - " size = len(dataset) # Get dataset size\n", - " \n", - " HINTS:\n", - " - Use abstract methods that subclasses must implement\n", - " - Include __getitem__ for indexing and __len__ for size\n", - " - Add helper methods for getting sample shapes and number of classes\n", - " \"\"\"\n", - " \n", - " def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]:\n", - " \"\"\"\n", - " Get a single sample and label by index.\n", - " \n", - " Args:\n", - " index: Index of the sample to retrieve\n", - " \n", - " Returns:\n", - " Tuple of (data, label) tensors\n", - " \n", - " TODO: Implement abstract method for getting samples.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. This is an abstract method - subclasses will implement it\n", - " 2. Return a tuple of (data, label) tensors\n", - " 3. 
Data should be the input features, label should be the target\n", - " \n", - " EXAMPLE:\n", - " dataset[0] should return (Tensor(image_data), Tensor(label))\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __len__(self) -> int:\n", - " \"\"\"\n", - " Get the total number of samples in the dataset.\n", - " \n", - " TODO: Implement abstract method for getting dataset size.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. This is an abstract method - subclasses will implement it\n", - " 2. Return the total number of samples in the dataset\n", - " \n", - " EXAMPLE:\n", - " len(dataset) should return 50000 for CIFAR-10 training set\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def get_sample_shape(self) -> Tuple[int, ...]:\n", - " \"\"\"\n", - " Get the shape of a single data sample.\n", - " \n", - " TODO: Implement method to get sample shape.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Get the first sample using self[0]\n", - " 2. Extract the data part (first element of tuple)\n", - " 3. Return the shape of the data tensor\n", - " \n", - " EXAMPLE:\n", - " For CIFAR-10: returns (3, 32, 32) for RGB images\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def get_num_classes(self) -> int:\n", - " \"\"\"\n", - " Get the number of classes in the dataset.\n", - " \n", - " TODO: Implement abstract method for getting number of classes.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. This is an abstract method - subclasses will implement it\n", - " 2. 
Return the total number of classes in the dataset\n", - " \n", - " EXAMPLE:\n", - " For CIFAR-10: returns 10 (airplane, car, bird, cat, deer, dog, frog, horse, ship, truck)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a63b5bf2", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Dataset:\n", - " \"\"\"Base Dataset class: Abstract interface for all datasets.\"\"\"\n", - " \n", - " def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]:\n", - " \"\"\"Get a single sample and label by index.\"\"\"\n", - " raise NotImplementedError(\"Subclasses must implement __getitem__\")\n", - " \n", - " def __len__(self) -> int:\n", - " \"\"\"Get the total number of samples in the dataset.\"\"\"\n", - " raise NotImplementedError(\"Subclasses must implement __len__\")\n", - " \n", - " def get_sample_shape(self) -> Tuple[int, ...]:\n", - " \"\"\"Get the shape of a single data sample.\"\"\"\n", - " sample, _ = self[0]\n", - " return sample.shape\n", - " \n", - " def get_num_classes(self) -> int:\n", - " \"\"\"Get the number of classes in the dataset.\"\"\"\n", - " raise NotImplementedError(\"Subclasses must implement get_num_classes\")" - ] - }, - { - "cell_type": "markdown", - "id": "f693f5a0", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Base Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d7a94dc5", - "metadata": {}, - "outputs": [], - "source": [ - "# Test the base Dataset class\n", - "print(\"Testing base Dataset class...\")\n", - "\n", - "try:\n", - " # Create a simple test dataset\n", - " class TestDataset(Dataset):\n", - " def __init__(self, num_samples=10):\n", - " self.num_samples = num_samples\n", - " self.data = [Tensor(np.random.randn(3, 32, 32)) for _ in range(num_samples)]\n", - " self.labels = [Tensor(np.array(i % 3)) 
for i in range(num_samples)]\n", - " \n", - " def __getitem__(self, index):\n", - " return self.data[index], self.labels[index]\n", - " \n", - " def __len__(self):\n", - " return self.num_samples\n", - " \n", - " def get_num_classes(self):\n", - " return 3\n", - " \n", - " # Test the dataset\n", - " dataset = TestDataset(5)\n", - " print(f\"βœ… Dataset created with {len(dataset)} samples\")\n", - " \n", - " # Test indexing\n", - " sample, label = dataset[0]\n", - " print(f\"βœ… Sample shape: {sample.shape}\")\n", - " print(f\"βœ… Label: {label}\")\n", - " \n", - " # Test helper methods\n", - " print(f\"βœ… Sample shape: {dataset.get_sample_shape()}\")\n", - " print(f\"βœ… Number of classes: {dataset.get_num_classes()}\")\n", - " \n", - " print(\"πŸŽ‰ Base Dataset class works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the base Dataset class above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "e9494f12", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: Understanding CIFAR-10 Dataset\n", - "\n", - "Now let's build a real dataset! 
We'll focus on **CIFAR-10** - the perfect dataset for learning data loading.\n", - "\n", - "### Why CIFAR-10?\n", - "- **Perfect size**: 170MB - large enough for optimization, small enough to manage\n", - "- **Real data**: 32x32 color images, 10 classes\n", - "- **Classic dataset**: Every ML student should know it\n", - "- **Good complexity**: Requires proper data loading techniques\n", - "\n", - "### The CIFAR-10 Format\n", - "```\n", - "File structure:\n", - "- data_batch_1: 10,000 images + labels\n", - "- data_batch_2: 10,000 images + labels\n", - "- ...\n", - "- test_batch: 10,000 test images\n", - "- batches.meta: Class names and metadata\n", - "\n", - "Pickled batch format (python version):\n", - "- Each batch file is a pickled dict with 'data' and 'labels' keys\n", - "- 'data': uint8 array of shape (10000, 3072), one row per image\n", - "- Row layout is planar: 1024 red, then 1024 green, then 1024 blue bytes\n", - "- So data.reshape(-1, 3, 32, 32) recovers (C, H, W) images directly\n", - "```\n", - "\n", - "### Data Loading Challenges\n", - "- **File parsing**: CIFAR-10 ships as pickled batch files with a planar pixel layout\n", - "- **Memory management**: 60,000 images need efficient handling\n", - "- **Batching**: Grouping samples for efficient processing\n", - "- **Preprocessing**: Normalization, augmentation, etc.\n", - "\n", - "Let's implement CIFAR-10 loading step by step!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11bab5d9", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class CIFAR10Dataset(Dataset):\n", - " \"\"\"\n", - " CIFAR-10 Dataset: Load and manage CIFAR-10 image data.\n", - " \n", - " CIFAR-10 contains 60,000 32x32 color images in 10 classes.\n", - " Perfect for learning data loading and image processing.\n", - " \n", - " Args:\n", - " root_dir: Directory containing CIFAR-10 files\n", - " train: If True, load training data. If False, load test data.\n", - " download: If True, download dataset if not present\n", - " \n", - " TODO: Implement CIFAR-10 dataset loading.\n", - " \n", - " APPROACH:\n", - " 1. 
Handle dataset download if needed (with progress bar!)\n", - " 2. Parse binary files to extract images and labels\n", - " 3. Store data efficiently in memory\n", - " 4. Implement indexing and size methods\n", - " \n", - " EXAMPLE:\n", - " dataset = CIFAR10Dataset(\"data/cifar10/\", train=True)\n", - " image, label = dataset[0] # Get first image\n", - " print(f\"Image shape: {image.shape}\") # (3, 32, 32)\n", - " print(f\"Label: {label}\") # Tensor with class index\n", - " \n", - " HINTS:\n", - " - Use pickle to load binary files\n", - " - Each batch file contains 'data' and 'labels' keys\n", - " - Reshape data to (3, 32, 32) format\n", - " - Store images and labels as separate lists\n", - " - Add progress bar with urllib.request.urlretrieve(url, filename, reporthook=progress_function)\n", - " - Progress function receives (block_num, block_size, total_size) parameters\n", - " \"\"\"\n", - " \n", - " def __init__(self, root_dir: str, train: bool = True, download: bool = True):\n", - " \"\"\"\n", - " Initialize CIFAR-10 dataset.\n", - " \n", - " Args:\n", - " root_dir: Directory to store/load dataset\n", - " train: If True, load training data. If False, load test data.\n", - " download: If True, download dataset if not present\n", - " \n", - " TODO: Implement CIFAR-10 initialization.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Create root directory if it doesn't exist\n", - " 2. Download dataset if needed and not present (with progress bar!)\n", - " 3. Load binary files and parse data\n", - " 4. Store images and labels in memory\n", - " 5. 
Set up class names\n", - " \n", - " EXAMPLE:\n", - " CIFAR10Dataset(\"data/cifar10/\", train=True)\n", - " creates a dataset with 50,000 training images\n", - " \n", - " PROGRESS BAR HINT:\n", - " def show_progress(block_num, block_size, total_size):\n", - " downloaded = block_num * block_size\n", - " percent = (downloaded * 100) // total_size\n", - " print(f\"\\\\rDownloading: {percent}%\", end='', flush=True)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]:\n", - " \"\"\"\n", - " Get a single image and label by index.\n", - " \n", - " Args:\n", - " index: Index of the sample to retrieve\n", - " \n", - " Returns:\n", - " Tuple of (image, label) tensors\n", - " \n", - " TODO: Implement sample retrieval.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Get image from self.images[index]\n", - " 2. Get label from self.labels[index]\n", - " 3. Return (Tensor(image), Tensor(label))\n", - " \n", - " EXAMPLE:\n", - " image, label = dataset[0]\n", - " image.shape should be (3, 32, 32)\n", - " label should be integer 0-9\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __len__(self) -> int:\n", - " \"\"\"\n", - " Get the total number of samples in the dataset.\n", - " \n", - " TODO: Return the length of the dataset.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Return len(self.images)\n", - " \n", - " EXAMPLE:\n", - " Training set: 50,000 samples\n", - " Test set: 10,000 samples\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def get_num_classes(self) -> int:\n", - " \"\"\"\n", - " Get the number of classes in CIFAR-10.\n", - " \n", - " TODO: Return the number of classes.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. CIFAR-10 has 10 classes\n", - " 2. 
Return 10\n", - " \n", - " EXAMPLE:\n", - " Returns 10 for CIFAR-10\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04cf4c66", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class CIFAR10Dataset(Dataset):\n", - " \"\"\"CIFAR-10 Dataset: Load and manage CIFAR-10 image data.\"\"\"\n", - " \n", - " def __init__(self, root_dir: str, train: bool = True, download: bool = True):\n", - " self.root_dir = root_dir\n", - " self.train = train\n", - " self.class_names = ['airplane', 'car', 'bird', 'cat', 'deer', \n", - " 'dog', 'frog', 'horse', 'ship', 'truck']\n", - " \n", - " # Create directory if it doesn't exist\n", - " os.makedirs(root_dir, exist_ok=True)\n", - " \n", - " # Download if needed\n", - " if download:\n", - " self._download_if_needed()\n", - " \n", - " # Load data\n", - " self._load_data()\n", - " \n", - " def _download_if_needed(self):\n", - " \"\"\"Download CIFAR-10 if not present.\"\"\"\n", - " cifar_path = os.path.join(self.root_dir, \"cifar-10-batches-py\")\n", - " if not os.path.exists(cifar_path):\n", - " print(\"πŸ”„ Downloading CIFAR-10 dataset...\")\n", - " print(\"πŸ“¦ Size: ~170MB (this may take a few minutes)\")\n", - " url = \"https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\"\n", - " filename = os.path.join(self.root_dir, \"cifar-10-python.tar.gz\")\n", - " \n", - " try:\n", - " # Download with progress bar\n", - " def show_progress(block_num, block_size, total_size):\n", - " \"\"\"Show download progress bar.\"\"\"\n", - " downloaded = block_num * block_size\n", - " if total_size > 0:\n", - " percent = min(100, (downloaded * 100) // total_size)\n", - " bar_length = 50\n", - " filled_length = (percent * bar_length) // 100\n", - " bar = 'β–ˆ' * filled_length + 'β–‘' * (bar_length - filled_length)\n", - " \n", - " # Convert bytes to MB\n", - " downloaded_mb = 
downloaded / (1024 * 1024)\n", - " total_mb = total_size / (1024 * 1024)\n", - " \n", - " print(f\"\\rπŸ“₯ [{bar}] {percent}% ({downloaded_mb:.1f}/{total_mb:.1f} MB)\", end='', flush=True)\n", - " else:\n", - " # Fallback if total size unknown\n", - " downloaded_mb = downloaded / (1024 * 1024)\n", - " print(f\"\\rπŸ“₯ Downloaded: {downloaded_mb:.1f} MB\", end='', flush=True)\n", - " \n", - " urllib.request.urlretrieve(url, filename, reporthook=show_progress)\n", - " print() # New line after progress bar\n", - " \n", - " # Extract\n", - " print(\"πŸ“‚ Extracting CIFAR-10 files...\")\n", - " with tarfile.open(filename, 'r:gz') as tar:\n", - " tar.extractall(self.root_dir, filter='data')\n", - " \n", - " # Clean up\n", - " os.remove(filename)\n", - " print(\"βœ… CIFAR-10 downloaded and extracted successfully!\")\n", - " \n", - " except Exception as e:\n", - " print(f\"\\n❌ Download failed: {e}\")\n", - " print(\"Please download CIFAR-10 manually from https://www.cs.toronto.edu/~kriz/cifar.html\")\n", - " \n", - " def _load_data(self):\n", - " \"\"\"Load CIFAR-10 data from binary files.\"\"\"\n", - " cifar_path = os.path.join(self.root_dir, \"cifar-10-batches-py\")\n", - " \n", - " self.images = []\n", - " self.labels = []\n", - " \n", - " if self.train:\n", - " # Load training batches\n", - " for i in range(1, 6):\n", - " batch_file = os.path.join(cifar_path, f\"data_batch_{i}\")\n", - " if os.path.exists(batch_file):\n", - " with open(batch_file, 'rb') as f:\n", - " batch = pickle.load(f, encoding='bytes')\n", - " # Convert bytes keys to strings\n", - " batch = {k.decode('utf-8') if isinstance(k, bytes) else k: v for k, v in batch.items()}\n", - " \n", - " # Extract images and labels\n", - " images = batch['data'].reshape(-1, 3, 32, 32).astype(np.float32)\n", - " labels = batch['labels']\n", - " \n", - " self.images.extend(images)\n", - " self.labels.extend(labels)\n", - " else:\n", - " # Load test batch\n", - " test_file = os.path.join(cifar_path, 
\"test_batch\")\n", - " if os.path.exists(test_file):\n", - " with open(test_file, 'rb') as f:\n", - " batch = pickle.load(f, encoding='bytes')\n", - " # Convert bytes keys to strings\n", - " batch = {k.decode('utf-8') if isinstance(k, bytes) else k: v for k, v in batch.items()}\n", - " \n", - " # Extract images and labels\n", - " self.images = batch['data'].reshape(-1, 3, 32, 32).astype(np.float32)\n", - " self.labels = batch['labels']\n", - " \n", - " print(f\"βœ… Loaded {len(self.images)} {'training' if self.train else 'test'} samples\")\n", - " \n", - " def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]:\n", - " \"\"\"Get a single image and label by index.\"\"\"\n", - " image = Tensor(self.images[index])\n", - " label = Tensor(np.array(self.labels[index]))\n", - " return image, label\n", - " \n", - " def __len__(self) -> int:\n", - " \"\"\"Get the total number of samples in the dataset.\"\"\"\n", - " return len(self.images)\n", - " \n", - " def get_num_classes(self) -> int:\n", - " \"\"\"Get the number of classes in CIFAR-10.\"\"\"\n", - " return 10" - ] - }, - { - "cell_type": "markdown", - "id": "369a8df8", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your CIFAR-10 Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ff36707", - "metadata": {}, - "outputs": [], - "source": [ - "# Test CIFAR-10 dataset (skip download for now)\n", - "print(\"Testing CIFAR-10 dataset...\")\n", - "\n", - "try:\n", - " # Create a mock dataset for testing without download\n", - " class MockCIFAR10Dataset(Dataset):\n", - " def __init__(self, size, train=True):\n", - " self.size = size\n", - " self.train = train\n", - " self.data = [np.random.randint(0, 255, (3, 32, 32), dtype=np.uint8) for _ in range(size)]\n", - " self.labels = [np.random.randint(0, 10) for _ in range(size)]\n", - " \n", - " def __getitem__(self, index):\n", - " return Tensor(self.data[index].astype(np.float32)), 
Tensor(np.array(self.labels[index]))\n", - " \n", - " def __len__(self):\n", - " return self.size\n", - " \n", - " def get_num_classes(self):\n", - " return 10\n", - " \n", - " # Test the dataset\n", - " dataset = MockCIFAR10Dataset(50)\n", - " print(f\"βœ… Dataset created with {len(dataset)} samples\")\n", - " \n", - " # Test indexing\n", - " image, label = dataset[0]\n", - " print(f\"βœ… Image shape: {image.shape}\")\n", - " print(f\"βœ… Label: {label}\")\n", - " print(f\"βœ… Number of classes: {dataset.get_num_classes()}\")\n", - " \n", - " # Test multiple samples\n", - " for i in range(3):\n", - " img, lbl = dataset[i]\n", - " print(f\"βœ… Sample {i}: {img.shape}, class {int(lbl.data)}\")\n", - " \n", - " print(\"πŸŽ‰ CIFAR-10 dataset structure works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the CIFAR-10 dataset above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "a494f836", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "### πŸ‘οΈ Visual Feedback: See Your Data!\n", - "\n", - "Let's add a visualization function to actually **see** the CIFAR-10 images we're loading. \n", - "This provides immediate visual feedback and builds intuition about the data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0fc81d85", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "def show_cifar10_samples(dataset, num_samples=8, title=\"CIFAR-10 Samples\"):\n", - " \"\"\"\n", - " Display a grid of CIFAR-10 images with their labels.\n", - " \n", - " Args:\n", - " dataset: CIFAR-10 dataset\n", - " num_samples: Number of samples to display\n", - " title: Title for the plot\n", - " \n", - " TODO: Implement visualization function.\n", - " \n", - " APPROACH:\n", - " 1. Create a matplotlib subplot grid\n", - " 2. Get random samples from dataset\n", - " 3. Display each image with its class label\n", - " 4. 
Handle the image format (CHW -> HWC, normalize to 0-1)\n", - " \n", - " EXAMPLE:\n", - " show_cifar10_samples(dataset, num_samples=8)\n", - " # Shows 8 CIFAR-10 images in a 2x4 grid\n", - " \n", - " HINTS:\n", - " - Use plt.subplots() to create grid\n", - " - Convert image from (C, H, W) to (H, W, C) for display\n", - " - Normalize pixel values to [0, 1] range\n", - " - Use dataset.class_names for labels\n", - " \n", - " NOTE: This is a development/learning tool, not part of the core package.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "38c41820", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "def show_cifar10_samples(dataset, num_samples=8, title=\"CIFAR-10 Samples\"):\n", - " \"\"\"Display a grid of CIFAR-10 images with their labels.\"\"\"\n", - " if not _should_show_plots():\n", - " return\n", - " \n", - " # Create subplot grid\n", - " rows = 2\n", - " cols = num_samples // rows\n", - " fig, axes = plt.subplots(rows, cols, figsize=(12, 6))\n", - " fig.suptitle(title, fontsize=16)\n", - " \n", - " # Get random samples\n", - " indices = np.random.choice(len(dataset), num_samples, replace=False)\n", - " \n", - " for i, idx in enumerate(indices):\n", - " row = i // cols\n", - " col = i % cols\n", - " \n", - " # Get image and label\n", - " image, label = dataset[idx]\n", - " \n", - " # Convert from (C, H, W) to (H, W, C) and normalize to [0, 1]\n", - " if hasattr(image, 'data'):\n", - " img_data = image.data\n", - " else:\n", - " img_data = image\n", - " \n", - " # Handle different tensor formats\n", - " if img_data.shape[0] == 3: # (C, H, W)\n", - " img_display = np.transpose(img_data, (1, 2, 0))\n", - " else:\n", - " img_display = img_data\n", - " \n", - " # Normalize to [0, 1] range\n", - " img_display = img_display.astype(np.float32)\n", - " if img_display.max() > 1.0:\n", - " img_display = img_display 
/ 255.0\n", - " \n", - " # Ensure values are in [0, 1]\n", - " img_display = np.clip(img_display, 0, 1)\n", - " \n", - " # Display image\n", - " if rows == 1:\n", - " ax = axes[col]\n", - " else:\n", - " ax = axes[row, col]\n", - " \n", - " ax.imshow(img_display)\n", - " ax.axis('off')\n", - " \n", - " # Add label\n", - " if hasattr(label, 'data'):\n", - " label_idx = int(label.data)\n", - " else:\n", - " label_idx = int(label)\n", - " \n", - " if hasattr(dataset, 'class_names'):\n", - " class_name = dataset.class_names[label_idx]\n", - " ax.set_title(f'{class_name} ({label_idx})', fontsize=10)\n", - " else:\n", - " ax.set_title(f'Class {label_idx}', fontsize=10)\n", - " \n", - " plt.tight_layout()\n", - " if _should_show_plots():\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3fba9b6b", - "metadata": {}, - "outputs": [], - "source": [ - "# Test visual feedback with real CIFAR-10 data\n", - "print(\"🎨 Testing visual feedback with real CIFAR-10...\")\n", - "\n", - "try:\n", - " # Create real CIFAR-10 dataset for visualization\n", - " import tempfile\n", - " import os\n", - " \n", - " with tempfile.TemporaryDirectory() as temp_dir:\n", - " # Load real CIFAR-10 dataset\n", - " cifar_dataset = CIFAR10Dataset(temp_dir, train=True, download=True)\n", - " \n", - " print(f\"βœ… Loaded {len(cifar_dataset)} real CIFAR-10 samples\")\n", - " print(f\"βœ… Class names: {cifar_dataset.class_names}\")\n", - " \n", - " # Show sample images\n", - " if _should_show_plots():\n", - " print(\"πŸ–ΌοΈ Displaying sample images...\")\n", - " show_cifar10_samples(cifar_dataset, num_samples=8, title=\"Real CIFAR-10 Training Samples\")\n", - " \n", - " # Show some statistics\n", - " sample_images = [cifar_dataset[i][0] for i in range(100)]\n", - " pixel_values = [img.data for img in sample_images]\n", - " all_pixels = np.concatenate([pixels.flatten() for pixels in pixel_values])\n", - " \n", - " print(f\"βœ… Pixel value range: 
[{all_pixels.min():.1f}, {all_pixels.max():.1f}]\")\n", - " print(f\"βœ… Mean pixel value: {all_pixels.mean():.1f}\")\n", - " print(f\"βœ… Std pixel value: {all_pixels.std():.1f}\")\n", - " \n", - " print(\"πŸŽ‰ Visual feedback works! You can see your data!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure CIFAR-10 dataset is implemented correctly!\")" - ] - }, - { - "cell_type": "markdown", - "id": "8e5bfb4f", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 3: Understanding Data Loading\n", - "\n", - "Now let's build a **DataLoader** to efficiently batch and iterate through our dataset.\n", - "\n", - "### Why DataLoaders Matter\n", - "- **Batching**: Process multiple samples at once (GPU efficiency)\n", - "- **Shuffling**: Randomize order for better training\n", - "- **Memory management**: Handle large datasets efficiently\n", - "- **I/O optimization**: Load data in parallel with training\n", - "\n", - "### The DataLoader Pattern\n", - "```\n", - "Dataset: [sample1, sample2, sample3, ...]\n", - "DataLoader: [[batch1], [batch2], [batch3], ...]\n", - "```\n", - "\n", - "### Systems Thinking\n", - "- **Batch size**: Trade-off between memory and speed\n", - "- **Shuffling**: Prevents overfitting to data order\n", - "- **Iteration**: Efficient looping through data\n", - "- **Memory**: Manage large datasets that don't fit in RAM\n", - "\n", - "Let's implement a DataLoader!" 
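The shuffle-then-chunk loop at the heart of any DataLoader can be sketched as a standalone generator. The function name and array shapes below are assumptions for illustration only, not the module's API:

```python
import numpy as np

def iterate_batches(data, labels, batch_size=32, shuffle=True):
    """Yield (data, labels) batches; shuffling happens once per epoch."""
    indices = np.arange(len(data))
    if shuffle:
        np.random.shuffle(indices)                   # randomize sample order
    for start in range(0, len(indices), batch_size):
        chunk = indices[start:start + batch_size]    # last chunk may be smaller
        yield data[chunk], labels[chunk]

data = np.random.randn(100, 3, 32, 32)
labels = np.arange(100) % 10
batches = list(iterate_batches(data, labels, batch_size=32))
print(len(batches))          # 4 batches: 32 + 32 + 32 + 4
print(batches[-1][0].shape)  # (4, 3, 32, 32), the partial last batch
```

The batch count matches the ceil-division formula `(dataset_size + batch_size - 1) // batch_size`: 100 samples at batch size 32 yield 4 batches, with the last one holding the 4 leftover samples.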
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ecd11395", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class DataLoader:\n", - " \"\"\"\n", - " DataLoader: Efficiently batch and iterate through datasets.\n", - " \n", - " Provides batching, shuffling, and efficient iteration over datasets.\n", - " Essential for training neural networks efficiently.\n", - " \n", - " Args:\n", - " dataset: Dataset to load from\n", - " batch_size: Number of samples per batch\n", - " shuffle: Whether to shuffle data each epoch\n", - " \n", - " TODO: Implement DataLoader with batching and shuffling.\n", - " \n", - " APPROACH:\n", - " 1. Store dataset and configuration\n", - " 2. Implement __iter__ to yield batches\n", - " 3. Handle shuffling and batching logic\n", - " 4. Stack individual samples into batches\n", - " \n", - " EXAMPLE:\n", - " dataloader = DataLoader(dataset, batch_size=32, shuffle=True)\n", - " for batch_images, batch_labels in dataloader:\n", - " print(f\"Batch shape: {batch_images.shape}\") # (32, 3, 32, 32)\n", - " \n", - " HINTS:\n", - " - Use np.random.permutation for shuffling\n", - " - Stack samples using np.stack\n", - " - Yield batches as (batch_data, batch_labels)\n", - " - Handle last batch that might be smaller\n", - " \"\"\"\n", - " \n", - " def __init__(self, dataset: Dataset, batch_size: int = 32, shuffle: bool = True):\n", - " \"\"\"\n", - " Initialize DataLoader.\n", - " \n", - " Args:\n", - " dataset: Dataset to load from\n", - " batch_size: Number of samples per batch\n", - " shuffle: Whether to shuffle data each epoch\n", - " \n", - " TODO: Store configuration and dataset.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Store dataset as self.dataset\n", - " 2. Store batch_size as self.batch_size\n", - " 3. 
Store shuffle as self.shuffle\n", - " \n", - " EXAMPLE:\n", - " DataLoader(dataset, batch_size=32, shuffle=True)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __iter__(self) -> Iterator[Tuple[Tensor, Tensor]]:\n", - " \"\"\"\n", - " Iterate through dataset in batches.\n", - " \n", - " Returns:\n", - " Iterator yielding (batch_data, batch_labels) tuples\n", - " \n", - " TODO: Implement batching and shuffling logic.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Create indices list: list(range(len(dataset)))\n", - " 2. Shuffle indices if self.shuffle is True\n", - " 3. Loop through indices in batch_size chunks\n", - " 4. For each batch: collect samples, stack them, yield batch\n", - " \n", - " EXAMPLE:\n", - " for batch_data, batch_labels in dataloader:\n", - " # batch_data.shape: (batch_size, ...)\n", - " # batch_labels.shape: (batch_size,)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __len__(self) -> int:\n", - " \"\"\"\n", - " Get the number of batches per epoch.\n", - " \n", - " TODO: Calculate number of batches.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Get dataset size: len(self.dataset)\n", - " 2. Calculate: (dataset_size + batch_size - 1) // batch_size\n", - " 3. 
This handles the last partial batch correctly\n", - " \n", - " EXAMPLE:\n", - " Dataset size: 100, batch_size: 32\n", - " Number of batches: 4 (32, 32, 32, 4)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f7ddaee", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class DataLoader:\n", - " \"\"\"DataLoader: Efficiently batch and iterate through datasets.\"\"\"\n", - " \n", - " def __init__(self, dataset: Dataset, batch_size: int = 32, shuffle: bool = True):\n", - " self.dataset = dataset\n", - " self.batch_size = batch_size\n", - " self.shuffle = shuffle\n", - " \n", - " def __iter__(self) -> Iterator[Tuple[Tensor, Tensor]]:\n", - " \"\"\"Iterate through dataset in batches.\"\"\"\n", - " # Create indices\n", - " indices = list(range(len(self.dataset)))\n", - " \n", - " # Shuffle if requested\n", - " if self.shuffle:\n", - " np.random.shuffle(indices)\n", - " \n", - " # Generate batches\n", - " for i in range(0, len(indices), self.batch_size):\n", - " batch_indices = indices[i:i + self.batch_size]\n", - " \n", - " # Collect samples for this batch\n", - " batch_data = []\n", - " batch_labels = []\n", - " \n", - " for idx in batch_indices:\n", - " data, label = self.dataset[idx]\n", - " batch_data.append(data.data)\n", - " batch_labels.append(label.data)\n", - " \n", - " # Stack into batches\n", - " batch_data = np.stack(batch_data, axis=0)\n", - " batch_labels = np.stack(batch_labels, axis=0)\n", - " \n", - " yield Tensor(batch_data), Tensor(batch_labels)\n", - " \n", - " def __len__(self) -> int:\n", - " \"\"\"Get the number of batches per epoch.\"\"\"\n", - " return (len(self.dataset) + self.batch_size - 1) // self.batch_size" - ] - }, - { - "cell_type": "markdown", - "id": "9162a338", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your DataLoader" - ] - }, - { 
- "cell_type": "code", - "execution_count": null, - "id": "008b14b4", - "metadata": {}, - "outputs": [], - "source": [ - "# Test DataLoader\n", - "print(\"Testing DataLoader...\")\n", - "\n", - "try:\n", - " # Create a test dataset\n", - " class SimpleDataset(Dataset):\n", - " def __init__(self, size=100):\n", - " self.size = size\n", - " self.data = [np.random.randn(3, 32, 32) for _ in range(size)]\n", - " self.labels = [i % 10 for i in range(size)]\n", - " \n", - " def __getitem__(self, index):\n", - " return Tensor(self.data[index]), Tensor(np.array(self.labels[index]))\n", - " \n", - " def __len__(self):\n", - " return self.size\n", - " \n", - " def get_num_classes(self):\n", - " return 10\n", - " \n", - " # Test DataLoader\n", - " dataset = SimpleDataset(100)\n", - " dataloader = DataLoader(dataset, batch_size=32, shuffle=True)\n", - " \n", - " print(f\"βœ… Dataset size: {len(dataset)}\")\n", - " print(f\"βœ… Number of batches: {len(dataloader)}\")\n", - " \n", - " # Test iteration\n", - " batch_count = 0\n", - " for batch_data, batch_labels in dataloader:\n", - " batch_count += 1\n", - " print(f\"βœ… Batch {batch_count}: data shape {batch_data.shape}, labels shape {batch_labels.shape}\")\n", - " if batch_count >= 3: # Only show first 3 batches\n", - " break\n", - " \n", - " print(\"πŸŽ‰ DataLoader works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the DataLoader above!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ba456a9", - "metadata": {}, - "outputs": [], - "source": [ - "# Test DataLoader with visual feedback using real CIFAR-10\n", - "print(\"🎨 Testing DataLoader with visual feedback...\")\n", - "\n", - "try:\n", - " import tempfile\n", - " \n", - " with tempfile.TemporaryDirectory() as temp_dir:\n", - " # Load real CIFAR-10 dataset\n", - " cifar_dataset = CIFAR10Dataset(temp_dir, train=True, download=True)\n", - " \n", - " # Create DataLoader\n", - " 
dataloader = DataLoader(cifar_dataset, batch_size=16, shuffle=True)\n", - " \n", - " print(f\"βœ… Created DataLoader with {len(dataloader)} batches\")\n", - " \n", - " # Get first batch\n", - " batch_data, batch_labels = next(iter(dataloader))\n", - " print(f\"βœ… First batch shape: {batch_data.shape}\")\n", - " print(f\"βœ… First batch labels: {batch_labels.shape}\")\n", - " \n", - " # Show first few images from the batch\n", - " print(\"πŸ–ΌοΈ Displaying first batch images...\")\n", - " \n", - " # Create a temporary dataset-like object for visualization\n", - " class BatchDataset:\n", - " def __init__(self, batch_data, batch_labels, class_names):\n", - " self.batch_data = batch_data\n", - " self.batch_labels = batch_labels\n", - " self.class_names = class_names\n", - " \n", - " def __getitem__(self, index):\n", - " return Tensor(self.batch_data.data[index]), Tensor(self.batch_labels.data[index])\n", - " \n", - " def __len__(self):\n", - " return self.batch_data.shape[0]\n", - " \n", - " batch_dataset = BatchDataset(batch_data, batch_labels, cifar_dataset.class_names)\n", - " show_cifar10_samples(batch_dataset, num_samples=8, title=\"DataLoader Batch - Real CIFAR-10\")\n", - " \n", - " print(\"πŸŽ‰ DataLoader visual feedback works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure DataLoader and visualization are implemented correctly!\")" - ] - }, - { - "cell_type": "markdown", - "id": "626b84a2", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 4: Understanding Data Preprocessing\n", - "\n", - "Finally, let's build a **Normalizer** to preprocess our data for better training.\n", - "\n", - "### Why Normalization Matters\n", - "- **Gradient stability**: Prevents exploding/vanishing gradients\n", - "- **Training speed**: Faster convergence\n", - "- **Numerical stability**: Prevents overflow/underflow\n", - "- **Consistent scales**: All features have similar 
ranges\n", - "\n", - "### Common Normalization Techniques\n", - "- **Min-Max**: Scale to [0, 1] range\n", - "- **Z-score**: Zero mean, unit variance\n", - "- **ImageNet**: Specific mean/std for pretrained models\n", - "\n", - "### The Normalization Process\n", - "```\n", - "Raw Data: [0, 255] pixel values\n", - "Normalized: [-1, 1] or [0, 1] range\n", - "```\n", - "\n", - "Let's implement a flexible normalizer!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "75d7b857", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Normalizer:\n", - " \"\"\"\n", - " Data Normalizer: Standardize data for better training.\n", - " \n", - " Computes mean and standard deviation from training data,\n", - " then applies normalization to new data.\n", - " \n", - " TODO: Implement data normalization.\n", - " \n", - " APPROACH:\n", - " 1. Fit: Compute mean and std from training data\n", - " 2. Transform: Apply normalization using computed stats\n", - " 3. Handle both single tensors and batches\n", - " \n", - " EXAMPLE:\n", - " normalizer = Normalizer()\n", - " normalizer.fit(training_data) # Compute stats\n", - " normalized = normalizer.transform(new_data) # Apply normalization\n", - " \n", - " HINTS:\n", - " - Store mean and std as instance variables\n", - " - Use np.mean and np.std for statistics\n", - " - Apply: (data - mean) / std\n", - " - Handle division by zero (add small epsilon)\n", - " \"\"\"\n", - " \n", - " def __init__(self):\n", - " \"\"\"\n", - " Initialize normalizer.\n", - " \n", - " TODO: Initialize mean and std to None.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Set self.mean = None\n", - " 2. Set self.std = None\n", - " 3. 
Set self.epsilon = 1e-8 (for numerical stability)\n", - " \n", - " EXAMPLE:\n", - " normalizer = Normalizer()\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def fit(self, data: List[Tensor]):\n", - " \"\"\"\n", - " Compute normalization statistics from training data.\n", - " \n", - " Args:\n", - " data: List of tensors to compute statistics from\n", - " \n", - " TODO: Compute mean and standard deviation.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Stack all tensors: np.stack([t.data for t in data])\n", - " 2. Compute mean: np.mean(stacked_data)\n", - " 3. Compute std: np.std(stacked_data)\n", - " 4. Store as self.mean and self.std\n", - " \n", - " EXAMPLE:\n", - " normalizer.fit([tensor1, tensor2, tensor3])\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def transform(self, data: Union[Tensor, List[Tensor]]) -> Union[Tensor, List[Tensor]]:\n", - " \"\"\"\n", - " Apply normalization to data.\n", - " \n", - " Args:\n", - " data: Tensor or list of tensors to normalize\n", - " \n", - " Returns:\n", - " Normalized tensor(s)\n", - " \n", - " TODO: Apply normalization using computed statistics.\n", - " \n", - " STEP-BY-STEP:\n", - " 1. Check if mean and std are computed (not None)\n", - " 2. If single tensor: apply (data - mean) / (std + epsilon)\n", - " 3. If list: apply to each tensor in the list\n", - " 4. 
Return normalized data\n", - " \n", - " EXAMPLE:\n", - " normalized = normalizer.transform(tensor)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0316f181", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Normalizer:\n", - " \"\"\"Data Normalizer: Standardize data for better training.\"\"\"\n", - " \n", - " def __init__(self):\n", - " self.mean = None\n", - " self.std = None\n", - " self.epsilon = 1e-8\n", - " \n", - " def fit(self, data: List[Tensor]):\n", - " \"\"\"Compute normalization statistics from training data.\"\"\"\n", - " # Stack all data\n", - " all_data = np.stack([t.data for t in data])\n", - " \n", - " # Compute statistics\n", - " self.mean = np.mean(all_data)\n", - " self.std = np.std(all_data)\n", - " \n", - " print(f\"βœ… Computed normalization stats: mean={self.mean:.4f}, std={self.std:.4f}\")\n", - " \n", - " def transform(self, data: Union[Tensor, List[Tensor]]) -> Union[Tensor, List[Tensor]]:\n", - " \"\"\"Apply normalization to data.\"\"\"\n", - " if self.mean is None or self.std is None:\n", - " raise ValueError(\"Must call fit() before transform()\")\n", - " \n", - " if isinstance(data, list):\n", - " # Transform list of tensors\n", - " return [Tensor((t.data - self.mean) / (self.std + self.epsilon)) for t in data]\n", - " else:\n", - " # Transform single tensor\n", - " return Tensor((data.data - self.mean) / (self.std + self.epsilon))" - ] - }, - { - "cell_type": "markdown", - "id": "a1916eb7", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Normalizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e627de51", - "metadata": {}, - "outputs": [], - "source": [ - "# Test Normalizer\n", - "print(\"Testing Normalizer...\")\n", - "\n", - "try:\n", - " # Create test data\n", - " data = [\n", - " 
Tensor(np.random.randn(3, 32, 32) * 50 + 100), # Mean ~100, std ~50\n", - " Tensor(np.random.randn(3, 32, 32) * 50 + 100),\n", - " Tensor(np.random.randn(3, 32, 32) * 50 + 100)\n", - " ]\n", - " \n", - " # Test normalizer\n", - " normalizer = Normalizer()\n", - " \n", - " # Fit to data\n", - " normalizer.fit(data)\n", - " \n", - " # Transform data\n", - " normalized = normalizer.transform(data)\n", - " \n", - " # Check results\n", - " print(f\"βœ… Original data mean: {np.mean([t.data for t in data]):.4f}\")\n", - " print(f\"βœ… Original data std: {np.std([t.data for t in data]):.4f}\")\n", - " print(f\"βœ… Normalized data mean: {np.mean([t.data for t in normalized]):.4f}\")\n", - " print(f\"βœ… Normalized data std: {np.std([t.data for t in normalized]):.4f}\")\n", - " \n", - " # Test single tensor\n", - " single_tensor = Tensor(np.random.randn(3, 32, 32) * 50 + 100)\n", - " normalized_single = normalizer.transform(single_tensor)\n", - " print(f\"βœ… Single tensor normalized: {normalized_single.shape}\")\n", - " \n", - " print(\"πŸŽ‰ Normalizer works!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the Normalizer above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "d1cdb799", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 5: Building a Complete Data Pipeline\n", - "\n", - "Now let's put everything together into a complete data pipeline!\n", - "\n", - "### The Complete Pipeline\n", - "```\n", - "Raw Data β†’ Dataset β†’ DataLoader β†’ Normalizer β†’ Model\n", - "```\n", - "\n", - "This is the foundation of every machine learning system!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b999f7f", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def create_data_pipeline(dataset_path: str = \"data/cifar10/\", \n", - " batch_size: int = 32, \n", - " normalize: bool = True,\n", - " shuffle: bool = True):\n", - " \"\"\"\n", - " Create a complete data pipeline for training.\n", - " \n", - " Args:\n", - " dataset_path: Path to dataset\n", - " batch_size: Batch size for training\n", - " normalize: Whether to normalize data\n", - " shuffle: Whether to shuffle data\n", - " \n", - " Returns:\n", - " Tuple of (train_loader, test_loader)\n", - " \n", - " TODO: Implement complete data pipeline.\n", - " \n", - " APPROACH:\n", - " 1. Create train and test datasets\n", - " 2. Create data loaders\n", - " 3. Fit normalizer on training data\n", - " 4. Return all components\n", - " \n", - " EXAMPLE:\n", - " train_loader, test_loader = create_data_pipeline()\n", - " for batch_data, batch_labels in train_loader:\n", - " # Ready for training!\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a934d43", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def create_data_pipeline(dataset_path: str = \"data/cifar10/\", \n", - " batch_size: int = 32, \n", - " normalize: bool = True,\n", - " shuffle: bool = True):\n", - " \"\"\"Create a complete data pipeline for training.\"\"\"\n", - " \n", - " print(\"πŸ”§ Creating data pipeline...\")\n", - " \n", - " # Create datasets with real CIFAR-10 data\n", - " train_dataset = CIFAR10Dataset(dataset_path, train=True, download=True)\n", - " test_dataset = CIFAR10Dataset(dataset_path, train=False, download=True)\n", - " \n", - " # Create data loaders\n", - " train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)\n", - " 
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)\n", - " \n", - " # Create normalizer\n", - " normalizer = None\n", - " if normalize:\n", - " normalizer = Normalizer()\n", - " # Fit on a subset of training data for efficiency\n", - " sample_data = [train_dataset[i][0] for i in range(min(1000, len(train_dataset)))]\n", - " normalizer.fit(sample_data)\n", - " print(f\"βœ… Computed normalization stats: mean={normalizer.mean:.4f}, std={normalizer.std:.4f}\")\n", - " \n", - " print(f\"βœ… Pipeline created:\")\n", - " print(f\" - Training batches: {len(train_loader)}\")\n", - " print(f\" - Test batches: {len(test_loader)}\")\n", - " print(f\" - Batch size: {batch_size}\")\n", - " print(f\" - Normalization: {normalize}\")\n", - " \n", - " return train_loader, test_loader" - ] - }, - { - "cell_type": "markdown", - "id": "168f6520", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Complete Data Pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "31bfa3b4", - "metadata": {}, - "outputs": [], - "source": [ - "# Test complete data pipeline\n", - "print(\"Testing complete data pipeline...\")\n", - "\n", - "try:\n", - " # Create pipeline\n", - " train_loader, test_loader = create_data_pipeline(\n", - " batch_size=16, normalize=True, shuffle=True\n", - " )\n", - " \n", - " # Test training loop\n", - " print(\"\\nπŸ”₯ Testing training loop:\")\n", - " for i, (batch_data, batch_labels) in enumerate(train_loader):\n", - " print(f\" Batch {i+1}: data {batch_data.shape}, labels {batch_labels.shape}\")\n", - " \n", - " # Note: Data is already normalized in the pipeline if normalize=True\n", - " \n", - " if i >= 2: # Only show first 3 batches\n", - " break\n", - " \n", - " # Test test loop\n", - " print(\"\\nπŸ§ͺ Testing test loop:\")\n", - " for i, (batch_data, batch_labels) in enumerate(test_loader):\n", - " print(f\" Test batch {i+1}: data {batch_data.shape}, labels {batch_labels.shape}\")\n", 
- " if i >= 1: # Only show first 2 batches\n", - " break\n", - " \n", - " print(\"\\nπŸŽ‰ Complete data pipeline works!\")\n", - " print(\"Ready for training neural networks!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the data pipeline above!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ee2216c7", - "metadata": {}, - "outputs": [], - "source": [ - "# Test complete pipeline with visual feedback\n", - "print(\"🎨 Testing complete pipeline with visual feedback...\")\n", - "\n", - "try:\n", - " import tempfile\n", - " \n", - " with tempfile.TemporaryDirectory() as temp_dir:\n", - " # Create complete pipeline\n", - " train_loader, test_loader = create_data_pipeline(\n", - " dataset_path=temp_dir,\n", - " batch_size=16, \n", - " normalize=True, \n", - " shuffle=True\n", - " )\n", - " \n", - " # Get a batch from training data\n", - " train_batch_data, train_batch_labels = next(iter(train_loader))\n", - " print(f\"βœ… Training batch shape: {train_batch_data.shape}\")\n", - " \n", - " # Get a batch from test data\n", - " test_batch_data, test_batch_labels = next(iter(test_loader))\n", - " print(f\"βœ… Test batch shape: {test_batch_data.shape}\")\n", - " \n", - " # Show training batch images\n", - " print(\"πŸ–ΌοΈ Displaying training batch...\")\n", - " class PipelineBatchDataset:\n", - " def __init__(self, batch_data, batch_labels):\n", - " self.batch_data = batch_data\n", - " self.batch_labels = batch_labels\n", - " self.class_names = ['airplane', 'car', 'bird', 'cat', 'deer', \n", - " 'dog', 'frog', 'horse', 'ship', 'truck']\n", - " \n", - " def __getitem__(self, index):\n", - " return Tensor(self.batch_data.data[index]), Tensor(self.batch_labels.data[index])\n", - " \n", - " def __len__(self):\n", - " return self.batch_data.shape[0]\n", - " \n", - " train_batch_dataset = PipelineBatchDataset(train_batch_data, train_batch_labels)\n", - " 
show_cifar10_samples(train_batch_dataset, num_samples=8, title=\"Complete Pipeline - Training Batch\")\n", - " \n", - " # Show test batch images\n", - " print(\"πŸ–ΌοΈ Displaying test batch...\")\n", - " test_batch_dataset = PipelineBatchDataset(test_batch_data, test_batch_labels)\n", - " show_cifar10_samples(test_batch_dataset, num_samples=8, title=\"Complete Pipeline - Test Batch\")\n", - " \n", - " # Show data statistics\n", - " print(f\"βœ… Training data range: [{train_batch_data.data.min():.3f}, {train_batch_data.data.max():.3f}]\")\n", - " print(f\"βœ… Training data mean: {train_batch_data.data.mean():.3f}\")\n", - " print(f\"βœ… Training data std: {train_batch_data.data.std():.3f}\")\n", - " \n", - " print(\"πŸŽ‰ Complete pipeline visual feedback works!\")\n", - " print(\"πŸš€ You can see your entire data pipeline in action!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure complete pipeline and visualization work correctly!\")" - ] - }, - { - "cell_type": "markdown", - "id": "c4d835e9", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## 🎯 Summary\n", - "\n", - "Congratulations! You've built a complete data loading system:\n", - "\n", - "### What You Built\n", - "1. **Dataset**: Abstract interface for data loading\n", - "2. **CIFAR10Dataset**: Real dataset implementation\n", - "3. **DataLoader**: Efficient batching and iteration\n", - "4. **Normalizer**: Data preprocessing for better training\n", - "5. 
**Data Pipeline**: Complete system integration\n", - "\n", - "### Key Concepts Learned\n", - "- **Data engineering**: The foundation of ML systems\n", - "- **Batching**: Efficient processing of multiple samples\n", - "- **Normalization**: Preprocessing for stable training\n", - "- **Systems thinking**: Memory, I/O, and performance considerations\n", - "\n", - "### Next Steps\n", - "- **Autograd**: Automatic differentiation for training\n", - "- **Training**: Optimization loops and loss functions\n", - "- **Advanced data**: Augmentation, distributed loading, etc.\n", - "\n", - "### Real-World Impact\n", - "This data loading system is the foundation of every ML pipeline:\n", - "- **Production systems**: Handle millions of samples efficiently\n", - "- **Research**: Enable experimentation with different datasets\n", - "- **MLOps**: Integrate with training and deployment pipelines\n", - "\n", - "You now understand how data flows through ML systems! πŸš€" - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/INSTRUCTOR_GUIDE.md b/docs/INSTRUCTOR_GUIDE.md index 4b2f685d..a634d87d 100644 --- a/docs/INSTRUCTOR_GUIDE.md +++ b/docs/INSTRUCTOR_GUIDE.md @@ -75,21 +75,21 @@ python -c "from tinytorch.core.activations import ReLU; print(ReLU()([-1, 0, 1]) ## πŸ› οΈ **Instructor Workflow** (Python-First Development) ### **🐍 Python-First Philosophy** -- **Always work in raw Python files** (`modules/XX/XX_dev.py`) +- **Always work in raw Python files** (`assignments/source/XX/XX_dev.py`) - **Generate Jupyter notebooks on demand** using Jupytext - **NBGrader compliance** through automated cell metadata - **nbdev for package building** and exports ### **Step 1: Create/Edit Solution (Python File)** ```bash -cd modules/00_setup +cd assignments/source/00_setup # Edit the raw Python file (source of truth) code setup_dev.py # or vim/nano/your editor ``` ### **Step 2: Test Solution** ```bash -python -m 
pytest modules/00_setup/tests/ -v # Verify solution works +python -m pytest assignments/source/00_setup/tests/ -v # Verify solution works ``` ### **Step 3: Export to Package (nbdev)** @@ -115,7 +115,7 @@ tito nbgrader autograde 00_setup # Auto-grade with pytest ### **πŸ”„ Complete Workflow Diagram** ``` -modules/XX/XX_dev.py (Source of Truth) +assignments/source/XX/XX_dev.py (Source of Truth) ↓ [nbdev export] (Package Building) ↓ @@ -132,9 +132,9 @@ assignments/source/XX/ (Student Assignments) ## πŸ› οΈ **Student Workflow** (5 Simple Steps) -### **Step 1: Open Module** +### **Step 1: Open Assignment** ```bash -cd modules/00_setup +cd assignments/source/00_setup jupyter lab setup_dev.py ``` @@ -150,7 +150,7 @@ python bin/tito module export 00_setup ### **Step 4: Test Work** ```bash -python -m pytest modules/00_setup/tests/ -v +python -m pytest assignments/source/00_setup/tests/ -v ``` ### **Step 5: Use Their Code** @@ -314,7 +314,7 @@ tito module status # Check all modules 1. **Clone repository**: `git clone [repo_url]` 2. **Test your setup**: `tito system doctor` 3. **Review module 00**: `cat modules/00_setup/README.md` -4. **Open in Jupyter**: `jupyter lab modules/00_setup/setup_dev.py` +4. **Open in Jupyter**: `jupyter lab assignments/source/00_setup/setup_dev.py` ### **Your First Class** ```bash diff --git a/docs/README.md b/docs/README.md index 60df36a4..eacf1d0c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -121,11 +121,11 @@ Each module follows this pattern: ### **Instructors** 1. Read the [Instructor Guide](INSTRUCTOR_GUIDE.md) 2. Verify your system: `tito system doctor` -3. Test the first module: `cd modules/00_setup && jupyter lab setup_dev.py` +3. Test the first assignment: `cd assignments/source/00_setup && jupyter lab setup_dev.py` ### **Students** 1. Read the [Student Guide](STUDENT_GUIDE.md) -2. Start with: `cd modules/00_setup && jupyter lab setup_dev.py` +2. Start with: `cd assignments/source/00_setup && jupyter lab setup_dev.py` 3. 
Follow the 5-step workflow for each module ### **Developers** diff --git a/docs/STUDENT_GUIDE.md b/docs/STUDENT_GUIDE.md index 59452b55..7b52f06d 100644 --- a/docs/STUDENT_GUIDE.md +++ b/docs/STUDENT_GUIDE.md @@ -20,9 +20,9 @@ tito system info # Check your system tito system doctor # Verify everything works ``` -### **2. Start Your First Module** +### **2. Start Your First Assignment** ```bash -cd modules/00_setup +cd assignments/source/00_setup jupyter lab setup_dev.py ``` @@ -54,9 +54,9 @@ Track your journey through TinyTorch: This is your rhythm for every module: -### **Step 1: Open Module** +### **Step 1: Open Assignment** ```bash -cd modules/00_setup +cd assignments/source/00_setup jupyter lab setup_dev.py ``` @@ -284,7 +284,7 @@ tito module status # See all module progress ### **Your First Session** 1. **Open terminal**: `cd TinyTorch` 2. **Check system**: `tito system doctor` -3. **Start module**: `cd modules/00_setup && jupyter lab setup_dev.py` +3. **Start assignment**: `cd assignments/source/00_setup && jupyter lab setup_dev.py` 4. **Follow instructions**: Complete the TODO sections 5. **Test your work**: `python -m pytest tests/ -v` diff --git a/docs/development/README.md b/docs/development/README.md index 2f8edc54..5da8207f 100644 --- a/docs/development/README.md +++ b/docs/development/README.md @@ -4,23 +4,13 @@ *Note: AI assistants use `.cursor/rules/` for coding patterns. This documentation focuses on methodology, workflow, and educational design for human developers.* -## 🎯 **Quick Start** (New Human Developers) +## 🎯 **Current Architecture** -1. **Read First**: [Module Development Guide](module-development-guide.md) - Complete methodology and philosophy -2. **Follow**: [Module Creation Checklist](module-creation-checklist.md) - Step-by-step process -3. 
**Reference**: [Quick Reference](quick-module-reference.md) - Commands and common patterns +TinyTorch uses a **clean separation of concerns** architecture: -## πŸ“– **Documentation Purpose** - -### **Human Development Guides** (This Directory) -- **Why**: Educational methodology, design philosophy, workflow -- **How**: Step-by-step processes, quality standards, best practices -- **Context**: Real-world connections, systems thinking, pedagogical goals - -### **AI Coding Rules** (`.cursor/rules/`) -- **What**: Specific coding patterns, implementation examples, anti-patterns -- **Enforcement**: Automatic guidance during development -- **Technical**: Code structure, testing patterns, NBDev directives +- **NBGrader**: Assignment creation and student workflow only +- **nbdev**: Package export and building only +- **Clear workflow**: NBGrader generates assignments β†’ students complete them β†’ module export builds package ## πŸ”‘ **Core Development Philosophy** @@ -44,43 +34,48 @@ ## πŸ› οΈ **Development Workflow** -### **Create New Module** +### **Create New Assignment** ```bash # 1. Create structure -mkdir modules/{module} -mkdir modules/{module}/tests +mkdir assignments/source/{module} +mkdir assignments/source/{module}/tests -# 2. Write complete implementation -# Follow: module-development-guide.md +# 2. Write complete implementation with NBGrader solution delimiters +# assignments/source/{module}/{module}_dev.py -# 3. Test and validate -# Follow: module-creation-checklist.md +# 3. 
Test and validate +cd assignments/source/{module} +pytest tests/ -v ``` -### **Quality Assurance** -All modules must: -- βœ… Use real data, not synthetic/mock data -- βœ… Include progress feedback for long operations -- βœ… Provide visual confirmation of working code -- βœ… Test with realistic data scales -- βœ… Follow "Build β†’ Use β†’ Understand" progression -- βœ… Include comprehensive TODO guidance -- βœ… Separate development richness from clean exports +### **Student Workflow** +```bash +# 1. NBGrader: Generate assignments from source +tito nbgrader generate {module} + +# 2. Students: Complete assignments +# Work in assignments/source/{module}/{module}.ipynb + +# 3. nbdev: Export completed work to package +tito module export {module} + +# 4. Test package integration +tito module test {module} +``` ## πŸ“‹ **Documentation Structure** -### **Core Development Process** -- **[Module Development Guide](module-development-guide.md)** - Complete methodology and best practices -- **[Module Creation Checklist](module-creation-checklist.md)** - Comprehensive step-by-step process -- **[Quick Reference](quick-module-reference.md)** - Commands, markers, and common patterns +### **Core Guides** +- **[Testing Guidelines](testing-guidelines.md)** - Testing standards and practices +- **[Template Files](module-template_files/)** - Complete file templates -### **Templates and Examples** -- **[Module Template](module-template.md)** - Reusable template snippets -- **[Module Template Files](module-template_files/)** - Complete file templates +### **Educational Philosophy** +- **[Pedagogy Directory](../pedagogy/)** - Learning theory and course design +- **[Vision](../pedagogy/vision.md)** - Overall educational philosophy ## πŸŽ“ **Educational Design Principles** -### **For Module Developers** +### **For Assignment Developers** 1. **Start with real data** - Choose production datasets first 2. **Design for immediate gratification** - Students see results quickly 3. 
**Build intuition before abstraction** - Concrete examples first @@ -113,12 +108,12 @@ All modules must: ## 🎯 **Success Metrics** **Human developers should be able to:** -- Understand the educational philosophy behind module design -- Create modules that follow TinyTorch principles +- Understand the educational philosophy behind assignment design +- Create assignments that follow TinyTorch principles - Design learning experiences that build real-world skills - Balance educational goals with production quality -**Modules should achieve:** +**Assignments should achieve:** - High student engagement and completion rates - Real-world relevance and production quality - Smooth progression through the curriculum @@ -126,4 +121,4 @@ All modules must: --- -**Remember**: We're teaching ML systems engineering, not just algorithms. Every module should reflect real-world practices while maintaining educational excellence. \ No newline at end of file +**Remember**: We're teaching ML systems engineering, not just algorithms. Every assignment should reflect real-world practices while maintaining educational excellence. \ No newline at end of file diff --git a/docs/development/module-creation-checklist.md b/docs/development/module-creation-checklist.md deleted file mode 100644 index 015e5676..00000000 --- a/docs/development/module-creation-checklist.md +++ /dev/null @@ -1,165 +0,0 @@ -# πŸ“‹ Module Creation Checklist - -**Comprehensive step-by-step guide for creating high-quality TinyTorch modules.** - -## 🎯 **Planning Phase** - -### **Module Design** -- [ ] **Choose real dataset** (CIFAR-10, ImageNet, etc. - no synthetic data) -- [ ] **Define learning objectives** (what will students build and understand?) -- [ ] **Identify production connections** (how does this relate to real ML systems?) -- [ ] **Plan visual feedback** (how will students see their code working?) 
-- [ ] **Design progression** (easy β†’ medium β†’ hard with clear indicators) - -### **Educational Approach** -- [ ] **Choose learning pattern**: Build β†’ Use β†’ [Reflect/Analyze/Optimize] -- [ ] **Plan immediate feedback** (students see results quickly) -- [ ] **Design real-world connections** (production ML relevance) -- [ ] **Structure progressive complexity** (build understanding step by step) - -## πŸ› οΈ **Implementation Phase** - -### **Core Development** -- [ ] **Create module directory**: `modules/{module}/` -- [ ] **Create main file**: `{module}_dev.py` with Jupytext header -- [ ] **Add NBDev directives**: `#| default_exp core.{module}` at top -- [ ] **Implement complete working version** first (get it working) -- [ ] **Add educational structure** (markdown explanations, TODO guidance) -- [ ] **Include visual feedback** (development only, not exported) -- [ ] **Add progress indicators** for long operations - -### **File Structure** -``` -modules/{module}/ -β”œβ”€β”€ {module}_dev.py # Main development file -β”œβ”€β”€ module.yaml # Simple metadata -β”œβ”€β”€ tests/ -β”‚ └── test_{module}.py # Comprehensive tests -└── README.md # Module overview -``` - -### **Educational Content** -- [ ] **Clear conceptual explanations** before implementation -- [ ] **Comprehensive TODO guidance** with approach, examples, hints -- [ ] **Real-world context** and production connections -- [ ] **Visual confirmation** of working code (development only) -- [ ] **Progressive difficulty** with clear indicators - -## πŸ§ͺ **Testing Phase** - -### **Test Creation** -- [ ] **Create test file**: `tests/test_{module}.py` -- [ ] **Use real data** throughout (no mock/synthetic data) -- [ ] **Test realistic scales** (performance at real data sizes) -- [ ] **Include edge cases** (empty input, wrong shapes, etc.) 
-- [ ] **Add performance tests** (reasonable execution time) - -### **Test Verification** -- [ ] **All tests pass**: `tito module test --module {module}` -- [ ] **Tests use real data** (production datasets, realistic parameters) -- [ ] **Performance acceptable** (reasonable execution time) -- [ ] **Edge cases covered** (error handling, boundary conditions) - -## πŸ“¦ **Integration Phase** - -### **Package Export** -- [ ] **Export to package**: `tito package sync --module {module}` -- [ ] **Verify exports**: Check `tinytorch/core/{module}.py` exists -- [ ] **Test imports**: `from tinytorch.core.{module} import ClassName` -- [ ] **No circular dependencies** or import issues - -### **CLI Integration** -- [ ] **Status shows correctly**: `tito module status` -- [ ] **Tests run via CLI**: `tito module test --module {module}` -- [ ] **Notebooks convert**: `tito module notebooks --module {module}` - -## πŸ“š **Documentation Phase** - -### **Module Documentation** -- [ ] **Create README.md** with overview and usage examples -- [ ] **Document learning objectives** and key concepts -- [ ] **Include usage examples** (both Python and notebook) -- [ ] **Add troubleshooting** common issues - -### **Metadata** -- [ ] **Create module.yaml** with basic info (name, title, description) -- [ ] **Set dependencies** (prerequisites, builds_on, enables) -- [ ] **Define exports_to** (tinytorch package location) - -## βœ… **Quality Assurance** - -### **Code Quality** -- [ ] **Real data throughout** (no synthetic/mock data) -- [ ] **Progress feedback** for long operations -- [ ] **Visual confirmation** of working code -- [ ] **Performance optimized** for student experience -- [ ] **Clean exports** (development richness separate from package) - -### **Educational Quality** -- [ ] **Clear learning progression** (Build β†’ Use β†’ [Pattern]) -- [ ] **Immediate feedback** and validation -- [ ] **Real-world relevance** and connections -- [ ] **Comprehensive guidance** (approach, examples, hints) 
-- [ ] **Appropriate difficulty** progression - -### **Systems Quality** -- [ ] **Error handling** and graceful failures -- [ ] **Memory efficiency** for large datasets -- [ ] **Caching** for repeated operations -- [ ] **User experience** considerations -- [ ] **Production-ready** patterns - -## πŸ”„ **Conversion & Export** - -### **Notebook Generation** -- [ ] **Convert to notebook**: `tito module notebooks --module {module}` -- [ ] **Verify notebook structure** (cells, markdown, code) -- [ ] **Test in Jupyter**: Open and run the generated notebook - -### **Package Integration** -- [ ] **Export to package**: `tito package sync --module {module}` -- [ ] **Verify package structure**: Check `tinytorch/core/` -- [ ] **Test imports**: Import from package works correctly -- [ ] **Run integration tests**: All modules still work together - -## 🎯 **Final Verification** - -### **Student Experience** -- [ ] **Clear learning objectives** achieved -- [ ] **Immediate feedback** throughout -- [ ] **Real-world connections** obvious -- [ ] **Smooth difficulty progression** -- [ ] **Comprehensive guidance** without being prescriptive - -### **Technical Excellence** -- [ ] **All tests pass** with real data -- [ ] **Performance acceptable** at realistic scales -- [ ] **Clean code structure** and organization -- [ ] **Proper error handling** and edge cases -- [ ] **Integration works** with existing modules - -### **Production Readiness** -- [ ] **Real datasets** used throughout -- [ ] **Production patterns** demonstrated -- [ ] **Systems thinking** integrated -- [ ] **Performance considerations** addressed -- [ ] **User experience** optimized - -## πŸš€ **Release Checklist** - -### **Final Steps** -- [ ] **All tests pass**: `tito module test --module {module}` -- [ ] **Package exports**: `tito package sync --module {module}` -- [ ] **Documentation complete**: README, docstrings, examples -- [ ] **Integration verified**: Works with other modules -- [ ] **Student path tested**: Follow 
your own guidance - -### **Commit and Deploy** -- [ ] **Commit changes**: Git commit with descriptive message -- [ ] **Update main status**: `tito module status` shows complete -- [ ] **Verify CLI integration**: All commands work correctly -- [ ] **Test end-to-end**: Full student workflow functions - ---- - -**πŸ’‘ Remember**: This is ML systems engineering education. Every module should reflect real-world practices while maintaining educational excellence. Students are building production-quality skills, not just academic exercises. \ No newline at end of file diff --git a/docs/development/module-development-guide.md b/docs/development/module-development-guide.md deleted file mode 100644 index 6c953498..00000000 --- a/docs/development/module-development-guide.md +++ /dev/null @@ -1,318 +0,0 @@ -# πŸ“– TinyTorch Module Development Guide - -**Complete methodology for creating educational modules with real-world ML engineering practices.** - -## 🎯 Philosophy - -**"Build β†’ Use β†’ Understand β†’ Repeat"** with real data and immediate feedback. - -Create complete, working implementations that automatically generate student exercise versions while maintaining production-quality exports. - -## πŸ”‘ Core Principles - -### **Real Data, Real Systems** -- **Use production datasets**: No mock/fake data - students work with CIFAR-10, not synthetic data -- **Show progress feedback**: Downloads, training need visual progress indicators -- **Cache for efficiency**: Download once, use repeatedly -- **Real-world scale**: Use actual dataset sizes, not toy examples - -### **Immediate Visual Feedback** -- **Visual confirmation**: Students see their code working (images, plots, results) -- **Development vs. 
Export separation**: Rich feedback in `_dev.py`, clean exports to package -- **Progress indicators**: Status messages, progress bars for long operations -- **Real-time validation**: Students can verify each step immediately - -### **Educational Excellence** -- **Progressive complexity**: Easy β†’ Medium β†’ Hard with clear difficulty indicators -- **Comprehensive guidance**: TODO sections with approach, examples, hints, systems thinking -- **Real-world connections**: Connect every concept to production ML engineering -- **Immediate testing**: Test each component with real inputs as you build - -## πŸ—οΈ Development Workflow - -### Step 1: Choose the Learning Pattern -- **Select engagement pattern**: Reflect, Analyze, or Optimize? -- **Use the Pattern Selection Guide** from [Pedagogical Principles](../pedagogy/pedagogical-principles.md): - - **Build β†’ Use β†’ Reflect**: Early modules, design decisions, systems thinking - - **Build β†’ Use β†’ Analyze**: Middle modules, technical depth, performance - - **Build β†’ Use β†’ Optimize**: Advanced modules, iteration, production focus -- **Document your choice** with clear rationale - -### Step 2: Plan the Learning Journey -- **Define learning objectives**: What should students implement vs. receive? -- **Choose real data**: What production dataset will they use? -- **Design progression**: How does complexity build through the module? -- **Map to production**: How does this connect to real ML systems? 
-- **Design pattern-specific activities**: Questions, exercises, or challenges - -### Step 3: Write Complete Implementation -Create `modules/{module}/{module}_dev.py` with NBDev structure: - -```python -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.17.1 -# --- - -# %% [markdown] -""" -# Module: {Title} - {Purpose} - -## 🎯 Learning Pattern: Build β†’ Use β†’ [Pattern] - -**Pattern Choice**: [Reflect/Analyze/Optimize] -**Rationale**: [Why this pattern fits the learning objectives] - -**Key Activities**: -- [Pattern-specific activity 1] -- [Pattern-specific activity 2] -- [Pattern-specific activity 3] - -## Learning Objectives -- βœ… Build {core_concept} from scratch -- βœ… Use it with real data ({dataset_name}) -- βœ… [Engage] through {pattern_specific_activities} -- βœ… Connect to production ML systems - -## What You'll Build -{description_of_what_students_build} -""" - -# %% -#| default_exp core.{module} -import numpy as np -import matplotlib.pyplot as plt -from typing import Union, List, Optional - -# %% -#| export -class MainClass: - """ - {Description of the class} - - TODO: {What students need to implement} - - APPROACH: - 1. {Step 1 with specific guidance} - 2. {Step 2 with specific guidance} - 3. 
{Step 3 with specific guidance} - - EXAMPLE: - Input: {concrete_example} - Expected: {expected_output} - - HINTS: - - {Helpful hint about approach} - - {Systems thinking hint} - - {Real-world connection} - """ - def __init__(self, params): - raise NotImplementedError("Student implementation required") - -# %% -#| hide -#| export -class MainClass: - """Complete implementation (hidden from students).""" - def __init__(self, params): - # Actual working implementation - pass - -# %% [markdown] -""" -## πŸ§ͺ Test Your Implementation -""" - -# %% -# Test with real data -try: - # Test student implementation - result = MainClass(real_data_example) - print(f"βœ… Success: {result}") -except NotImplementedError: - print("⚠️ Implement the class above first!") - -# Visual feedback (development only - not exported) -def show_results(data): - """Show visual confirmation of working code.""" - plt.figure(figsize=(10, 6)) - # Visualization code - plt.show() - -if _should_show_plots(): - show_results(real_data) -``` - -### Step 4: Create Tests with Real Data -Create `modules/{module}/tests/test_{module}.py`: - -```python -import pytest -import numpy as np -from {module}_dev import MainClass - -def test_with_real_data(): - """Test with actual production data.""" - # Use real datasets, not mocks - real_data = load_real_dataset() - - instance = MainClass(real_data) - result = instance.process() - - # Test real properties - assert result.shape == expected_real_shape - assert result.dtype == expected_real_dtype - # Test with actual data characteristics -``` - -### Step 5: Convert and Export -```bash -# Convert to notebook (using Jupytext) -tito module notebooks --module {module} - -# Export to package -python bin/tito.py sync --module {module} - -# Test everything -python bin/tito.py test --module {module} -``` - -## 🏷️ NBDev Directives - -### Core Directives -- `#| default_exp core.{module}` - Sets export destination -- `#| export` - Marks code for export to package -- `#| hide` + `#| 
export` - Hidden implementation (instructor solution) -- `# %% [markdown]` - Markdown cells for explanations -- `# %%` - Code cells - -### Educational Structure -- **Concept explanation** β†’ **Implementation guidance** β†’ **Hidden solution** β†’ **Testing** β†’ **Visual feedback** - -## 🎨 Difficulty System - -- **🟒 Easy (5-10 min)**: Constructor, properties, basic operations -- **🟑 Medium (10-20 min)**: Conditional logic, data processing, error handling -- **πŸ”΄ Hard (20+ min)**: Complex algorithms, system integration, optimization - -## πŸ“‹ Implementation Guidelines - -### Students Implement (Core Learning) -- **Main functionality**: Core algorithms and data structures -- **Data processing**: Loading, preprocessing, batching -- **Error handling**: Input validation, type checking -- **Basic operations**: Mathematical operations, transformations - -### Students Receive (Focus on Learning Goals) -- **Complex setup**: Download progress bars, caching systems -- **Utility functions**: Visualization, debugging helpers -- **Advanced features**: Optimization, GPU support -- **Infrastructure**: Test frameworks, import management - -### TODO Guidance Quality -```python -""" -TODO: {Clear, specific task} - -APPROACH: -1. {Concrete first step} -2. {Concrete second step} -3. 
{Concrete third step} - -EXAMPLE: -Input: {actual_data_example} -Expected: {concrete_expected_output} - -HINTS: -- {Helpful guidance without giving code} -- {Systems thinking consideration} -- {Real-world connection} - -SYSTEMS THINKING: -- {Performance consideration} -- {Scalability question} -- {User experience aspect} -""" -``` - -## πŸ—‚οΈ Module Structure - -``` -modules/{module}/ -β”œβ”€β”€ {module}_dev.py # πŸ”§ Complete implementation -β”œβ”€β”€ {module}_dev.ipynb # πŸ““ Generated notebook -β”œβ”€β”€ tests/ -β”‚ └── test_{module}.py # πŸ§ͺ Real data tests -β”œβ”€β”€ README.md # πŸ“– Module guide -└── data/ # πŸ“Š Cached datasets (if needed) -``` - -## βœ… Quality Standards - -### Before Release -- [ ] Uses real data, not synthetic/mock data -- [ ] Includes progress feedback for long operations -- [ ] Visual feedback functions (development only) -- [ ] Tests use actual datasets at realistic scales -- [ ] TODO guidance includes systems thinking -- [ ] Clean separation between development and exports -- [ ] Follows "Build β†’ Use β†’ Understand" progression - -### Integration Requirements -- [ ] Exports correctly to `tinytorch.core.{module}` -- [ ] No circular dependencies -- [ ] Consistent with existing module patterns -- [ ] Compatible with TinyTorch CLI tools - -## πŸ’‘ Best Practices - -### Development Process -1. **Start with real data**: Choose production dataset first -2. **Write complete implementation**: Get it working before adding markers -3. **Add rich feedback**: Visual confirmation, progress indicators -4. **Test the student path**: Follow your own TODO guidance -5. **Optimize user experience**: Consider performance, caching, error messages - -### Systems Thinking -- **Performance**: How does this scale with larger datasets? -- **Caching**: How do we avoid repeated expensive operations? -- **User Experience**: How do students know the code is working? -- **Production Relevance**: How does this connect to real ML systems? 
- -### Educational Design -- **Immediate gratification**: Students see results quickly -- **Progressive complexity**: Build understanding step by step -- **Real-world connections**: Connect every concept to production -- **Visual confirmation**: Students see their code working - -## πŸ”„ Continuous Improvement - -After teaching with a module: -1. **Monitor student experience**: Where do they get stuck? -2. **Improve guidance**: Better TODO instructions, clearer hints -3. **Enhance feedback**: More visual confirmation, better progress indicators -4. **Optimize performance**: Faster data loading, better caching -5. **Update documentation**: Share learnings with other developers - -## 🎯 Success Metrics - -**Students should be able to:** -- Explain what they built in simple terms -- Modify code to solve related problems -- Connect module concepts to real ML systems -- Debug issues by understanding the system - -**Modules should achieve:** -- High student engagement and completion rates -- Smooth progression to next modules -- Real-world relevance and production quality -- Consistent patterns across the curriculum - ---- - -**Remember**: We're teaching ML systems engineering, not just algorithms. Every module should reflect real-world practices and challenges while maintaining the "Build β†’ Use β†’ Understand" educational cycle. 
\ No newline at end of file diff --git a/docs/development/module-template.md b/docs/development/module-template.md deleted file mode 100644 index d28bb53f..00000000 --- a/docs/development/module-template.md +++ /dev/null @@ -1,55 +0,0 @@ -# Module Template: "Where This Code Lives" Section - -## πŸ“¦ Where This Code Lives in the Final Package - -**Learning Side:** You work in `modules/{module_name}/{module_name}_dev.py` -**Building Side:** Code exports to `tinytorch.core.{destination}` - -```python -# Final package structure: -from tinytorch.core.{destination} import {exported_classes} -from tinytorch.core.tensor import Tensor -``` - -**Why this matters:** -- **Learning:** Focused modules for deep understanding -- **Production:** Proper organization like industry frameworks -- **Consistency:** Related functionality grouped together - -## Template Variables - -Replace these placeholders in each module: - -- `{module_name}`: The module directory name (e.g., "tensor", "layers", "cnn") -- `{destination}`: Where the code exports in the final package (e.g., "tensor", "layers", "activations") -- `{exported_classes}`: The main classes/functions being exported (e.g., "Tensor", "Dense, Conv2D", "ReLU, Sigmoid") - -## Examples - -### Tensor Module -```python -# Learning Side: modules/tensor/tensor_dev.py -# Building Side: tinytorch.core.tensor -from tinytorch.core.tensor import Tensor -``` - -### Layers Module -```python -# Learning Side: modules/layers/layers_dev.py -# Building Side: tinytorch.core.layers -from tinytorch.core.layers import Dense, Conv2D -``` - -### CNN Module -```python -# Learning Side: modules/cnn/cnn_dev.py -# Building Side: tinytorch.core.layers (Conv2D lives with Dense) -from tinytorch.core.layers import Dense, Conv2D -``` - -## Usage Instructions - -1. Copy this template section into each module's `*_dev.py` file -2. Replace the template variables with module-specific values -3. Update the `#| default_exp` directive to match the destination -4. 
Ensure the exported classes match what's actually being exported \ No newline at end of file diff --git a/docs/development/nbgrader-integration.md b/docs/development/nbgrader-integration.md deleted file mode 100644 index ab0c7145..00000000 --- a/docs/development/nbgrader-integration.md +++ /dev/null @@ -1,386 +0,0 @@ -# NBGrader Integration Guide - -This guide explains how TinyTorch integrates with nbgrader for enhanced assignment management and auto-grading. - -## Overview - -TinyTorch supports **three levels of student interaction**: - -1. **πŸŽ“ Self-Learning**: Regular student notebooks with rich educational content -2. **πŸ“ Assignments**: nbgrader-compatible assignments with auto-grading -3. **πŸ”§ Production**: Working package with instructor solutions - -## Architecture - -### Instructor Development Flow - -```mermaid -graph TD - A[Complete Implementation
<br/>modules/tensor/tensor_dev.py] --> B[NBDev Export<br/>tinytorch/core/tensor.py] - A --> C[Student Notebook<br/>modules/tensor/tensor_student.ipynb] - A --> D[nbgrader Assignment<br/>modules/tensor/tensor_assignment.ipynb] - D --> E[Auto-grading<br/>
Grade submissions] - C --> F[Self-paced Learning] - B --> G[Working Package] -``` - -### Dual Marking System - -TinyTorch supports both marking systems simultaneously: - -```python -# Enhanced module with both systems -class Tensor: - def __init__(self, data): - """ - Create a tensor from data. - - Args: - data: Input data (scalar, list, or numpy array) - """ - #| exercise_start - #| hint: Use np.array() to convert input data - #| solution_test: tensor.shape should match input shape - #| difficulty: easy - - ### BEGIN SOLUTION - self._data = np.array(data) - ### END SOLUTION - - #| exercise_end - - ### BEGIN HIDDEN TESTS - def test_tensor_creation(self): - """Hidden tests for auto-grading""" - t = Tensor([1, 2, 3]) - assert t.shape == (3,) - assert isinstance(t.data, np.ndarray) - ### END HIDDEN TESTS -``` - -## Usage - -### Generate Student Notebooks (Self-Learning) - -```bash -# Generate regular student notebooks -python bin/generate_student_notebooks.py --module tensor - -# Result: modules/tensor/tensor_student.ipynb -# - Rich educational content -# - TODO placeholders with hints -# - Self-paced learning -``` - -### Generate nbgrader Assignments - -```bash -# Generate nbgrader-compatible assignments -python bin/generate_student_notebooks.py --module tensor --nbgrader - -# Result: modules/tensor/tensor_assignment.ipynb -# - nbgrader markers preserved -# - Auto-grading ready -# - Hidden tests included -``` - -### Batch Generation - -```bash -# Generate all modules -python bin/generate_student_notebooks.py --all -python bin/generate_student_notebooks.py --all --nbgrader -``` - -## nbgrader Configuration - -### Setup nbgrader Environment - -```bash -# Install nbgrader -pip install nbgrader - -# Initialize nbgrader in course directory -nbgrader quickstart course_name - -# Configure nbgrader -jupyter nbextension install --sys-prefix --py nbgrader --overwrite -jupyter nbextension enable --sys-prefix --py nbgrader -jupyter serverextension enable --sys-prefix --py 
nbgrader -``` - -### Course Configuration - -Create `nbgrader_config.py`: - -```python -# nbgrader_config.py -c = get_config() - -# Course settings -c.CourseDirectory.course_id = "ml-systems-tinytorch" -c.CourseDirectory.source_directory = "assignments" -c.CourseDirectory.release_directory = "release" -c.CourseDirectory.submitted_directory = "submitted" -c.CourseDirectory.autograded_directory = "autograded" -c.CourseDirectory.feedback_directory = "feedback" - -# Auto-grading settings -c.Execute.timeout = 300 # 5 minutes per cell -c.Execute.allow_errors = True -c.Execute.error_on_timeout = True - -# Feedback settings -c.ClearSolutions.code_stub = { - "python": "# YOUR CODE HERE\nraise NotImplementedError()" -} -``` - -## Assignment Workflow - -### 1. Create Assignment - -```bash -# Copy assignment notebook to nbgrader source -cp modules/tensor/tensor_assignment.ipynb assignments/tensor/tensor.ipynb - -# Generate assignment -nbgrader generate_assignment tensor - -# Release to students -nbgrader release_assignment tensor -``` - -### 2. Student Submission - -Students work on assignments in the `release/tensor/` directory: - -```python -# Students see this: -class Tensor: - def __init__(self, data): - ### BEGIN SOLUTION - # YOUR CODE HERE - raise NotImplementedError() - ### END SOLUTION - - # Hidden tests run automatically -``` - -### 3. Auto-grading - -```bash -# Collect submissions -nbgrader collect tensor - -# Auto-grade submissions -nbgrader autograde tensor - -# Generate feedback -nbgrader generate_feedback tensor -``` - -## Advanced Features - -### 1. 
Partial Credit - -```python -# In instructor version -class Tensor: - def multiply(self, other): - ### BEGIN SOLUTION - # Full implementation (10 points) - result = self._data * other._data - return Tensor(result) - ### END SOLUTION - -### BEGIN HIDDEN TESTS -def test_multiply_basic(): - """Basic multiplication (5 points)""" - t1 = Tensor([1, 2, 3]) - t2 = Tensor([2, 3, 4]) - result = t1.multiply(t2) - assert result.data.tolist() == [2, 6, 12] - -def test_multiply_advanced(): - """Advanced multiplication (5 points)""" - t1 = Tensor([[1, 2], [3, 4]]) - t2 = Tensor([[2, 3], [4, 5]]) - result = t1.multiply(t2) - assert result.shape == (2, 2) -### END HIDDEN TESTS -``` - -### 2. Progressive Difficulty - -```python -# Easy exercise (auto-graded) -### BEGIN SOLUTION -def add_tensors(a, b): - return Tensor(a.data + b.data) -### END SOLUTION - -# Medium exercise (auto-graded + manual review) -### BEGIN SOLUTION -def matrix_multiply(a, b): - # Implementation with error handling - if a.shape[1] != b.shape[0]: - raise ValueError("Incompatible shapes") - return Tensor(np.dot(a.data, b.data)) -### END SOLUTION - -# Hard exercise (manual grading) -""" -Design Question: Explain your tensor broadcasting strategy. -Discuss trade-offs between memory usage and computation speed. -""" -``` - -### 3. Integration with TinyTorch CLI - -Extend the `tito` CLI to support nbgrader: - -```bash -# Generate assignments -tito assignment --create tensor - -# Grade submissions -tito assignment --grade tensor - -# Release feedback -tito assignment --feedback tensor -``` - -## Benefits - -### For Instructors - -1. **Dual-purpose content**: Same source creates both learning and grading materials -2. **Auto-grading**: Reduces grading workload significantly -3. **Consistent evaluation**: Standardized testing across students -4. **Detailed feedback**: Automatic feedback generation -5. **Grade tracking**: Integration with LMS systems - -### For Students - -1. 
**Immediate feedback**: Know if implementation is correct -2. **Progressive learning**: Build on verified foundations -3. **Flexible learning**: Choose between self-paced or assignment modes -4. **Real testing**: Same tests used in production package - -### For Course Management - -1. **Scalability**: Handle large class sizes -2. **Consistency**: Same quality across all students -3. **Analytics**: Track student progress and common issues -4. **Reusability**: Assignments work across semesters - -## Migration Strategy - -### Phase 1: Enhanced Marking (Current) -- Add nbgrader markers to existing modules -- Enhance student notebook generator -- Test dual generation system - -### Phase 2: nbgrader Integration -- Set up nbgrader environment -- Configure auto-grading workflows -- Train instructors on new system - -### Phase 3: Full Deployment -- Deploy to production course -- Collect feedback and iterate -- Expand to all modules - -## Best Practices - -### 1. Test Design - -```python -# Good: Specific, focused tests -def test_tensor_creation(): - t = Tensor([1, 2, 3]) - assert t.shape == (3,) - assert t.data.tolist() == [1, 2, 3] - -# Good: Edge case testing -def test_tensor_empty(): - t = Tensor([]) - assert t.shape == (0,) - assert t.size == 0 -``` - -### 2. Student Guidance - -```python -# Good: Clear instructions -def forward(self, x): - """ - Forward pass through the layer. - - Args: - x: Input tensor of shape (batch_size, input_size) - - Returns: - Output tensor of shape (batch_size, output_size) - - TODO: Implement matrix multiplication and bias addition - - Use self.weights for the weight matrix - - Use self.bias for the bias vector - - Return Tensor(result) - """ - ### BEGIN SOLUTION - result = x.data @ self.weights - if self.use_bias: - result += self.bias - return Tensor(result) - ### END SOLUTION -``` - -### 3. 
Error Handling - -```python -# Include error handling in solutions -def matrix_multiply(a, b): - ### BEGIN SOLUTION - if a.shape[1] != b.shape[0]: - raise ValueError(f"Cannot multiply shapes {a.shape} and {b.shape}") - - result = np.dot(a.data, b.data) - return Tensor(result) - ### END SOLUTION -``` - -## Troubleshooting - -### Common Issues - -1. **Marker conflicts**: Ensure nbgrader and TinyTorch markers don't interfere -2. **Cell metadata**: Check that nbgrader cell metadata is preserved -3. **Import issues**: Verify that package imports work in both environments -4. **Test failures**: Ensure hidden tests are robust and fair - -### Debug Commands - -```bash -# Check notebook structure -nbgrader validate assignment.ipynb - -# Test auto-grading locally -nbgrader autograde --create --force assignment - -# Validate student notebook -python -c "from modules.tensor.tensor_assignment import *; print('βœ… Imports working')" -``` - -## Conclusion - -The enhanced TinyTorch + nbgrader system provides: - -- **Flexibility**: Support both self-learning and formal assessment -- **Scalability**: Handle large courses with automated grading -- **Quality**: Consistent, fair evaluation across all students -- **Efficiency**: Reduced instructor workload while maintaining quality -- **Integration**: Seamless with existing TinyTorch architecture - -This system transforms TinyTorch from a learning framework into a complete course management solution while preserving its educational philosophy. 
\ No newline at end of file diff --git a/docs/development/quick-module-reference.md b/docs/development/quick-module-reference.md deleted file mode 100644 index c09bc983..00000000 --- a/docs/development/quick-module-reference.md +++ /dev/null @@ -1,239 +0,0 @@ -# πŸš€ Quick Module Reference - -**Fast reference for module development - commands, patterns, and essential workflows.** - -## πŸ”₯ **Essential Commands** - -### **System Commands** -```bash -tito system info # System information and course navigation -tito system doctor # Environment diagnosis -tito system jupyter # Start Jupyter Lab -``` - -### **Module Commands** -```bash -tito module status # Check all module status -tito module status --details # Detailed file structure -tito module test --module X # Test specific module -tito module test --all # Test all modules -tito module notebooks --module X # Convert Python to notebook (Jupytext) -``` - -### **Package Commands** -```bash -tito package sync # Export all notebooks to package -tito package sync --module X # Export specific module -tito package reset # Reset package to clean state -tito package nbdev --export # Run nbdev export -``` - -## 🎯 **Development Workflow** - -### **1. Module Planning** -- [ ] Choose real dataset (CIFAR-10, ImageNet, etc.) -- [ ] Define learning objectives and progression -- [ ] Identify production ML connections -- [ ] Plan visual feedback and progress indicators -- [ ] Decide what to provide vs. what students implement - -### **2. 
Write Complete Implementation** -Create `modules/{module}/{module}_dev.py`: -```python -# %% [markdown] -# # Module: {Title} -# -# ## 🎯 Learning Pattern: Build β†’ Use β†’ [Pattern] -# **Pattern Choice**: [Reflect/Analyze/Optimize] -# **Rationale**: [Why this pattern fits] -# -# Learning objectives and overview - -# %% -#| default_exp core.{module} - -import numpy as np - -# %% -#| export -class YourClass: - """ - {Description of the class} - - TODO: {What students need to implement} - - APPROACH: - 1. {Step 1 with specific guidance} - 2. {Step 2 with specific guidance} - 3. {Step 3 with specific guidance} - - EXAMPLE: - Input: {concrete_example} - Expected: {expected_output} - - HINTS: - - {Helpful hint about approach} - - {Systems thinking hint} - - {Real-world connection} - """ - def __init__(self, params): - raise NotImplementedError("Student implementation required") - -# %% -#| hide -#| export -class YourClass: - """Complete implementation (hidden from students).""" - def __init__(self, params): - # Actual working implementation - pass -``` - -### **3. Design Pattern-Specific Activities** -- **Reflect**: Add reflection questions and trade-off analysis -- **Analyze**: Include profiling tools and debugging exercises -- **Optimize**: Create performance challenges and iteration tasks - -### **4. Convert and Generate** -```bash -# Convert Python to notebook (using Jupytext) -tito module notebooks --module {module} - -# Generate student version -python3 bin/generate_student_notebooks.py --module {module} -``` - -### **5. 
Test and Verify** -```bash -# Test both versions work -jupyter lab modules/{module}/{module}_dev.ipynb -jupyter lab modules/{module}/{module}_dev_student.ipynb - -# Test integration -tito package sync --module {module} -tito module test --module {module} -``` - -## 🏷️ **Essential NBDev Directives** - -| Directive | Purpose | Example | -|-----------|---------|---------| -| `#| default_exp core.{module}` | Set export destination | Top of file | -| `#| export` | Export to package | Classes/functions | -| `#| hide` | Hide from students | Instructor solutions | -| `#| hide #| export` | Export but hide | Complete implementations | - -## πŸ“ **Module Structure** - -``` -modules/{module}/ -β”œβ”€β”€ {module}_dev.py # Main development file (Jupytext format) -β”œβ”€β”€ {module}_dev.ipynb # Jupyter notebook (auto-generated) -β”œβ”€β”€ module.yaml # Simple metadata (name, title, description, etc.) -β”œβ”€β”€ tests/ -β”‚ └── test_{module}.py # Comprehensive pytest tests -└── README.md # Module overview and usage -``` - -## ⚑ **Common Patterns** - -### **Real Data Loading** -```python -# βœ… Good: Real data with progress feedback -def load_cifar10(): - """Load CIFAR-10 with progress bar.""" - from tqdm import tqdm - # Show download progress - # Cache for performance - # Handle errors gracefully -``` - -### **Visual Feedback (Development Only)** -```python -# Development visualization (not exported) -def _show_results(data): - """Show visual confirmation (development only).""" - if not _in_development(): - return - plt.figure(figsize=(10, 6)) - # Rich visualization - plt.show() -``` - -### **Student Implementation Guidance** -```python -def method_to_implement(self): - """ - TODO: Implement this method - - APPROACH: - 1. Parse input data and validate shapes - 2. Apply the core algorithm step by step - 3. 
Return results in expected format - - EXAMPLE: - Input: tensor([1, 2, 3]) - Expected: tensor([2, 4, 6]) - - HINTS: - - Start with the simple case first - - Think about edge cases (empty input, wrong shapes) - - Use vectorized operations for performance - """ - raise NotImplementedError("Student implementation required") -``` - -## πŸ§ͺ **Testing Patterns** - -### **Test with Real Data** -```python -def test_with_real_data(): - """Test with actual production data.""" - # Load real dataset - data = load_real_cifar10_sample() - - # Test with realistic parameters - model = YourClass(realistic_params) - result = model.process(data) - - # Verify real properties - assert result.shape == expected_shape - assert result.dtype == expected_dtype -``` - -### **Performance Testing** -```python -def test_performance(): - """Ensure reasonable performance.""" - import time - - large_data = create_realistic_large_dataset() - start = time.time() - result = process(large_data) - elapsed = time.time() - start - - # Should complete in reasonable time - assert elapsed < 5.0 # 5 seconds max -``` - -## 🎯 **Quality Checklist** - -### **Before Release** -- [ ] Uses real data throughout (no synthetic/mock data) -- [ ] Includes progress feedback for long operations -- [ ] Provides visual confirmation of working code -- [ ] Tests with realistic data scales -- [ ] Follows "Build β†’ Use β†’ [Pattern]" progression -- [ ] Comprehensive TODO guidance with examples -- [ ] Clean separation: rich development, clean exports - -### **Integration Testing** -- [ ] Module exports correctly to `tinytorch.core.{module}` -- [ ] No circular import issues -- [ ] Compatible with existing modules -- [ ] Works with TinyTorch CLI tools -- [ ] Consistent with established patterns - ---- - -**πŸ’‘ Pro Tip**: Start with real data and production concerns first. Educational structure and TODO guidance come after you have working, realistic code. 
\ No newline at end of file diff --git a/docs/pedagogy/testing-architecture.md b/docs/pedagogy/testing-architecture.md index 58055341..306e12c2 100644 --- a/docs/pedagogy/testing-architecture.md +++ b/docs/pedagogy/testing-architecture.md @@ -118,7 +118,7 @@ python -m pytest tests/test_tensor.py -v When creating new modules, follow this pattern: ### 1. Core Implementation -- Implement basic functionality in `modules/{module}/{module}_dev.py` +- Implement basic functionality in `assignments/source/{module}/{module}_dev.py` - Add `#| export` directives for NBDev export - Include both student and instructor versions diff --git a/tinytorch/_modidx.py b/tinytorch/_modidx.py index 07682c61..3a5bce3a 100644 --- a/tinytorch/_modidx.py +++ b/tinytorch/_modidx.py @@ -159,6 +159,10 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/core/utils.py'), 'tinytorch.core.utils.DeveloperProfile.__init__': ( '00_setup/setup_dev.html#developerprofile.__init__', 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.DeveloperProfile.__str__': ( '00_setup/setup_dev.html#developerprofile.__str__', + 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.DeveloperProfile.get_profile_info': ( '00_setup/setup_dev.html#developerprofile.get_profile_info', + 'tinytorch/core/utils.py'), 'tinytorch.core.utils.DeveloperProfile.get_signature': ( '00_setup/setup_dev.html#developerprofile.get_signature', 'tinytorch/core/utils.py'), 'tinytorch.core.utils.SystemInfo': ('00_setup/setup_dev.html#systeminfo', 'tinytorch/core/utils.py'), diff --git a/tinytorch/core/utils.py b/tinytorch/core/utils.py index b3181eee..3f0a7a4b 100644 --- a/tinytorch/core/utils.py +++ b/tinytorch/core/utils.py @@ -3,133 +3,129 @@ # %% auto 0 __all__ = ['hello_tinytorch', 'add_numbers', 'SystemInfo', 'DeveloperProfile'] -# %% ../../assignments/source/00_setup/setup_dev.ipynb 3 -def hello_tinytorch(): - """ - A simple hello world function for TinyTorch. 
- - TODO: Implement this function to display TinyTorch ASCII art and welcome message. - Load the flame art from tinytorch_flame.txt file with graceful fallback. - - HINTS: - 1. Try to load ASCII art from 'tinytorch_flame.txt' in current directory - 2. If file exists, read and print the content - 3. Add "TinyπŸ”₯Torch" and "Build ML Systems from Scratch!" messages - 4. If file doesn't exist, just print the emoji version - 5. Handle any exceptions gracefully - - EXAMPLE OUTPUT: - [ASCII art from file] - TinyπŸ”₯Torch - Build ML Systems from Scratch! - """ - # YOUR CODE HERE - raise NotImplementedError() +# %% ../../assignments/source/00_setup/setup_dev.ipynb 2 +# Required imports for TinyTorch utilities +import sys +import platform +from datetime import datetime +import os +from pathlib import Path # %% ../../assignments/source/00_setup/setup_dev.ipynb 4 +def hello_tinytorch(): + """ + Display a welcome message for TinyTorch. + + This function should: + 1. Try to load ASCII art from 'tinytorch_flame.txt' if it exists + 2. If the file doesn't exist, display a simple text banner + 3. Print "TinyTorch" and "Build ML Systems from Scratch!" + 4. Handle any exceptions gracefully + """ + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION + +# %% ../../assignments/source/00_setup/setup_dev.ipynb 5 def add_numbers(a, b): """ Add two numbers together. - TODO: Implement addition of two numbers. - This is the foundation of all mathematical operations in ML. - Args: a: First number (int or float) b: Second number (int or float) Returns: Sum of a and b - - EXAMPLE: - add_numbers(2, 3) should return 5 - add_numbers(1.5, 2.5) should return 4.0 """ + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION -# %% ../../assignments/source/00_setup/setup_dev.ipynb 9 +# %% ../../assignments/source/00_setup/setup_dev.ipynb 6 class SystemInfo: """ - Simple system information class. 
- - TODO: Implement this class to collect and display system information. - - REQUIREMENTS: - 1. __init__: Collect Python version, platform, and machine information - 2. __str__: Return formatted system info string - 3. is_compatible: Check if Python version >= 3.8 - - HINTS: - - Use sys.version_info for Python version - - Use platform.system() for platform name - - Use platform.machine() for machine architecture - - Store these as instance attributes in __init__ + A class for collecting and displaying system information. """ def __init__(self): """ - Initialize system information collection. - - TODO: Collect Python version, platform, and machine information. - Store as instance attributes: self.python_version, self.platform, self.machine + Initialize the SystemInfo object. + Collect Python version, platform, and machine information. """ + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION def __str__(self): """ - Return human-readable system information. - - TODO: Format system info as a readable string. - FORMAT: "Python X.Y on Platform (Architecture)" - EXAMPLE: "Python 3.9 on Darwin (arm64)" + Return a formatted string representation of system information. + Format: "Python X.Y.Z on Platform (Architecture)" """ + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION def is_compatible(self): """ - Check if system meets minimum requirements. - - TODO: Check if Python version >= 3.8 - Return True if compatible, False otherwise + Check if the Python version is compatible (>= 3.8). + Returns True if compatible, False otherwise. """ + ### BEGIN SOLUTION # YOUR CODE HERE raise NotImplementedError() + ### END SOLUTION -# %% ../../assignments/source/00_setup/setup_dev.ipynb 15 +# %% ../../assignments/source/00_setup/setup_dev.ipynb 7 class DeveloperProfile: """ - Developer profile for personalizing TinyTorch experience. - - TODO: OPTIONAL CHALLENGE - Implement this class for extra credit! 
- - REQUIREMENTS: - 1. Store developer information (name, email, etc.) - 2. Load ASCII art from file with fallback - 3. Generate formatted profile display - 4. Create professional signature - - This is an advanced exercise - only attempt after completing the required parts! + A class representing a developer profile. """ - def __init__(self, name="Student", email="student@example.com"): + def __init__(self, name="Student", email="student@example.com", affiliation="TinyTorch Community", specialization="ML Systems"): """ - Initialize developer profile. + Initialize a developer profile. - TODO: Store developer information with defaults. - Feel free to customize with your own info! + Args: + name: Developer's name + email: Developer's email + affiliation: Developer's affiliation or organization + specialization: Developer's area of specialization """ - # YOUR CODE HERE (OPTIONAL) - self.name = name - self.email = email + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION + + def __str__(self): + """ + Return a basic string representation of the developer. + Format: "Name (email)" + """ + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION def get_signature(self): """ - Get a short signature for code headers. - - TODO: Return a signature like "Built by Name (email)" + Return a formatted signature for the developer. + Should include name, affiliation, and specialization. """ - # YOUR CODE HERE (OPTIONAL) - return f"Built by {self.name} ({self.email})" + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION + + def get_profile_info(self): + """ + Return comprehensive profile information as a dictionary. + """ + ### BEGIN SOLUTION + # YOUR CODE HERE + raise NotImplementedError() + ### END SOLUTION
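Every stub in the new `tinytorch/core/utils.py` raises `NotImplementedError` inside its `### BEGIN SOLUTION` block. For readers reviewing this patch, here is a minimal sketch of what passing implementations might look like, based solely on the docstring requirements above. This is illustrative only — the exact signature format, profile-dictionary keys, and fallback behavior are assumptions, not the course's official solutions.

```python
# Illustrative solutions for the stubs in tinytorch/core/utils.py.
# Details not pinned down by the docstrings (signature format, dict keys)
# are guesses, not the official course answers.
import sys
import platform
from pathlib import Path


def hello_tinytorch():
    """Display a welcome banner, loading ASCII art if available."""
    try:
        art = Path("tinytorch_flame.txt")
        if art.exists():
            print(art.read_text())
    except OSError:
        pass  # on any file error, fall through to the plain banner
    print("TinyTorch")
    print("Build ML Systems from Scratch!")


def add_numbers(a, b):
    """Return the sum of two numbers."""
    return a + b


class SystemInfo:
    """Collects Python version, platform, and machine architecture."""

    def __init__(self):
        v = sys.version_info
        self.python_version = f"{v.major}.{v.minor}.{v.micro}"
        self.platform = platform.system()
        self.machine = platform.machine()

    def __str__(self):
        # Matches the documented format "Python X.Y.Z on Platform (Architecture)"
        return f"Python {self.python_version} on {self.platform} ({self.machine})"

    def is_compatible(self):
        """True if the running Python is >= 3.8."""
        return sys.version_info >= (3, 8)


class DeveloperProfile:
    """Stores developer identity for personalized output."""

    def __init__(self, name="Student", email="student@example.com",
                 affiliation="TinyTorch Community", specialization="ML Systems"):
        self.name = name
        self.email = email
        self.affiliation = affiliation
        self.specialization = specialization

    def __str__(self):
        # Matches the documented format "Name (email)"
        return f"{self.name} ({self.email})"

    def get_signature(self):
        # Separator style is an assumption; docstring only requires
        # name, affiliation, and specialization.
        return f"{self.name} | {self.affiliation} | {self.specialization}"

    def get_profile_info(self):
        # Key names are an assumption; docstring only asks for a dictionary.
        return {
            "name": self.name,
            "email": self.email,
            "affiliation": self.affiliation,
            "specialization": self.specialization,
        }
```

Note that because NBGrader strips everything between `### BEGIN SOLUTION` and `### END SOLUTION` when generating the student release, code like this lives only in the instructor version of `setup_dev.ipynb`.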