diff --git a/modules/source/01_tensor/tensor_dev.ipynb b/modules/source/01_tensor/tensor_dev.ipynb
index c6d011b6..9cad66bf 100644
--- a/modules/source/01_tensor/tensor_dev.ipynb
+++ b/modules/source/01_tensor/tensor_dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "6ca4b9f5",
+   "id": "22bf7b48",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -51,7 +51,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3dcaaffc",
+   "id": "7d24677b",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e70ae12a",
+   "id": "447a0b7a",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -116,7 +116,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7a1e48b5",
+   "id": "c2b4bc17",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -175,7 +175,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "42f2279e",
+   "id": "1dc8a950",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -214,7 +214,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "cb1e99f0",
+   "id": "334562a5",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -252,7 +252,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4a090be0",
+   "id": "27b3b08d",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -724,7 +724,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "a49cddfd",
+   "id": "345f0782",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -742,7 +742,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "79195fe8",
+   "id": "503244d4",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -791,7 +791,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7cbed527",
+   "id": "5176cde0",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -839,7 +839,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "30f53e64",
+   "id": "45461424",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -882,7 +882,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e13b5c91",
+   "id": "7ba6f505",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -900,7 +900,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "26ab9e58",
+   "id": "9471ca95",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -957,7 +957,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "8ab4eb75",
+   "id": "453ed0e5",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -1057,7 +1057,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "75e72654",
+   "id": "ae6dca6f",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1075,7 +1075,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "434f6550",
+   "id": "d1bf193f",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1132,7 +1132,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "de04fa2e",
+   "id": "23a70fb2",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -1235,7 +1235,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "2f4cd90a",
+   "id": "a320a34f",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1253,7 +1253,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e497f3d1",
+   "id": "04a65af9",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1323,7 +1323,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "c944cd8b",
+   "id": "509140c2",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -1417,7 +1417,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e8312574",
+   "id": "21664f47",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1435,7 +1435,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "66d6beb6",
+   "id": "5f9edd66",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1508,7 +1508,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "71042cd1",
+   "id": "8b900870",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -1583,7 +1583,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "9d5518b2",
+   "id": "a98400bf",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -1644,7 +1644,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "23b79c43",
+   "id": "c74f78e6",
    "metadata": {
     "lines_to_next_cell": 1
    },
@@ -1666,7 +1666,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c8fdde9c",
+   "id": "6766dc8a",
    "metadata": {
     "lines_to_next_cell": 2,
     "nbgrader": {
@@ -1794,7 +1794,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "f0f02362",
+   "id": "602da67a",
    "metadata": {
     "cell_marker": "\"\"\""
    },
diff --git a/modules/source/02_activations/activations_dev.ipynb b/modules/source/02_activations/activations_dev.ipynb
index 91358874..f3dc5445 100644
--- a/modules/source/02_activations/activations_dev.ipynb
+++ b/modules/source/02_activations/activations_dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "8f7f092b",
+   "id": "41637b5b",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ba7543b3",
+   "id": "eb80f71c",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "5f04cb4a",
+   "id": "ad445b19",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -78,7 +78,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3042497e",
+   "id": "7fc4b3ae",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -102,7 +102,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "609861d1",
+   "id": "6c49b0a7",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -144,7 +144,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "9b7b4834",
+   "id": "a82d5ffc",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -166,7 +166,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "29845a4a",
+   "id": "d954190f",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -190,7 +190,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d5fc598c",
+   "id": "1d26aa84",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -228,7 +228,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b6cb596e",
+   "id": "cd112f28",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -287,7 +287,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "84674501",
+   "id": "87407a56",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -303,7 +303,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3fbc497a",
+   "id": "8599e53a",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -344,7 +344,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "a076a2f1",
+   "id": "96438263",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -386,7 +386,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a079c21f",
+   "id": "6bdad44d",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -442,7 +442,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "8cf41efa",
+   "id": "853265df",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -458,7 +458,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9bf84e40",
+   "id": "e3f2e5fd",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -505,7 +505,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "26a36cf2",
+   "id": "d137e456",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -544,7 +544,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a93086db",
+   "id": "3a3ec4c5",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -600,7 +600,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "4488836b",
+   "id": "b2ad2baa",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -616,7 +616,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "46388fa5",
+   "id": "b92572ae",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -664,7 +664,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "f341ff48",
+   "id": "d1cdd503",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -707,7 +707,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a9684ba3",
+   "id": "90f15779",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -768,7 +768,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7c8ef48f",
+   "id": "eb655b3b",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -784,7 +784,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c42d7ec8",
+   "id": "838060ac",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -832,7 +832,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ba4edcdd",
+   "id": "a8047ea8",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -870,7 +870,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "2821ef9e",
+   "id": "aa266bb7",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -942,7 +942,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "70c31533",
+   "id": "80e6ad27",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -958,7 +958,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "852f5832",
+   "id": "f3db3810",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1016,7 +1016,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "b0107716",
+   "id": "2db83cef",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -1029,7 +1029,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "87c16b51",
+   "id": "428eaa1b",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -1049,7 +1049,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "0a812659",
+   "id": "fe7666b9",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1063,7 +1063,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "0015101e",
+   "id": "fac9ee55",
    "metadata": {
     "lines_to_next_cell": 2,
     "nbgrader": {
@@ -1162,7 +1162,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d0575067",
+   "id": "6a9cc930",
    "metadata": {
     "cell_marker": "\"\"\""
    },
diff --git a/modules/source/03_layers/layers_dev.ipynb b/modules/source/03_layers/layers_dev.ipynb
index 76e3f822..2007b4b5 100644
--- a/modules/source/03_layers/layers_dev.ipynb
+++ b/modules/source/03_layers/layers_dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "2cfa2aae",
+   "id": "46b4a258",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -53,7 +53,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c7a84c05",
+   "id": "bc3a80e9",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -77,7 +77,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e52c72c2",
+   "id": "76d31667",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -101,7 +101,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e05eee85",
+   "id": "e0421bae",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -139,7 +139,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "f489f983",
+   "id": "6670b0b1",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -160,7 +160,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "fff4865c",
+   "id": "2dc8d8c8",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -211,7 +211,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "da931144",
+   "id": "a973eb44",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -335,7 +335,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "77988775",
+   "id": "d4cbdf9d",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -351,7 +351,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4c2e0b2e",
+   "id": "174fe10a",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -411,7 +411,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d2fa31b7",
+   "id": "e961f791",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -473,7 +473,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "88715659",
+   "id": "b924d865",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -556,6 +556,10 @@
     "        return Tensor(output_data)\n",
     "        ### END SOLUTION\n",
     "\n",
+    "    def __call__(self, x, training=True):\n",
+    "        \"\"\"Allows the layer to be called like a function.\"\"\"\n",
+    "        return self.forward(x, training)\n",
+    "\n",
     "    def parameters(self):\n",
     "        \"\"\"Dropout has no parameters.\"\"\"\n",
     "        return []\n",
@@ -566,7 +570,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "0d33ff6f",
+   "id": "ee0bc9a1",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -582,7 +586,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "cb0b7ae8",
+   "id": "c76974a1",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -658,7 +662,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "4b4aac2f",
+   "id": "231dae31",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -718,7 +722,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "0f4f3b7d",
+   "id": "bbc4aad9",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -777,7 +781,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "99df0451",
+   "id": "0ca58dc7",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -832,7 +836,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5e82e08c",
+   "id": "a9b7ae8a",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -873,7 +877,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "85ea5db5",
+   "id": "5570a366",
    "metadata": {
     "lines_to_next_cell": 1
    },
@@ -895,7 +899,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ebddc165",
+   "id": "b2e11bf8",
    "metadata": {
     "lines_to_next_cell": 2,
     "nbgrader": {
@@ -986,7 +990,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "31c54ee3",
+   "id": "4c9212f9",
    "metadata": {
     "cell_marker": "\"\"\""
    },
diff --git a/modules/source/04_losses/losses_dev.ipynb b/modules/source/04_losses/losses_dev.ipynb
index e0301d50..39f53cf2 100644
--- a/modules/source/04_losses/losses_dev.ipynb
+++ b/modules/source/04_losses/losses_dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "7a46b4ac",
+   "id": "9d798b1c",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "95565c7e",
+   "id": "91804987",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "1c9b95bd",
+   "id": "c09dc686",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -80,7 +80,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "054331fd",
+   "id": "51189bc1",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -113,7 +113,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e60f5944",
+   "id": "cc227c2d",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -189,7 +189,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "2a8ac601",
+   "id": "49e5039b",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -235,7 +235,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "3e628237",
+   "id": "b1e1cbd0",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -247,7 +247,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "84a9e420",
+   "id": "820e9937",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -297,7 +297,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c570112b",
+   "id": "854758b3",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -348,7 +348,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7e637f17",
+   "id": "6b57e650",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -389,7 +389,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "54c1f877",
+   "id": "b8be9f2c",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -459,7 +459,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "24575e1c",
+   "id": "aca5154a",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -531,7 +531,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f7e6aa15",
+   "id": "7391538b",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -577,7 +577,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "cde7a8f4",
+   "id": "0b9b254c",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -670,7 +670,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "0e3a2600",
+   "id": "eb59fb50",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -746,7 +746,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "45e2f990",
+   "id": "c59fbbfd",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -797,7 +797,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "30fd6c76",
+   "id": "599727d1",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -906,7 +906,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3bd407b8",
+   "id": "54a20f3f",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -982,7 +982,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "985dd530",
+   "id": "1bab9d23",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1033,7 +1033,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d7c0a96b",
+   "id": "ca40b581",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1090,7 +1090,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c5baee20",
+   "id": "76b4eb81",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -1146,7 +1146,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e2216e97",
+   "id": "b90c91f0",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -1211,7 +1211,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "435562a9",
+   "id": "e2fc1aa7",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1286,7 +1286,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "0e2b66af",
+   "id": "573fa75d",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -1336,7 +1336,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "518ead17",
+   "id": "b7f12c78",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -1393,7 +1393,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7d0c2aa4",
+   "id": "4c6ebac9",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1457,7 +1457,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7345a14f",
+   "id": "d0b635c1",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -1513,7 +1513,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "73d7096f",
+   "id": "d770e887",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -1526,7 +1526,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5a5f3a29",
+   "id": "55fd411d",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1606,7 +1606,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6e91917e",
+   "id": "b66f2370",
    "metadata": {
     "lines_to_next_cell": 2
    },
@@ -1619,7 +1619,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "16461a93",
+   "id": "ce0d9c33",
    "metadata": {
     "cell_marker": "\"\"\""
    },
diff --git a/modules/source/06_optimizers/optimizers_dev.ipynb b/modules/source/06_optimizers/optimizers_dev.ipynb
index dfd5ac71..7ef0e46e 100644
--- a/modules/source/06_optimizers/optimizers_dev.ipynb
+++ b/modules/source/06_optimizers/optimizers_dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "3d8f1c62",
+   "id": "518b6ae0",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -51,7 +51,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a2b41da9",
+   "id": "30bbc6f8",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -68,15 +68,12 @@
     "from typing import List, Union, Optional, Dict, Any\n",
     "\n",
     "# Import Tensor from Module 01 (now with gradient support from Module 05)\n",
-    "import sys\n",
-    "import os\n",
-    "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n",
-    "from tensor_dev import Tensor"
+    "from tinytorch.core.tensor import Tensor"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "3169e215",
+   "id": "9057f3bf",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -133,7 +130,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "baec0321",
+   "id": "3b2f074e",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -219,7 +216,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "49716b34",
+   "id": "3000c581",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -247,7 +244,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "06d956dd",
+   "id": "d9343aa4",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -258,6 +255,7 @@
    },
    "outputs": [],
    "source": [
+    "#| export\n",
     "class Optimizer:\n",
     "    \"\"\"\n",
     "    Base class for all optimizers.\n",
@@ -332,7 +330,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "82015c9d",
+   "id": "0ded4383",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -348,7 +346,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c0f57a08",
+   "id": "25d61648",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -401,7 +399,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7d9b8ceb",
+   "id": "bf5adabc",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -473,7 +471,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ae4679bb",
+   "id": "12f0f4b6",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -484,6 +482,7 @@
    },
    "outputs": [],
    "source": [
+    "#| export\n",
     "class SGD(Optimizer):\n",
     "    \"\"\"\n",
     "    Stochastic Gradient Descent with momentum.\n",
@@ -576,7 +575,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ced264d8",
+   "id": "815d0bab",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -592,7 +591,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "68ae4ccf",
+   "id": "c01ebc69",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -659,7 +658,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "480929e4",
+   "id": "c656b1b4",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -733,7 +732,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "2d7e339f",
+   "id": "b545ed16",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -744,6 +743,7 @@
    },
    "outputs": [],
    "source": [
+    "#| export\n",
     "class Adam(Optimizer):\n",
     "    \"\"\"\n",
     "    Adam optimizer with adaptive learning rates.\n",
@@ -853,7 +853,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "6f114c5b",
+   "id": "b688bced",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -869,7 +869,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7f64abcc",
+   "id": "61fa7116",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -945,7 +945,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "16ccfeaa",
+   "id": "7cb028b2",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1019,7 +1019,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "23c16f99",
+   "id": "277056cc",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -1030,6 +1030,7 @@
    },
    "outputs": [],
    "source": [
+    "#| export\n",
     "class AdamW(Optimizer):\n",
     "    \"\"\"\n",
     "    AdamW optimizer with decoupled weight decay.\n",
@@ -1133,7 +1134,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "0269f86a",
+   "id": "d59b1b2b",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1149,7 +1150,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "016d7b36",
+   "id": "619464ee",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1224,7 +1225,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "295d5ee6",
+   "id": "29f5ad7b",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -1251,7 +1252,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "47d676c3",
+   "id": "9dd160f5",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1297,7 +1298,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "67290db6",
+   "id": "ab882d12",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -1355,7 +1356,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "21136a44",
+   "id": "abac74aa",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -1434,7 +1435,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "b171c224",
+   "id": "146f209d",
    "metadata": {
     "lines_to_next_cell": 1
    },
@@ -1456,7 +1457,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "46ae99ae",
+   "id": "1726f746",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -1607,7 +1608,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "896f4c69",
+   "id": "7328ac69",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1618,7 +1619,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "35b39338",
+   "id": "c662a5f7",
    "metadata": {
     "cell_marker": "\"\"\""
    },
diff --git a/modules/source/07_training/training_dev.ipynb b/modules/source/07_training/training_dev.ipynb
index 06cc7480..9fe44d21 100644
--- a/modules/source/07_training/training_dev.ipynb
+++ b/modules/source/07_training/training_dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "765eea82",
+   "id": "78521710",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -52,7 +52,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "38b1402a",
+   "id": "d912bff5",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -75,22 +75,15 @@
     "import os\n",
     "\n",
     "# Import dependencies from other modules\n",
-    "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n",
-    "from tensor_dev import Tensor\n",
-    "\n",
-    "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))\n",
-    "from layers_dev import Linear\n",
-    "\n",
-    "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '04_losses'))\n",
-    "from losses_dev import MSELoss, CrossEntropyLoss\n",
-    "\n",
-    "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_optimizers'))\n",
-    "from optimizers_dev import SGD, AdamW"
+    "from tinytorch.core.tensor import Tensor\n",
+    "from tinytorch.core.layers import Linear\n",
+    "from tinytorch.core.losses import MSELoss, CrossEntropyLoss\n",
+    "from tinytorch.core.optimizers import SGD, AdamW"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "89550fb8",
+   "id": "2f4fc27e",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -119,7 +112,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d0b48f7a",
+   "id": "4fa19758",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -166,7 +159,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "4ed8a995",
+   "id": "8599a0f1",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -180,7 +173,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ebfa93fc",
+   "id": "ed5a85db",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -221,7 +214,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "347b09da",
+   "id": "9dac2b34",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -233,6 +226,7 @@
    },
    "outputs": [],
    "source": [
+    "#| export\n",
     "class CosineSchedule:\n",
     "    \"\"\"\n",
     "    Cosine annealing learning rate schedule.\n",
@@ -274,7 +268,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "c1db4e03",
+   "id": "c146074f",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -290,7 +284,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c27f6878",
+   "id": "ee33397e",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -334,7 +328,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "81fc482c",
+   "id": "da8efa9f",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -380,7 +374,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "db99efd3",
+   "id": "29a5573c",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -457,7 +451,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "3b0b188d",
+   "id": "7c1510f3",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -473,7 +467,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "91bf937f",
+   "id": "754c9cd5",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -540,7 +534,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "dde7833e",
+   "id": "a827fb93",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -597,7 +591,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "fa8339e1",
+   "id": "63354dd4",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -609,6 +603,7 @@
    },
    "outputs": [],
    "source": [
+    "#| export\n",
     "class Trainer:\n",
     "    \"\"\"\n",
     "    Complete training orchestrator for neural networks.\n",
@@ -875,7 +870,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "529dfcf5",
+   "id": "9266bc60",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -891,7 +886,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "03510440",
+   "id": "8ce52aba",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -972,7 +967,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "905180bd",
+   "id": "7ad86345",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 2
@@ -985,7 +980,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "3c631938",
+   "id": "9953bcd4",
    "metadata": {
     "lines_to_next_cell": 1
    },
@@ -1009,7 +1004,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "8b65c5ab",
+   "id": "2eab95b6",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1023,7 +1018,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "29eea538",
+   "id": "0580d838",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -1151,7 +1146,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ae1bc4b9",
+   "id": "62eadf89",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -1169,7 +1164,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ad8ae396",
+   "id": "ebe885e5",
    "metadata": {
     "cell_marker": "\"\"\""
    },
diff --git a/tinytorch/_modidx.py b/tinytorch/_modidx.py
new file mode 100644
index 00000000..8ad72781
--- /dev/null
+++ b/tinytorch/_modidx.py
@@ -0,0 +1,204 @@
+# ╔═══════════════════════════════════════════════════════════════════════════════╗
+# ║                        🚨 CRITICAL WARNING 🚨                                ║
+# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
+# ║                                                                               ║
+# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
+# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
+# ║                                                                               ║
+# ║  ✅ TO EDIT: modules/source/<module>/<module>_dev.py                ║
+# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
+# ║                                                                               ║
+# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
+# ║     Editing it directly may break module functionality and training.         ║
+# ║                                                                               ║
+# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
+# ║     happens! The tinytorch/ directory is just the compiled output.           ║
+# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# Autogenerated by nbdev
+
+d = { 'settings': { 'branch': 'main',
+                'doc_baseurl': '/TinyTorch/',
+                'doc_host': 'https://tinytorch.github.io',
+                'git_url': 'https://github.com/tinytorch/TinyTorch/',
+                'lib_path': 'tinytorch'},
+  'syms': { 'tinytorch.core.activations': { 'tinytorch.core.activations.GELU': ( '02_activations/activations_dev.html#gelu',
+                                                                                 'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.GELU.__call__': ( '02_activations/activations_dev.html#gelu.__call__',
+                                                                                          'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.GELU.backward': ( '02_activations/activations_dev.html#gelu.backward',
+                                                                                          'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.GELU.forward': ( '02_activations/activations_dev.html#gelu.forward',
+                                                                                         'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.ReLU': ( '02_activations/activations_dev.html#relu',
+                                                                                 'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.ReLU.__call__': ( '02_activations/activations_dev.html#relu.__call__',
+                                                                                          'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.ReLU.backward': ( '02_activations/activations_dev.html#relu.backward',
+                                                                                          'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.ReLU.forward': ( '02_activations/activations_dev.html#relu.forward',
+                                                                                         'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Sigmoid': ( '02_activations/activations_dev.html#sigmoid',
+                                                                                    'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Sigmoid.__call__': ( '02_activations/activations_dev.html#sigmoid.__call__',
+                                                                                             'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Sigmoid.backward': ( '02_activations/activations_dev.html#sigmoid.backward',
+                                                                                             'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Sigmoid.forward': ( '02_activations/activations_dev.html#sigmoid.forward',
+                                                                                            'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Softmax': ( '02_activations/activations_dev.html#softmax',
+                                                                                    'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Softmax.__call__': ( '02_activations/activations_dev.html#softmax.__call__',
+                                                                                             'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Softmax.backward': ( '02_activations/activations_dev.html#softmax.backward',
+                                                                                             'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Softmax.forward': ( '02_activations/activations_dev.html#softmax.forward',
+                                                                                            'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Tanh': ( '02_activations/activations_dev.html#tanh',
+                                                                                 'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Tanh.__call__': ( '02_activations/activations_dev.html#tanh.__call__',
+                                                                                          'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Tanh.backward': ( '02_activations/activations_dev.html#tanh.backward',
+                                                                                          'tinytorch/core/activations.py'),
+                                            'tinytorch.core.activations.Tanh.forward': ( '02_activations/activations_dev.html#tanh.forward',
+                                                                                         'tinytorch/core/activations.py')},
+            'tinytorch.core.autograd': {},
+            'tinytorch.core.layers': { 'tinytorch.core.layers.Dropout': ('03_layers/layers_dev.html#dropout', 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Dropout.__call__': ( '03_layers/layers_dev.html#dropout.__call__',
+                                                                                   'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Dropout.__init__': ( '03_layers/layers_dev.html#dropout.__init__',
+                                                                                   'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Dropout.__repr__': ( '03_layers/layers_dev.html#dropout.__repr__',
+                                                                                   'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Dropout.forward': ( '03_layers/layers_dev.html#dropout.forward',
+                                                                                  'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Dropout.parameters': ( '03_layers/layers_dev.html#dropout.parameters',
+                                                                                     'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Linear': ('03_layers/layers_dev.html#linear', 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Linear.__call__': ( '03_layers/layers_dev.html#linear.__call__',
+                                                                                  'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Linear.__init__': ( '03_layers/layers_dev.html#linear.__init__',
+                                                                                  'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Linear.__repr__': ( '03_layers/layers_dev.html#linear.__repr__',
+                                                                                  'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Linear.forward': ( '03_layers/layers_dev.html#linear.forward',
+                                                                                 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Linear.parameters': ( '03_layers/layers_dev.html#linear.parameters',
+                                                                                    'tinytorch/core/layers.py')},
+            'tinytorch.core.losses': { 'tinytorch.core.losses.BinaryCrossEntropyLoss': ( '04_losses/losses_dev.html#binarycrossentropyloss',
+                                                                                         'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.BinaryCrossEntropyLoss.__call__': ( '04_losses/losses_dev.html#binarycrossentropyloss.__call__',
+                                                                                                  'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.BinaryCrossEntropyLoss.__init__': ( '04_losses/losses_dev.html#binarycrossentropyloss.__init__',
+                                                                                                  'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.BinaryCrossEntropyLoss.backward': ( '04_losses/losses_dev.html#binarycrossentropyloss.backward',
+                                                                                                  'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.BinaryCrossEntropyLoss.forward': ( '04_losses/losses_dev.html#binarycrossentropyloss.forward',
+                                                                                                 'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.CrossEntropyLoss': ( '04_losses/losses_dev.html#crossentropyloss',
+                                                                                   'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.CrossEntropyLoss.__call__': ( '04_losses/losses_dev.html#crossentropyloss.__call__',
+                                                                                            'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.CrossEntropyLoss.__init__': ( '04_losses/losses_dev.html#crossentropyloss.__init__',
+                                                                                            'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.CrossEntropyLoss.backward': ( '04_losses/losses_dev.html#crossentropyloss.backward',
+                                                                                            'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.CrossEntropyLoss.forward': ( '04_losses/losses_dev.html#crossentropyloss.forward',
+                                                                                           'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.MSELoss': ('04_losses/losses_dev.html#mseloss', 'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.MSELoss.__call__': ( '04_losses/losses_dev.html#mseloss.__call__',
+                                                                                   'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.MSELoss.__init__': ( '04_losses/losses_dev.html#mseloss.__init__',
+                                                                                   'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.MSELoss.backward': ( '04_losses/losses_dev.html#mseloss.backward',
+                                                                                   'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.MSELoss.forward': ( '04_losses/losses_dev.html#mseloss.forward',
+                                                                                  'tinytorch/core/losses.py'),
+                                       'tinytorch.core.losses.import_previous_module': ( '04_losses/losses_dev.html#import_previous_module',
+                                                                                         'tinytorch/core/losses.py')},
+            'tinytorch.core.optimizers': { 'tinytorch.core.optimizers.Adam': ( '06_optimizers/optimizers_dev.html#adam',
+                                                                               'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.Adam.__init__': ( '06_optimizers/optimizers_dev.html#adam.__init__',
+                                                                                        'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.Adam.step': ( '06_optimizers/optimizers_dev.html#adam.step',
+                                                                                    'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.AdamW': ( '06_optimizers/optimizers_dev.html#adamw',
+                                                                                'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.AdamW.__init__': ( '06_optimizers/optimizers_dev.html#adamw.__init__',
+                                                                                         'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.AdamW.step': ( '06_optimizers/optimizers_dev.html#adamw.step',
+                                                                                     'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.Optimizer': ( '06_optimizers/optimizers_dev.html#optimizer',
+                                                                                    'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.Optimizer.__init__': ( '06_optimizers/optimizers_dev.html#optimizer.__init__',
+                                                                                             'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.Optimizer.step': ( '06_optimizers/optimizers_dev.html#optimizer.step',
+                                                                                         'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.Optimizer.zero_grad': ( '06_optimizers/optimizers_dev.html#optimizer.zero_grad',
+                                                                                              'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.SGD': ( '06_optimizers/optimizers_dev.html#sgd',
+                                                                              'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.SGD.__init__': ( '06_optimizers/optimizers_dev.html#sgd.__init__',
+                                                                                       'tinytorch/core/optimizers.py'),
+                                           'tinytorch.core.optimizers.SGD.step': ( '06_optimizers/optimizers_dev.html#sgd.step',
+                                                                                   'tinytorch/core/optimizers.py')},
+            'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('01_tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.__add__': ( '01_tensor/tensor_dev.html#tensor.__add__',
+                                                                                 'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.__init__': ( '01_tensor/tensor_dev.html#tensor.__init__',
+                                                                                  'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.__mul__': ( '01_tensor/tensor_dev.html#tensor.__mul__',
+                                                                                 'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.__repr__': ( '01_tensor/tensor_dev.html#tensor.__repr__',
+                                                                                  'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.__str__': ( '01_tensor/tensor_dev.html#tensor.__str__',
+                                                                                 'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.__sub__': ( '01_tensor/tensor_dev.html#tensor.__sub__',
+                                                                                 'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.__truediv__': ( '01_tensor/tensor_dev.html#tensor.__truediv__',
+                                                                                     'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.backward': ( '01_tensor/tensor_dev.html#tensor.backward',
+                                                                                  'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.matmul': ( '01_tensor/tensor_dev.html#tensor.matmul',
+                                                                                'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.max': ( '01_tensor/tensor_dev.html#tensor.max',
+                                                                             'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.mean': ( '01_tensor/tensor_dev.html#tensor.mean',
+                                                                              'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.numpy': ( '01_tensor/tensor_dev.html#tensor.numpy',
+                                                                               'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.reshape': ( '01_tensor/tensor_dev.html#tensor.reshape',
+                                                                                 'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.sum': ( '01_tensor/tensor_dev.html#tensor.sum',
+                                                                             'tinytorch/core/tensor.py'),
+                                       'tinytorch.core.tensor.Tensor.transpose': ( '01_tensor/tensor_dev.html#tensor.transpose',
+                                                                                   'tinytorch/core/tensor.py')},
+            'tinytorch.core.training': { 'tinytorch.core.training.CosineSchedule': ( '07_training/training_dev.html#cosineschedule',
+                                                                                     'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.CosineSchedule.__init__': ( '07_training/training_dev.html#cosineschedule.__init__',
+                                                                                              'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.CosineSchedule.get_lr': ( '07_training/training_dev.html#cosineschedule.get_lr',
+                                                                                            'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer': ( '07_training/training_dev.html#trainer',
+                                                                              'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer.__init__': ( '07_training/training_dev.html#trainer.__init__',
+                                                                                       'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer._get_model_state': ( '07_training/training_dev.html#trainer._get_model_state',
+                                                                                               'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer._get_optimizer_state': ( '07_training/training_dev.html#trainer._get_optimizer_state',
+                                                                                                   'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer._get_scheduler_state': ( '07_training/training_dev.html#trainer._get_scheduler_state',
+                                                                                                   'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer._set_model_state': ( '07_training/training_dev.html#trainer._set_model_state',
+                                                                                               'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer._set_optimizer_state': ( '07_training/training_dev.html#trainer._set_optimizer_state',
+                                                                                                   'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer._set_scheduler_state': ( '07_training/training_dev.html#trainer._set_scheduler_state',
+                                                                                                   'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer.evaluate': ( '07_training/training_dev.html#trainer.evaluate',
+                                                                                       'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer.load_checkpoint': ( '07_training/training_dev.html#trainer.load_checkpoint',
+                                                                                              'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer.save_checkpoint': ( '07_training/training_dev.html#trainer.save_checkpoint',
+                                                                                              'tinytorch/core/training.py'),
+                                         'tinytorch.core.training.Trainer.train_epoch': ( '07_training/training_dev.html#trainer.train_epoch',
+                                                                                          'tinytorch/core/training.py')}}}
diff --git a/tinytorch/applications/tinygpt.py b/tinytorch/applications/tinygpt.py
deleted file mode 100644
index 80dabc9a..00000000
--- a/tinytorch/applications/tinygpt.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/20_capstone/capstone_dev.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../../modules/source/20_capstone/capstone_dev.ipynb 2
-#| default_exp applications.tinygpt
-#| export
diff --git a/tinytorch/benchmarking/benchmark.py b/tinytorch/benchmarking/benchmark.py
deleted file mode 100644
index 59888381..00000000
--- a/tinytorch/benchmarking/benchmark.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/19_benchmarking/benchmarking_dev.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../../modules/source/19_benchmarking/benchmarking_dev.ipynb 0
-#| default_exp benchmarking.benchmark
-#| export
diff --git a/tinytorch/core/activations.py b/tinytorch/core/activations.py
index e9c19589..fff7d636 100644
--- a/tinytorch/core/activations.py
+++ b/tinytorch/core/activations.py
@@ -1,5 +1,19 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb.
-
+# ╔═══════════════════════════════════════════════════════════════════════════════╗
+# ║                        🚨 CRITICAL WARNING 🚨                                ║
+# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
+# ║                                                                               ║
+# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
+# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
+# ║                                                                               ║
+# ║  ✅ TO EDIT: modules/source/02_activations/activations_dev.py       ║
+# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
+# ║                                                                               ║
+# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
+# ║     Editing it directly may break module functionality and training.         ║
+# ║                                                                               ║
+# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
+# ║     happens! The tinytorch/ directory is just the compiled output.           ║
+# ╚═══════════════════════════════════════════════════════════════════════════════╝
 # %% auto 0
 __all__ = ['Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax']
 
diff --git a/tinytorch/core/attention.py b/tinytorch/core/attention.py
deleted file mode 100644
index 7d6df426..00000000
--- a/tinytorch/core/attention.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/12_attention/attention_dev.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../../modules/source/12_attention/attention_dev.ipynb 0
-#| default_exp core.attention
-#| export
diff --git a/tinytorch/core/layers.py b/tinytorch/core/layers.py
index 7ea028c4..f2729cd9 100644
--- a/tinytorch/core/layers.py
+++ b/tinytorch/core/layers.py
@@ -1,5 +1,19 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_layers/layers_dev.ipynb.
-
+# ╔═══════════════════════════════════════════════════════════════════════════════╗
+# ║                        🚨 CRITICAL WARNING 🚨                                ║
+# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
+# ║                                                                               ║
+# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
+# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
+# ║                                                                               ║
+# ║  ✅ TO EDIT: modules/source/03_layers/layers_dev.py                 ║
+# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
+# ║                                                                               ║
+# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
+# ║     Editing it directly may break module functionality and training.         ║
+# ║                                                                               ║
+# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
+# ║     happens! The tinytorch/ directory is just the compiled output.           ║
+# ╚═══════════════════════════════════════════════════════════════════════════════╝
 # %% auto 0
 __all__ = ['Linear', 'Dropout']
 
@@ -194,6 +208,10 @@ class Dropout:
         return Tensor(output_data)
         ### END SOLUTION
 
+    def __call__(self, x, training=True):
+        """Make the layer callable by delegating to forward(x, training)."""
+        return self.forward(x, training)
+
     def parameters(self):
         """Dropout has no parameters."""
         return []
diff --git a/tinytorch/core/losses.py b/tinytorch/core/losses.py
index 68c47944..348bed68 100644
--- a/tinytorch/core/losses.py
+++ b/tinytorch/core/losses.py
@@ -1,5 +1,19 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_losses/losses_dev.ipynb.
-
+# ╔═══════════════════════════════════════════════════════════════════════════════╗
+# ║                        🚨 CRITICAL WARNING 🚨                                ║
+# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
+# ║                                                                               ║
+# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
+# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
+# ║                                                                               ║
+# ║  ✅ TO EDIT: modules/source/04_losses/losses_dev.py                 ║
+# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
+# ║                                                                               ║
+# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
+# ║     Editing it directly may break module functionality and training.         ║
+# ║                                                                               ║
+# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
+# ║     happens! The tinytorch/ directory is just the compiled output.           ║
+# ╚═══════════════════════════════════════════════════════════════════════════════╝
 # %% auto 0
 __all__ = ['import_previous_module', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss']
 
diff --git a/tinytorch/core/optimizers.py b/tinytorch/core/optimizers.py
index eec75d99..3698541c 100644
--- a/tinytorch/core/optimizers.py
+++ b/tinytorch/core/optimizers.py
@@ -1,5 +1,19 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/06_optimizers/optimizers_dev.ipynb.
-
+# ╔═══════════════════════════════════════════════════════════════════════════════╗
+# ║                        🚨 CRITICAL WARNING 🚨                                ║
+# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
+# ║                                                                               ║
+# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
+# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
+# ║                                                                               ║
+# ║  ✅ TO EDIT: modules/source/06_optimizers/optimizers_dev.py         ║
+# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
+# ║                                                                               ║
+# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
+# ║     Editing it directly may break module functionality and training.         ║
+# ║                                                                               ║
+# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
+# ║     happens! The tinytorch/ directory is just the compiled output.           ║
+# ╚═══════════════════════════════════════════════════════════════════════════════╝
 # %% auto 0
 __all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW']
 
@@ -7,10 +21,10 @@ __all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW']
 import numpy as np
 from typing import List, Union, Optional, Dict, Any
 
-# Import Tensor from Module 01
-from tinytorch.core.tensor import Tensor
+# Import Tensor from Module 01 (now with gradient support from Module 05)
+from .tensor import Tensor
 
-# %% Base Optimizer class
+# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 5
 class Optimizer:
     """
     Base class for all optimizers.
@@ -37,6 +51,7 @@ class Optimizer:
 
         HINT: Check that each parameter has requires_grad=True
         """
+        ### BEGIN SOLUTION
         # Validate and store parameters
         if not isinstance(params, list):
             params = list(params)
@@ -50,6 +65,7 @@ class Optimizer:
 
         self.params = params
         self.step_count = 0  # For algorithms that need step counting
+        ### END SOLUTION
 
     def zero_grad(self):
         """
@@ -67,8 +83,10 @@ class Optimizer:
 
         WHY: Gradients accumulate by default, so we need to clear them between batches
         """
+        ### BEGIN SOLUTION
         for param in self.params:
             param.grad = None
+        ### END SOLUTION
 
     def step(self):
         """
@@ -78,9 +96,7 @@ class Optimizer:
         """
         raise NotImplementedError("Subclasses must implement step()")
 
-
-
-# %% SGD Optimizer
+# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 9
 class SGD(Optimizer):
     """
     Stochastic Gradient Descent with momentum.
@@ -108,6 +124,7 @@ class SGD(Optimizer):
         - Momentum buffers should be initialized as None
         - They'll be created lazily on first step
         """
+        ### BEGIN SOLUTION
         super().__init__(params)
 
         self.lr = lr
@@ -116,6 +133,7 @@ class SGD(Optimizer):
 
         # Initialize momentum buffers (created lazily)
         self.momentum_buffers = [None for _ in self.params]
+        ### END SOLUTION
 
     def step(self):
         """
@@ -139,6 +157,7 @@ class SGD(Optimizer):
         - Initialize momentum buffers on first use
         - Use in-place operations to save memory
         """
+        ### BEGIN SOLUTION
         for i, param in enumerate(self.params):
             if param.grad is None:
                 continue
@@ -165,10 +184,9 @@ class SGD(Optimizer):
 
         # Increment step counter
         self.step_count += 1
+        ### END SOLUTION
 
-
-
-# %% Adam Optimizer
+# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 13
 class Adam(Optimizer):
     """
     Adam optimizer with adaptive learning rates.
@@ -198,6 +216,7 @@ class Adam(Optimizer):
         EXAMPLE:
         >>> optimizer = Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
         """
+        ### BEGIN SOLUTION
         super().__init__(params)
 
         self.lr = lr
@@ -208,6 +227,7 @@ class Adam(Optimizer):
         # Initialize moment buffers (created lazily)
         self.m_buffers = [None for _ in self.params]  # First moment (mean)
         self.v_buffers = [None for _ in self.params]  # Second moment (variance)
+        ### END SOLUTION
 
     def step(self):
         """
@@ -235,6 +255,7 @@ class Adam(Optimizer):
         - Use step_count for bias correction
         - Square gradients element-wise for second moment
         """
+        ### BEGIN SOLUTION
         # Increment step counter first (needed for bias correction)
         self.step_count += 1
 
@@ -270,10 +291,9 @@ class Adam(Optimizer):
 
             # Update parameter
             param.data = param.data - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
+        ### END SOLUTION
 
-
-
-# %% AdamW Optimizer
+# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 17
 class AdamW(Optimizer):
     """
     AdamW optimizer with decoupled weight decay.
@@ -301,6 +321,7 @@ class AdamW(Optimizer):
         EXAMPLE:
         >>> optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
         """
+        ### BEGIN SOLUTION
         super().__init__(params)
 
         self.lr = lr
@@ -311,6 +332,7 @@ class AdamW(Optimizer):
         # Initialize moment buffers (same as Adam)
         self.m_buffers = [None for _ in self.params]
         self.v_buffers = [None for _ in self.params]
+        ### END SOLUTION
 
     def step(self):
         """
@@ -336,6 +358,7 @@ class AdamW(Optimizer):
 
         HINT: Apply weight decay after gradient update for proper decoupling
         """
+        ### BEGIN SOLUTION
         # Increment step counter first
         self.step_count += 1
 
@@ -369,4 +392,4 @@ class AdamW(Optimizer):
             # Apply decoupled weight decay
             if self.weight_decay != 0:
                 param.data = param.data * (1 - self.lr * self.weight_decay)
-
+        ### END SOLUTION
diff --git a/tinytorch/core/spatial.py b/tinytorch/core/spatial.py
deleted file mode 100644
index faa47403..00000000
--- a/tinytorch/core/spatial.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_spatial/spatial_dev.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../../modules/source/09_spatial/spatial_dev.ipynb 1
-import numpy as np
-import sys
-import os
-import time
-
-# Import dependencies from other modules
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
-from tensor_dev import Tensor
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
-from layers_dev import Module
-
-# Note: Keeping simplified implementations for reference during development
-class _SimplifiedTensor:
-        """Simplified tensor for spatial operations development."""
-
-        def __init__(self, data, requires_grad=False):
-            self.data = np.array(data, dtype=np.float32)
-            self.shape = self.data.shape
-            self.requires_grad = requires_grad
-            self.grad = None
-
-        def __repr__(self):
-            return f"Tensor(shape={self.shape}, data=\n{self.data})"
-
-        def __add__(self, other):
-            if isinstance(other, Tensor):
-                return Tensor(self.data + other.data)
-            return Tensor(self.data + other)
-
-        def __mul__(self, other):
-            if isinstance(other, Tensor):
-                return Tensor(self.data * other.data)
-            return Tensor(self.data * other)
-
-        def sum(self):
-            return Tensor(np.sum(self.data))
-
-        def mean(self):
-            return Tensor(np.mean(self.data))
-
-    # Create a simple Module base class for inheritance
-    class Module:
-        """Simple base class for neural network modules."""
-        def __init__(self):
-            pass
-
-        def forward(self, x):
-            raise NotImplementedError("Subclasses must implement forward()")
-
-        def parameters(self):
-            """Return list of parameters for this module."""
-            params = []
-            for attr_name in dir(self):
-                attr = getattr(self, attr_name)
-                if hasattr(attr, 'data') and hasattr(attr, 'requires_grad'):
-                    params.append(attr)
-            return params
diff --git a/tinytorch/core/tensor.py b/tinytorch/core/tensor.py
index 22b11dad..fb786066 100644
--- a/tinytorch/core/tensor.py
+++ b/tinytorch/core/tensor.py
@@ -1,5 +1,19 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_tensor/tensor_dev.ipynb.
-
+# ╔═══════════════════════════════════════════════════════════════════════════════╗
+# ║                        🚨 CRITICAL WARNING 🚨                                ║
+# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
+# ║                                                                               ║
+# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
+# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
+# ║                                                                               ║
+# ║  ✅ TO EDIT: modules/source/01_tensor/tensor_dev.py                 ║
+# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
+# ║                                                                               ║
+# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
+# ║     Editing it directly may break module functionality and training.         ║
+# ║                                                                               ║
+# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
+# ║     happens! The tinytorch/ directory is just the compiled output.           ║
+# ╚═══════════════════════════════════════════════════════════════════════════════╝
 # %% auto 0
 __all__ = ['Tensor']
 
diff --git a/tinytorch/core/training.py b/tinytorch/core/training.py
index 54947841..e4082b8f 100644
--- a/tinytorch/core/training.py
+++ b/tinytorch/core/training.py
@@ -1,7 +1,21 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/07_training/training_dev.ipynb.
-
+# ╔═══════════════════════════════════════════════════════════════════════════════╗
+# ║                        🚨 CRITICAL WARNING 🚨                                ║
+# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
+# ║                                                                               ║
+# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
+# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
+# ║                                                                               ║
+# ║  ✅ TO EDIT: modules/source/07_training/training_dev.py             ║
+# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
+# ║                                                                               ║
+# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
+# ║     Editing it directly may break module functionality and training.         ║
+# ║                                                                               ║
+# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
+# ║     happens! The tinytorch/ directory is just the compiled output.           ║
+# ╚═══════════════════════════════════════════════════════════════════════════════╝
 # %% auto 0
-__all__ = []
+__all__ = ['CosineSchedule', 'Trainer']
 
 # %% ../../modules/source/07_training/training_dev.ipynb 1
 import numpy as np
@@ -13,14 +27,310 @@ import sys
 import os
 
 # Import dependencies from other modules
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
-from tensor_dev import Tensor
+from .tensor import Tensor
+from .layers import Linear
+from .losses import MSELoss, CrossEntropyLoss
+from .optimizers import SGD, AdamW
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
-from layers_dev import Linear
+# %% ../../modules/source/07_training/training_dev.ipynb 6
+class CosineSchedule:
+    """
+    Cosine annealing learning rate schedule.
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '04_losses'))
-from losses_dev import MSELoss, CrossEntropyLoss
+    Starts at max_lr, decreases following a cosine curve to min_lr over T epochs.
+    This provides aggressive learning initially, then fine-tuning at the end.
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_optimizers'))
-from optimizers_dev import SGD, AdamW
+    TODO: Implement cosine annealing schedule
+
+    APPROACH:
+    1. Store max_lr, min_lr, and total_epochs
+    2. In get_lr(), compute cosine factor: (1 + cos(π * epoch / total_epochs)) / 2
+    3. Interpolate: min_lr + (max_lr - min_lr) * cosine_factor
+
+    EXAMPLE:
+    >>> schedule = CosineSchedule(max_lr=0.1, min_lr=0.01, total_epochs=100)
+    >>> print(schedule.get_lr(0))    # Start: 0.1
+    >>> print(schedule.get_lr(50))   # Middle: ~0.055
+    >>> print(schedule.get_lr(100))  # End: 0.01
+
+    HINT: Use np.cos() and np.pi for the cosine calculation
+    """
+    ### BEGIN SOLUTION
+    def __init__(self, max_lr: float = 0.1, min_lr: float = 0.01, total_epochs: int = 100):
+        """Remember the peak rate, the floor rate and the schedule length in epochs."""
+        self.max_lr, self.min_lr = max_lr, min_lr
+        self.total_epochs = total_epochs
+
+    def get_lr(self, epoch: int) -> float:
+        """Return the learning rate scheduled for ``epoch``."""
+        # From total_epochs onwards the rate stays pinned at min_lr
+        if epoch >= self.total_epochs:
+            return self.min_lr
+        angle = np.pi * epoch / self.total_epochs
+        lr_span = self.max_lr - self.min_lr
+        return self.min_lr + lr_span * ((1 + np.cos(angle)) / 2)
+    ### END SOLUTION
+
+# %% ../../modules/source/07_training/training_dev.ipynb 14
+class Trainer:
+    """
+    Complete training orchestrator for neural networks.
+
+    Handles the full training lifecycle: forward pass, loss computation,
+    backward pass, optimization, scheduling, checkpointing, and evaluation.
+
+    This is the central class that brings together all the components
+    you've built in previous modules.
+
+    TODO: Implement complete Trainer class
+
+    APPROACH:
+    1. Store model, optimizer, loss function, and optional scheduler
+    2. train_epoch(): Loop through data, compute loss, update parameters
+    3. evaluate(): Similar loop but without gradient updates
+    4. save/load_checkpoint(): Persist training state for resumption
+
+    DESIGN PATTERNS:
+    - Context managers for train/eval modes
+    - Gradient accumulation for effective large batch sizes
+    - Progress tracking for monitoring
+    - Flexible scheduling integration
+    """
+    ### BEGIN SOLUTION
+    def __init__(self, model, optimizer, loss_fn, scheduler=None, grad_clip_norm=None):
+        """
+        Bundle the model with the components that drive its training and
+        reset all progress counters and history.
+
+        Args:
+            model: Neural network to train
+            optimizer: Parameter update strategy (SGD, Adam, etc.)
+            loss_fn: Loss function (CrossEntropy, MSE, etc.)
+            scheduler: Optional learning rate scheduler
+            grad_clip_norm: Optional gradient clipping threshold
+        """
+        # Collaborators supplied by the caller
+        self.model = model
+        self.loss_fn = loss_fn
+        self.optimizer = optimizer
+        self.scheduler = scheduler
+        self.grad_clip_norm = grad_clip_norm
+
+        # Progress counters and the current train/eval flag
+        self.epoch, self.step = 0, 0
+        self.training_mode = True
+
+        # One growing list per tracked metric
+        self.history = {
+            name: []
+            for name in ('train_loss', 'eval_loss', 'learning_rates')
+        }
+
+    def train_epoch(self, dataloader, accumulation_steps=1):
+        """
+        Train for one epoch through the dataset.
+
+        Args:
+            dataloader: Iterable yielding (inputs, targets) batches
+            accumulation_steps: Number of batches to accumulate before update
+
+        Returns:
+            Average loss for the epoch
+        """
+        self.model.training = True
+        self.training_mode = True
+
+        total_loss = 0.0
+        num_batches = 0
+        accumulated_loss = 0.0
+        pending = 0  # Batches whose gradients have not been applied yet
+
+        def apply_update():
+            # Clip by global L2 norm inline: this module has no clip_grad_norm helper
+            if self.grad_clip_norm is not None and hasattr(self.model, 'parameters'):
+                params = [p for p in self.model.parameters() if p.grad is not None]
+                grads = [p.grad if isinstance(p.grad, np.ndarray) else p.grad.data for p in params]
+                total_norm = np.sqrt(sum(float(np.sum(np.square(g))) for g in grads))
+                if total_norm > self.grad_clip_norm:
+                    scale = self.grad_clip_norm / total_norm
+                    for p in params:
+                        if isinstance(p.grad, np.ndarray):
+                            p.grad = p.grad * scale
+                        else:  # Tensor-like gradient: rescale the wrapped array
+                            p.grad.data = p.grad.data * scale
+            self.optimizer.step()
+            self.optimizer.zero_grad()
+            self.step += 1
+
+        for inputs, targets in dataloader:
+            # Forward pass
+            outputs = self.model.forward(inputs)
+            loss = self.loss_fn.forward(outputs, targets)
+
+            # NOTE(review): only the logged loss is scaled; gradients are summed unscaled
+            accumulated_loss += loss.data / accumulation_steps
+
+            # Backward pass
+            if hasattr(loss, 'backward'):
+                loss.backward()
+            pending += 1
+
+            # Update parameters every accumulation_steps
+            if pending == accumulation_steps:
+                apply_update()
+                total_loss += accumulated_loss
+                accumulated_loss = 0.0
+                num_batches += 1
+                pending = 0
+
+        # Flush a trailing partial group even when its summed loss is <= 0
+        if pending:
+            apply_update()
+            total_loss += accumulated_loss
+            num_batches += 1
+
+        avg_loss = total_loss / max(num_batches, 1)
+        self.history['train_loss'].append(avg_loss)
+
+        # Apply the scheduled rate for the epoch that just finished
+        if self.scheduler is not None:
+            current_lr = self.scheduler.get_lr(self.epoch)
+            if hasattr(self.optimizer, 'lr'):
+                self.optimizer.lr = current_lr
+            self.history['learning_rates'].append(current_lr)
+
+        self.epoch += 1
+        return avg_loss
+
+    def evaluate(self, dataloader):
+        """
+        Evaluate model on dataset without updating parameters.
+
+        Args:
+            dataloader: Iterable yielding (inputs, targets) batches; len() is not required
+
+        Returns:
+            Average loss and accuracy
+        """
+        self.model.training = False
+        self.training_mode = False
+
+        total_loss = 0.0
+        correct = 0
+        total = 0
+        num_batches = 0  # Counted while iterating so generators work too
+
+        for num_batches, (inputs, targets) in enumerate(dataloader, start=1):
+            # Forward pass only
+            outputs = self.model.forward(inputs)
+            loss = self.loss_fn.forward(outputs, targets)
+
+            total_loss += loss.data
+
+            # Calculate accuracy (for classification)
+            if hasattr(outputs, 'data') and hasattr(targets, 'data'):
+                if len(outputs.data.shape) > 1:  # Multi-class
+                    predictions = np.argmax(outputs.data, axis=1)
+                    if len(targets.data.shape) == 1:  # Integer targets
+                        correct += np.sum(predictions == targets.data)
+                    else:  # One-hot targets
+                        correct += np.sum(predictions == np.argmax(targets.data, axis=1))
+                    total += len(predictions)
+
+        avg_loss = total_loss / num_batches if num_batches > 0 else 0.0
+        accuracy = correct / total if total > 0 else 0.0
+
+        self.history['eval_loss'].append(avg_loss)
+        return avg_loss, accuracy
+
+    def save_checkpoint(self, path: str):
+        """
+        Pickle the trainer's state to ``path`` so a later run can resume from it.
+
+        Args:
+            path: Destination file; missing parent directories are created
+        """
+        snapshot = {
+            'epoch': self.epoch,
+            'step': self.step,
+            'model_state': self._get_model_state(),
+            'optimizer_state': self._get_optimizer_state(),
+            'scheduler_state': self._get_scheduler_state(),
+            'history': self.history,
+            'training_mode': self.training_mode
+        }
+        parent_dir = Path(path).parent
+        parent_dir.mkdir(parents=True, exist_ok=True)
+        with open(path, 'wb') as handle:
+            pickle.dump(snapshot, handle)
+
+    def load_checkpoint(self, path: str):
+        """
+        Restore trainer state from a checkpoint file.
+
+        Args:
+            path: Checkpoint file to read; it is unpickled, so it must be trusted
+        """
+        with open(path, 'rb') as handle:
+            saved = pickle.load(handle)
+
+        for field in ('epoch', 'step', 'history', 'training_mode'):
+            setattr(self, field, saved[field])
+
+        # Component states are optional and restored only when present
+        restorers = (
+            ('model_state', self._set_model_state),
+            ('optimizer_state', self._set_optimizer_state),
+            ('scheduler_state', self._set_scheduler_state),
+        )
+        for key, restore in restorers:
+            if key in saved:
+                restore(saved[key])
+
+    def _get_model_state(self):
+        """Copy every parameter array into a dict keyed by parameter position."""
+        if not hasattr(self.model, 'parameters'):
+            return {}
+        return {idx: p.data.copy() for idx, p in enumerate(self.model.parameters())}
+
+    def _set_model_state(self, state):
+        """Overwrite parameter data with saved copies, matched by position."""
+        params = self.model.parameters() if hasattr(self.model, 'parameters') else []
+        for idx, p in enumerate(params):
+            if idx in state:
+                p.data = state[idx].copy()
+
+    def _get_optimizer_state(self):
+        """Snapshot lr, step_count and any SGD/Adam/AdamW buffer lists the optimizer has."""
+        state = {}
+        for name in ('lr', 'step_count', 'momentum_buffers', 'm_buffers', 'v_buffers'):
+            if hasattr(self.optimizer, name):
+                value = getattr(self.optimizer, name)
+                state[name] = list(value) if isinstance(value, list) else value
+        return state
+
+    def _set_optimizer_state(self, state):
+        """Restore lr, step_count and SGD/Adam/AdamW buffer lists found in ``state``."""
+        for name in ('lr', 'step_count', 'momentum_buffers', 'm_buffers', 'v_buffers'):
+            # Skip entries missing from the checkpoint or unknown to this optimizer
+            if name in state and hasattr(self.optimizer, name):
+                setattr(self.optimizer, name, state[name])
+
+    def _get_scheduler_state(self):
+        """Collect the scheduler's hyperparameters, or None without a scheduler."""
+        sched = self.scheduler
+        if sched is None:
+            return None
+
+        # Attributes the scheduler lacks are recorded as None
+        fields = ('max_lr', 'min_lr', 'total_epochs')
+        return {name: getattr(sched, name, None) for name in fields}
+
+    def _set_scheduler_state(self, state):
+        """Copy saved values onto the scheduler for attributes it already has."""
+        # Nothing to restore without both a saved state and a live scheduler
+        if state is not None and self.scheduler is not None:
+            for name, saved_value in state.items():
+                if hasattr(self.scheduler, name):
+                    setattr(self.scheduler, name, saved_value)
+    ### END SOLUTION
diff --git a/tinytorch/generation/kv_cache.py b/tinytorch/generation/kv_cache.py
deleted file mode 100644
index ed623a38..00000000
--- a/tinytorch/generation/kv_cache.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/14_kvcaching/kvcaching_dev.ipynb.
-
-# %% auto 0
-__all__ = ['Tensor']
-
-# %% ../../modules/source/14_kvcaching/kvcaching_dev.ipynb 1
-import numpy as np
-import time
-from typing import Tuple, Optional, Dict, List
-from dataclasses import dataclass
-
-# Import our TinyTorch components (Modules 01-13)
-### BEGIN SOLUTION
-# Note: In real implementation, these would import from previous modules
-# For now, we'll implement minimal versions to focus on caching concepts
-
-class Tensor:
-    """Minimal Tensor for KV Caching focus (from Module 01)"""
-    def __init__(self, data, requires_grad=False):
-        self.data = np.array(data)
-        self.shape = self.data.shape
-        self.requires_grad = requires_grad
-        self.grad = None
-
-    def __getitem__(self, key):
-        return Tensor(self.data[key])
-
-    def __setitem__(self, key, value):
-        if isinstance(value, Tensor):
-            self.data[key] = value.data
-        else:
-            self.data[key] = value
-
-    def size(self, dim=None):
-        if dim is None:
-            return self.shape
-        return self.shape[dim]
-
-    def view(self, *shape):
-        return Tensor(self.data.reshape(shape))
-
-    def transpose(self, dim0, dim1):
-        axes = list(range(len(self.shape)))
-        axes[dim0], axes[dim1] = axes[dim1], axes[dim0]
-        return Tensor(np.transpose(self.data, axes))
-
-    @staticmethod
-    def cat(tensors, dim=0):
-        """Concatenate tensors along dimension"""
-        arrays = [t.data for t in tensors]
-        return Tensor(np.concatenate(arrays, axis=dim))
-
-    @staticmethod
-    def zeros(*shape):
-        """Create zero tensor"""
-        return Tensor(np.zeros(shape))
-### END SOLUTION
diff --git a/tinytorch/models/transformer.py b/tinytorch/models/transformer.py
deleted file mode 100644
index 6e63d29b..00000000
--- a/tinytorch/models/transformer.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_transformers/transformers_dev.ipynb.
-
-# %% auto 0
-__all__ = ['Tensor', 'Linear', 'MultiHeadAttention', 'Embedding', 'gelu']
-
-# %% ../../modules/source/13_transformers/transformers_dev.ipynb 1
-import numpy as np
-import math
-from typing import Optional, List
-
-# Minimal implementations for development - in practice these import from previous modules
-class Tensor:
-    """Minimal Tensor class for transformer development - imports from Module 01 in practice."""
-    def __init__(self, data, requires_grad=False):
-        self.data = np.array(data)
-        self.shape = self.data.shape
-        self.size = self.data.size
-        self.requires_grad = requires_grad
-        self.grad = None
-
-    def __add__(self, other):
-        if isinstance(other, Tensor):
-            return Tensor(self.data + other.data)
-        return Tensor(self.data + other)
-
-    def __mul__(self, other):
-        if isinstance(other, Tensor):
-            return Tensor(self.data * other.data)
-        return Tensor(self.data * other)
-
-    def matmul(self, other):
-        return Tensor(np.dot(self.data, other.data))
-
-    def sum(self, axis=None, keepdims=False):
-        return Tensor(self.data.sum(axis=axis, keepdims=keepdims))
-
-    def mean(self, axis=None, keepdims=False):
-        return Tensor(self.data.mean(axis=axis, keepdims=keepdims))
-
-    def reshape(self, *shape):
-        return Tensor(self.data.reshape(shape))
-
-    def __repr__(self):
-        return f"Tensor(data={self.data}, shape={self.shape})"
-
-class Linear:
-    """Minimal Linear layer - imports from Module 03 in practice."""
-    def __init__(self, in_features, out_features, bias=True):
-        # Xavier/Glorot initialization
-        std = math.sqrt(2.0 / (in_features + out_features))
-        self.weight = Tensor(np.random.normal(0, std, (in_features, out_features)))
-        self.bias = Tensor(np.zeros(out_features)) if bias else None
-
-    def forward(self, x):
-        output = x.matmul(self.weight)
-        if self.bias is not None:
-            output = output + self.bias
-        return output
-
-    def parameters(self):
-        params = [self.weight]
-        if self.bias is not None:
-            params.append(self.bias)
-        return params
-
-class MultiHeadAttention:
-    """Minimal MultiHeadAttention - imports from Module 12 in practice."""
-    def __init__(self, embed_dim, num_heads):
-        assert embed_dim % num_heads == 0
-        self.embed_dim = embed_dim
-        self.num_heads = num_heads
-        self.head_dim = embed_dim // num_heads
-
-        self.q_proj = Linear(embed_dim, embed_dim)
-        self.k_proj = Linear(embed_dim, embed_dim)
-        self.v_proj = Linear(embed_dim, embed_dim)
-        self.out_proj = Linear(embed_dim, embed_dim)
-
-    def forward(self, x, mask=None):
-        batch_size, seq_len, embed_dim = x.shape
-
-        # Linear projections
-        Q = self.q_proj.forward(x)
-        K = self.k_proj.forward(x)
-        V = self.v_proj.forward(x)
-
-        # Reshape for multi-head attention
-        Q = Q.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
-        K = K.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
-        V = V.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
-
-        # Transpose to (batch_size, num_heads, seq_len, head_dim)
-        Q = Tensor(np.transpose(Q.data, (0, 2, 1, 3)))
-        K = Tensor(np.transpose(K.data, (0, 2, 1, 3)))
-        V = Tensor(np.transpose(V.data, (0, 2, 1, 3)))
-
-        # Scaled dot-product attention
-        scores = Tensor(np.matmul(Q.data, np.transpose(K.data, (0, 1, 3, 2))))
-        scores = scores * (1.0 / math.sqrt(self.head_dim))
-
-        # Apply causal mask for autoregressive generation
-        if mask is not None:
-            scores = Tensor(scores.data + mask.data)
-
-        # Softmax
-        attention_weights = self._softmax(scores)
-
-        # Apply attention to values
-        out = Tensor(np.matmul(attention_weights.data, V.data))
-
-        # Transpose back and reshape
-        out = Tensor(np.transpose(out.data, (0, 2, 1, 3)))
-        out = out.reshape(batch_size, seq_len, embed_dim)
-
-        # Final linear projection
-        return self.out_proj.forward(out)
-
-    def _softmax(self, x):
-        """Numerically stable softmax."""
-        exp_x = Tensor(np.exp(x.data - np.max(x.data, axis=-1, keepdims=True)))
-        return Tensor(exp_x.data / np.sum(exp_x.data, axis=-1, keepdims=True))
-
-    def parameters(self):
-        params = []
-        params.extend(self.q_proj.parameters())
-        params.extend(self.k_proj.parameters())
-        params.extend(self.v_proj.parameters())
-        params.extend(self.out_proj.parameters())
-        return params
-
-class Embedding:
-    """Minimal Embedding layer - imports from Module 11 in practice."""
-    def __init__(self, vocab_size, embed_dim):
-        self.vocab_size = vocab_size
-        self.embed_dim = embed_dim
-        # Initialize with small random values
-        self.weight = Tensor(np.random.normal(0, 0.02, (vocab_size, embed_dim)))
-
-    def forward(self, indices):
-        # Simple embedding lookup
-        return Tensor(self.weight.data[indices.data])
-
-    def parameters(self):
-        return [self.weight]
-
-def gelu(x):
-    """GELU activation function."""
-    return Tensor(0.5 * x.data * (1 + np.tanh(np.sqrt(2 / np.pi) * (x.data + 0.044715 * x.data**3))))
diff --git a/tinytorch/optimization/acceleration.py b/tinytorch/optimization/acceleration.py
deleted file mode 100644
index d0ca1d13..00000000
--- a/tinytorch/optimization/acceleration.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/16_acceleration/acceleration_dev.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../../modules/source/16_acceleration/acceleration_dev.ipynb 0
-#| default_exp optimization.acceleration
-#| export
diff --git a/tinytorch/optimization/compression.py b/tinytorch/optimization/compression.py
deleted file mode 100644
index 01ef28ae..00000000
--- a/tinytorch/optimization/compression.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/18_compression/compression_dev.ipynb.
-
-# %% auto 0
-__all__ = ['Tensor', 'Linear', 'Sequential']
-
-# %% ../../modules/source/18_compression/compression_dev.ipynb 1
-import numpy as np
-import copy
-from typing import List, Dict, Any, Tuple, Optional
-import time
-
-# Import from previous modules
-# Note: In the full package, these would be imports like:
-# from tinytorch.core.tensor import Tensor
-# from tinytorch.core.layers import Linear
-# For development, we'll create minimal implementations
-
-class Tensor:
-    """Minimal Tensor class for compression development - imports from Module 01 in practice."""
-    def __init__(self, data, requires_grad=False):
-        self.data = np.array(data)
-        self.shape = self.data.shape
-        self.size = self.data.size
-        self.requires_grad = requires_grad
-        self.grad = None
-
-    def __add__(self, other):
-        if isinstance(other, Tensor):
-            return Tensor(self.data + other.data)
-        return Tensor(self.data + other)
-
-    def __mul__(self, other):
-        if isinstance(other, Tensor):
-            return Tensor(self.data * other.data)
-        return Tensor(self.data * other)
-
-    def matmul(self, other):
-        return Tensor(np.dot(self.data, other.data))
-
-    def abs(self):
-        return Tensor(np.abs(self.data))
-
-    def sum(self, axis=None):
-        return Tensor(self.data.sum(axis=axis))
-
-    def __repr__(self):
-        return f"Tensor(shape={self.shape})"
-
-class Linear:
-    """Minimal Linear layer for compression development - imports from Module 03 in practice."""
-    def __init__(self, in_features, out_features, bias=True):
-        self.in_features = in_features
-        self.out_features = out_features
-        # Initialize with He initialization
-        self.weight = Tensor(np.random.randn(in_features, out_features) * np.sqrt(2.0 / in_features))
-        self.bias = Tensor(np.zeros(out_features)) if bias else None
-
-    def forward(self, x):
-        output = x.matmul(self.weight)
-        if self.bias is not None:
-            output = output + self.bias
-        return output
-
-    def parameters(self):
-        params = [self.weight]
-        if self.bias is not None:
-            params.append(self.bias)
-        return params
-
-class Sequential:
-    """Minimal Sequential container for model compression."""
-    def __init__(self, *layers):
-        self.layers = list(layers)
-
-    def forward(self, x):
-        for layer in self.layers:
-            x = layer.forward(x)
-        return x
-
-    def parameters(self):
-        params = []
-        for layer in self.layers:
-            if hasattr(layer, 'parameters'):
-                params.extend(layer.parameters())
-        return params
diff --git a/tinytorch/optimization/quantization.py b/tinytorch/optimization/quantization.py
deleted file mode 100644
index f5bc7cfd..00000000
--- a/tinytorch/optimization/quantization.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/17_quantization/quantization_dev.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../../modules/source/17_quantization/quantization_dev.ipynb 0
-#| default_exp optimization.quantization
-#| export
diff --git a/tinytorch/profiling/profiler.py b/tinytorch/profiling/profiler.py
deleted file mode 100644
index 82aca521..00000000
--- a/tinytorch/profiling/profiler.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/15_profiling/profiling_dev.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../../modules/source/15_profiling/profiling_dev.ipynb 1
-import time
-import numpy as np
-import tracemalloc
-from typing import Dict, List, Any, Optional, Tuple
-from collections import defaultdict
-import gc
-
-# Import our TinyTorch components for profiling
-import sys
-import os
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '09_spatial'))
-
-# For testing purposes - in real package these would be proper imports
-try:
-    from tensor_dev import Tensor
-    from layers_dev import Linear, Sequential
-    from spatial_dev import Conv2d
-except ImportError:
-    # Fallback - create minimal implementations for testing
-    class Tensor:
-        def __init__(self, data):
-            self.data = np.array(data)
-            self.shape = self.data.shape
-        def __mul__(self, other):
-            return Tensor(self.data * other.data)
-        def sum(self):
-            return Tensor(np.sum(self.data))
diff --git a/tinytorch/text/tokenization.py b/tinytorch/text/tokenization.py
deleted file mode 100644
index 10a40e63..00000000
--- a/tinytorch/text/tokenization.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/10_tokenization/tokenization_dev.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 0
-#| default_exp text.tokenization
-#| export