Reset package and export modules 01-07 only (skip broken spatial module)

This commit is contained in:
Vijay Janapa Reddi
2025-09-30 13:41:00 -04:00
parent 9a3373f406
commit ba6bd79a67
24 changed files with 807 additions and 647 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "6ca4b9f5",
"id": "22bf7b48",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -51,7 +51,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "3dcaaffc",
"id": "7d24677b",
"metadata": {
"nbgrader": {
"grade": false,
@@ -69,7 +69,7 @@
},
{
"cell_type": "markdown",
"id": "e70ae12a",
"id": "447a0b7a",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -116,7 +116,7 @@
},
{
"cell_type": "markdown",
"id": "7a1e48b5",
"id": "c2b4bc17",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -175,7 +175,7 @@
},
{
"cell_type": "markdown",
"id": "42f2279e",
"id": "1dc8a950",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -214,7 +214,7 @@
},
{
"cell_type": "markdown",
"id": "cb1e99f0",
"id": "334562a5",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -252,7 +252,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "4a090be0",
"id": "27b3b08d",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -724,7 +724,7 @@
},
{
"cell_type": "markdown",
"id": "a49cddfd",
"id": "345f0782",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -742,7 +742,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "79195fe8",
"id": "503244d4",
"metadata": {
"nbgrader": {
"grade": true,
@@ -791,7 +791,7 @@
},
{
"cell_type": "markdown",
"id": "7cbed527",
"id": "5176cde0",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -839,7 +839,7 @@
},
{
"cell_type": "markdown",
"id": "30f53e64",
"id": "45461424",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -882,7 +882,7 @@
},
{
"cell_type": "markdown",
"id": "e13b5c91",
"id": "7ba6f505",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -900,7 +900,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "26ab9e58",
"id": "9471ca95",
"metadata": {
"nbgrader": {
"grade": true,
@@ -957,7 +957,7 @@
},
{
"cell_type": "markdown",
"id": "8ab4eb75",
"id": "453ed0e5",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -1057,7 +1057,7 @@
},
{
"cell_type": "markdown",
"id": "75e72654",
"id": "ae6dca6f",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1075,7 +1075,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "434f6550",
"id": "d1bf193f",
"metadata": {
"nbgrader": {
"grade": true,
@@ -1132,7 +1132,7 @@
},
{
"cell_type": "markdown",
"id": "de04fa2e",
"id": "23a70fb2",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -1235,7 +1235,7 @@
},
{
"cell_type": "markdown",
"id": "2f4cd90a",
"id": "a320a34f",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1253,7 +1253,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "e497f3d1",
"id": "04a65af9",
"metadata": {
"nbgrader": {
"grade": true,
@@ -1323,7 +1323,7 @@
},
{
"cell_type": "markdown",
"id": "c944cd8b",
"id": "509140c2",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -1417,7 +1417,7 @@
},
{
"cell_type": "markdown",
"id": "e8312574",
"id": "21664f47",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1435,7 +1435,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "66d6beb6",
"id": "5f9edd66",
"metadata": {
"nbgrader": {
"grade": true,
@@ -1508,7 +1508,7 @@
},
{
"cell_type": "markdown",
"id": "71042cd1",
"id": "8b900870",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -1583,7 +1583,7 @@
},
{
"cell_type": "markdown",
"id": "9d5518b2",
"id": "a98400bf",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -1644,7 +1644,7 @@
},
{
"cell_type": "markdown",
"id": "23b79c43",
"id": "c74f78e6",
"metadata": {
"lines_to_next_cell": 1
},
@@ -1666,7 +1666,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c8fdde9c",
"id": "6766dc8a",
"metadata": {
"lines_to_next_cell": 2,
"nbgrader": {
@@ -1794,7 +1794,7 @@
},
{
"cell_type": "markdown",
"id": "f0f02362",
"id": "602da67a",
"metadata": {
"cell_marker": "\"\"\""
},

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "8f7f092b",
"id": "41637b5b",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -34,7 +34,7 @@
},
{
"cell_type": "markdown",
"id": "ba7543b3",
"id": "eb80f71c",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -59,7 +59,7 @@
},
{
"cell_type": "markdown",
"id": "5f04cb4a",
"id": "ad445b19",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -78,7 +78,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "3042497e",
"id": "7fc4b3ae",
"metadata": {
"nbgrader": {
"grade": false,
@@ -102,7 +102,7 @@
},
{
"cell_type": "markdown",
"id": "609861d1",
"id": "6c49b0a7",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -144,7 +144,7 @@
},
{
"cell_type": "markdown",
"id": "9b7b4834",
"id": "a82d5ffc",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -166,7 +166,7 @@
},
{
"cell_type": "markdown",
"id": "29845a4a",
"id": "d954190f",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -190,7 +190,7 @@
},
{
"cell_type": "markdown",
"id": "d5fc598c",
"id": "1d26aa84",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -228,7 +228,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "b6cb596e",
"id": "cd112f28",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -287,7 +287,7 @@
},
{
"cell_type": "markdown",
"id": "84674501",
"id": "87407a56",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -303,7 +303,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "3fbc497a",
"id": "8599e53a",
"metadata": {
"nbgrader": {
"grade": true,
@@ -344,7 +344,7 @@
},
{
"cell_type": "markdown",
"id": "a076a2f1",
"id": "96438263",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -386,7 +386,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a079c21f",
"id": "6bdad44d",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -442,7 +442,7 @@
},
{
"cell_type": "markdown",
"id": "8cf41efa",
"id": "853265df",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -458,7 +458,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "9bf84e40",
"id": "e3f2e5fd",
"metadata": {
"nbgrader": {
"grade": true,
@@ -505,7 +505,7 @@
},
{
"cell_type": "markdown",
"id": "26a36cf2",
"id": "d137e456",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -544,7 +544,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a93086db",
"id": "3a3ec4c5",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -600,7 +600,7 @@
},
{
"cell_type": "markdown",
"id": "4488836b",
"id": "b2ad2baa",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -616,7 +616,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "46388fa5",
"id": "b92572ae",
"metadata": {
"nbgrader": {
"grade": true,
@@ -664,7 +664,7 @@
},
{
"cell_type": "markdown",
"id": "f341ff48",
"id": "d1cdd503",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -707,7 +707,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a9684ba3",
"id": "90f15779",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -768,7 +768,7 @@
},
{
"cell_type": "markdown",
"id": "7c8ef48f",
"id": "eb655b3b",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -784,7 +784,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c42d7ec8",
"id": "838060ac",
"metadata": {
"nbgrader": {
"grade": true,
@@ -832,7 +832,7 @@
},
{
"cell_type": "markdown",
"id": "ba4edcdd",
"id": "a8047ea8",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -870,7 +870,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "2821ef9e",
"id": "aa266bb7",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -942,7 +942,7 @@
},
{
"cell_type": "markdown",
"id": "70c31533",
"id": "80e6ad27",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -958,7 +958,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "852f5832",
"id": "f3db3810",
"metadata": {
"nbgrader": {
"grade": true,
@@ -1016,7 +1016,7 @@
},
{
"cell_type": "markdown",
"id": "b0107716",
"id": "2db83cef",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -1029,7 +1029,7 @@
},
{
"cell_type": "markdown",
"id": "87c16b51",
"id": "428eaa1b",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -1049,7 +1049,7 @@
},
{
"cell_type": "markdown",
"id": "0a812659",
"id": "fe7666b9",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1063,7 +1063,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0015101e",
"id": "fac9ee55",
"metadata": {
"lines_to_next_cell": 2,
"nbgrader": {
@@ -1162,7 +1162,7 @@
},
{
"cell_type": "markdown",
"id": "d0575067",
"id": "6a9cc930",
"metadata": {
"cell_marker": "\"\"\""
},

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "2cfa2aae",
"id": "46b4a258",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -53,7 +53,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c7a84c05",
"id": "bc3a80e9",
"metadata": {
"nbgrader": {
"grade": false,
@@ -77,7 +77,7 @@
},
{
"cell_type": "markdown",
"id": "e52c72c2",
"id": "76d31667",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -101,7 +101,7 @@
},
{
"cell_type": "markdown",
"id": "e05eee85",
"id": "e0421bae",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -139,7 +139,7 @@
},
{
"cell_type": "markdown",
"id": "f489f983",
"id": "6670b0b1",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -160,7 +160,7 @@
},
{
"cell_type": "markdown",
"id": "fff4865c",
"id": "2dc8d8c8",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -211,7 +211,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "da931144",
"id": "a973eb44",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -335,7 +335,7 @@
},
{
"cell_type": "markdown",
"id": "77988775",
"id": "d4cbdf9d",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -351,7 +351,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "4c2e0b2e",
"id": "174fe10a",
"metadata": {
"nbgrader": {
"grade": true,
@@ -411,7 +411,7 @@
},
{
"cell_type": "markdown",
"id": "d2fa31b7",
"id": "e961f791",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -473,7 +473,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "88715659",
"id": "b924d865",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -556,6 +556,10 @@
" return Tensor(output_data)\n",
" ### END SOLUTION\n",
"\n",
" def __call__(self, x, training=True):\n",
" \"\"\"Allows the layer to be called like a function.\"\"\"\n",
" return self.forward(x, training)\n",
"\n",
" def parameters(self):\n",
" \"\"\"Dropout has no parameters.\"\"\"\n",
" return []\n",
@@ -566,7 +570,7 @@
},
{
"cell_type": "markdown",
"id": "0d33ff6f",
"id": "ee0bc9a1",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -582,7 +586,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "cb0b7ae8",
"id": "c76974a1",
"metadata": {
"nbgrader": {
"grade": true,
@@ -658,7 +662,7 @@
},
{
"cell_type": "markdown",
"id": "4b4aac2f",
"id": "231dae31",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -718,7 +722,7 @@
},
{
"cell_type": "markdown",
"id": "0f4f3b7d",
"id": "bbc4aad9",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -777,7 +781,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "99df0451",
"id": "0ca58dc7",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -832,7 +836,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "5e82e08c",
"id": "a9b7ae8a",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -873,7 +877,7 @@
},
{
"cell_type": "markdown",
"id": "85ea5db5",
"id": "5570a366",
"metadata": {
"lines_to_next_cell": 1
},
@@ -895,7 +899,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "ebddc165",
"id": "b2e11bf8",
"metadata": {
"lines_to_next_cell": 2,
"nbgrader": {
@@ -986,7 +990,7 @@
},
{
"cell_type": "markdown",
"id": "31c54ee3",
"id": "4c9212f9",
"metadata": {
"cell_marker": "\"\"\""
},

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "7a46b4ac",
"id": "9d798b1c",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -35,7 +35,7 @@
},
{
"cell_type": "markdown",
"id": "95565c7e",
"id": "91804987",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -59,7 +59,7 @@
},
{
"cell_type": "markdown",
"id": "1c9b95bd",
"id": "c09dc686",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -80,7 +80,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "054331fd",
"id": "51189bc1",
"metadata": {
"nbgrader": {
"grade": false,
@@ -113,7 +113,7 @@
},
{
"cell_type": "markdown",
"id": "e60f5944",
"id": "cc227c2d",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -189,7 +189,7 @@
},
{
"cell_type": "markdown",
"id": "2a8ac601",
"id": "49e5039b",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -235,7 +235,7 @@
},
{
"cell_type": "markdown",
"id": "3e628237",
"id": "b1e1cbd0",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -247,7 +247,7 @@
},
{
"cell_type": "markdown",
"id": "84a9e420",
"id": "820e9937",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -297,7 +297,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c570112b",
"id": "854758b3",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -348,7 +348,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7e637f17",
"id": "6b57e650",
"metadata": {
"nbgrader": {
"grade": true,
@@ -389,7 +389,7 @@
},
{
"cell_type": "markdown",
"id": "54c1f877",
"id": "b8be9f2c",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -459,7 +459,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "24575e1c",
"id": "aca5154a",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -531,7 +531,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "f7e6aa15",
"id": "7391538b",
"metadata": {
"nbgrader": {
"grade": true,
@@ -577,7 +577,7 @@
},
{
"cell_type": "markdown",
"id": "cde7a8f4",
"id": "0b9b254c",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -670,7 +670,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0e3a2600",
"id": "eb59fb50",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -746,7 +746,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "45e2f990",
"id": "c59fbbfd",
"metadata": {
"nbgrader": {
"grade": true,
@@ -797,7 +797,7 @@
},
{
"cell_type": "markdown",
"id": "30fd6c76",
"id": "599727d1",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -906,7 +906,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "3bd407b8",
"id": "54a20f3f",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -982,7 +982,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "985dd530",
"id": "1bab9d23",
"metadata": {
"nbgrader": {
"grade": true,
@@ -1033,7 +1033,7 @@
},
{
"cell_type": "markdown",
"id": "d7c0a96b",
"id": "ca40b581",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1090,7 +1090,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c5baee20",
"id": "76b4eb81",
"metadata": {
"nbgrader": {
"grade": false,
@@ -1146,7 +1146,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "e2216e97",
"id": "b90c91f0",
"metadata": {
"nbgrader": {
"grade": false,
@@ -1211,7 +1211,7 @@
},
{
"cell_type": "markdown",
"id": "435562a9",
"id": "e2fc1aa7",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1286,7 +1286,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0e2b66af",
"id": "573fa75d",
"metadata": {
"nbgrader": {
"grade": false,
@@ -1336,7 +1336,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "518ead17",
"id": "b7f12c78",
"metadata": {
"nbgrader": {
"grade": false,
@@ -1393,7 +1393,7 @@
},
{
"cell_type": "markdown",
"id": "7d0c2aa4",
"id": "4c6ebac9",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1457,7 +1457,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7345a14f",
"id": "d0b635c1",
"metadata": {
"nbgrader": {
"grade": false,
@@ -1513,7 +1513,7 @@
},
{
"cell_type": "markdown",
"id": "73d7096f",
"id": "d770e887",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -1526,7 +1526,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "5a5f3a29",
"id": "55fd411d",
"metadata": {
"nbgrader": {
"grade": true,
@@ -1606,7 +1606,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "6e91917e",
"id": "b66f2370",
"metadata": {
"lines_to_next_cell": 2
},
@@ -1619,7 +1619,7 @@
},
{
"cell_type": "markdown",
"id": "16461a93",
"id": "ce0d9c33",
"metadata": {
"cell_marker": "\"\"\""
},

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "3d8f1c62",
"id": "518b6ae0",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -51,7 +51,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "a2b41da9",
"id": "30bbc6f8",
"metadata": {
"nbgrader": {
"grade": false,
@@ -68,15 +68,12 @@
"from typing import List, Union, Optional, Dict, Any\n",
"\n",
"# Import Tensor from Module 01 (now with gradient support from Module 05)\n",
"import sys\n",
"import os\n",
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n",
"from tensor_dev import Tensor"
"from tinytorch.core.tensor import Tensor"
]
},
{
"cell_type": "markdown",
"id": "3169e215",
"id": "9057f3bf",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -133,7 +130,7 @@
},
{
"cell_type": "markdown",
"id": "baec0321",
"id": "3b2f074e",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -219,7 +216,7 @@
},
{
"cell_type": "markdown",
"id": "49716b34",
"id": "3000c581",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -247,7 +244,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "06d956dd",
"id": "d9343aa4",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -258,6 +255,7 @@
},
"outputs": [],
"source": [
"#| export\n",
"class Optimizer:\n",
" \"\"\"\n",
" Base class for all optimizers.\n",
@@ -332,7 +330,7 @@
},
{
"cell_type": "markdown",
"id": "82015c9d",
"id": "0ded4383",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -348,7 +346,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c0f57a08",
"id": "25d61648",
"metadata": {
"nbgrader": {
"grade": true,
@@ -401,7 +399,7 @@
},
{
"cell_type": "markdown",
"id": "7d9b8ceb",
"id": "bf5adabc",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -473,7 +471,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "ae4679bb",
"id": "12f0f4b6",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -484,6 +482,7 @@
},
"outputs": [],
"source": [
"#| export\n",
"class SGD(Optimizer):\n",
" \"\"\"\n",
" Stochastic Gradient Descent with momentum.\n",
@@ -576,7 +575,7 @@
},
{
"cell_type": "markdown",
"id": "ced264d8",
"id": "815d0bab",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -592,7 +591,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "68ae4ccf",
"id": "c01ebc69",
"metadata": {
"nbgrader": {
"grade": true,
@@ -659,7 +658,7 @@
},
{
"cell_type": "markdown",
"id": "480929e4",
"id": "c656b1b4",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -733,7 +732,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "2d7e339f",
"id": "b545ed16",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -744,6 +743,7 @@
},
"outputs": [],
"source": [
"#| export\n",
"class Adam(Optimizer):\n",
" \"\"\"\n",
" Adam optimizer with adaptive learning rates.\n",
@@ -853,7 +853,7 @@
},
{
"cell_type": "markdown",
"id": "6f114c5b",
"id": "b688bced",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -869,7 +869,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7f64abcc",
"id": "61fa7116",
"metadata": {
"nbgrader": {
"grade": true,
@@ -945,7 +945,7 @@
},
{
"cell_type": "markdown",
"id": "16ccfeaa",
"id": "7cb028b2",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1019,7 +1019,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "23c16f99",
"id": "277056cc",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -1030,6 +1030,7 @@
},
"outputs": [],
"source": [
"#| export\n",
"class AdamW(Optimizer):\n",
" \"\"\"\n",
" AdamW optimizer with decoupled weight decay.\n",
@@ -1133,7 +1134,7 @@
},
{
"cell_type": "markdown",
"id": "0269f86a",
"id": "d59b1b2b",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1149,7 +1150,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "016d7b36",
"id": "619464ee",
"metadata": {
"nbgrader": {
"grade": true,
@@ -1224,7 +1225,7 @@
},
{
"cell_type": "markdown",
"id": "295d5ee6",
"id": "29f5ad7b",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -1251,7 +1252,7 @@
},
{
"cell_type": "markdown",
"id": "47d676c3",
"id": "9dd160f5",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1297,7 +1298,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "67290db6",
"id": "ab882d12",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -1355,7 +1356,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "21136a44",
"id": "abac74aa",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -1434,7 +1435,7 @@
},
{
"cell_type": "markdown",
"id": "b171c224",
"id": "146f209d",
"metadata": {
"lines_to_next_cell": 1
},
@@ -1456,7 +1457,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "46ae99ae",
"id": "1726f746",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -1607,7 +1608,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "896f4c69",
"id": "7328ac69",
"metadata": {},
"outputs": [],
"source": [
@@ -1618,7 +1619,7 @@
},
{
"cell_type": "markdown",
"id": "35b39338",
"id": "c662a5f7",
"metadata": {
"cell_marker": "\"\"\""
},

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "765eea82",
"id": "78521710",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -52,7 +52,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "38b1402a",
"id": "d912bff5",
"metadata": {
"nbgrader": {
"grade": false,
@@ -75,22 +75,15 @@
"import os\n",
"\n",
"# Import dependencies from other modules\n",
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n",
"from tensor_dev import Tensor\n",
"\n",
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))\n",
"from layers_dev import Linear\n",
"\n",
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '04_losses'))\n",
"from losses_dev import MSELoss, CrossEntropyLoss\n",
"\n",
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_optimizers'))\n",
"from optimizers_dev import SGD, AdamW"
"from tinytorch.core.tensor import Tensor\n",
"from tinytorch.core.layers import Linear\n",
"from tinytorch.core.losses import MSELoss, CrossEntropyLoss\n",
"from tinytorch.core.optimizers import SGD, AdamW"
]
},
{
"cell_type": "markdown",
"id": "89550fb8",
"id": "2f4fc27e",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -119,7 +112,7 @@
},
{
"cell_type": "markdown",
"id": "d0b48f7a",
"id": "4fa19758",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -166,7 +159,7 @@
},
{
"cell_type": "markdown",
"id": "4ed8a995",
"id": "8599a0f1",
"metadata": {
"cell_marker": "\"\"\""
},
@@ -180,7 +173,7 @@
},
{
"cell_type": "markdown",
"id": "ebfa93fc",
"id": "ed5a85db",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -221,7 +214,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "347b09da",
"id": "9dac2b34",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -233,6 +226,7 @@
},
"outputs": [],
"source": [
"#| export\n",
"class CosineSchedule:\n",
" \"\"\"\n",
" Cosine annealing learning rate schedule.\n",
@@ -274,7 +268,7 @@
},
{
"cell_type": "markdown",
"id": "c1db4e03",
"id": "c146074f",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -290,7 +284,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c27f6878",
"id": "ee33397e",
"metadata": {
"nbgrader": {
"grade": true,
@@ -334,7 +328,7 @@
},
{
"cell_type": "markdown",
"id": "81fc482c",
"id": "da8efa9f",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -380,7 +374,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "db99efd3",
"id": "29a5573c",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -457,7 +451,7 @@
},
{
"cell_type": "markdown",
"id": "3b0b188d",
"id": "7c1510f3",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -473,7 +467,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "91bf937f",
"id": "754c9cd5",
"metadata": {
"nbgrader": {
"grade": true,
@@ -540,7 +534,7 @@
},
{
"cell_type": "markdown",
"id": "dde7833e",
"id": "a827fb93",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -597,7 +591,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "fa8339e1",
"id": "63354dd4",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -609,6 +603,7 @@
},
"outputs": [],
"source": [
"#| export\n",
"class Trainer:\n",
" \"\"\"\n",
" Complete training orchestrator for neural networks.\n",
@@ -875,7 +870,7 @@
},
{
"cell_type": "markdown",
"id": "529dfcf5",
"id": "9266bc60",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -891,7 +886,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "03510440",
"id": "8ce52aba",
"metadata": {
"nbgrader": {
"grade": true,
@@ -972,7 +967,7 @@
},
{
"cell_type": "markdown",
"id": "905180bd",
"id": "7ad86345",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 2
@@ -985,7 +980,7 @@
},
{
"cell_type": "markdown",
"id": "3c631938",
"id": "9953bcd4",
"metadata": {
"lines_to_next_cell": 1
},
@@ -1009,7 +1004,7 @@
},
{
"cell_type": "markdown",
"id": "8b65c5ab",
"id": "2eab95b6",
"metadata": {
"cell_marker": "\"\"\"",
"lines_to_next_cell": 1
@@ -1023,7 +1018,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "29eea538",
"id": "0580d838",
"metadata": {
"lines_to_next_cell": 1,
"nbgrader": {
@@ -1151,7 +1146,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "ae1bc4b9",
"id": "62eadf89",
"metadata": {
"nbgrader": {
"grade": false,
@@ -1169,7 +1164,7 @@
},
{
"cell_type": "markdown",
"id": "ad8ae396",
"id": "ebe885e5",
"metadata": {
"cell_marker": "\"\"\""
},

204
tinytorch/_modidx.py generated Normal file
View File

@@ -0,0 +1,204 @@
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/[unknown]/[unknown]_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# Autogenerated by nbdev
d = { 'settings': { 'branch': 'main',
'doc_baseurl': '/TinyTorch/',
'doc_host': 'https://tinytorch.github.io',
'git_url': 'https://github.com/tinytorch/TinyTorch/',
'lib_path': 'tinytorch'},
'syms': { 'tinytorch.core.activations': { 'tinytorch.core.activations.GELU': ( '02_activations/activations_dev.html#gelu',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.GELU.__call__': ( '02_activations/activations_dev.html#gelu.__call__',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.GELU.backward': ( '02_activations/activations_dev.html#gelu.backward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.GELU.forward': ( '02_activations/activations_dev.html#gelu.forward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.ReLU': ( '02_activations/activations_dev.html#relu',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.ReLU.__call__': ( '02_activations/activations_dev.html#relu.__call__',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.ReLU.backward': ( '02_activations/activations_dev.html#relu.backward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.ReLU.forward': ( '02_activations/activations_dev.html#relu.forward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Sigmoid': ( '02_activations/activations_dev.html#sigmoid',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Sigmoid.__call__': ( '02_activations/activations_dev.html#sigmoid.__call__',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Sigmoid.backward': ( '02_activations/activations_dev.html#sigmoid.backward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Sigmoid.forward': ( '02_activations/activations_dev.html#sigmoid.forward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Softmax': ( '02_activations/activations_dev.html#softmax',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Softmax.__call__': ( '02_activations/activations_dev.html#softmax.__call__',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Softmax.backward': ( '02_activations/activations_dev.html#softmax.backward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Softmax.forward': ( '02_activations/activations_dev.html#softmax.forward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Tanh': ( '02_activations/activations_dev.html#tanh',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Tanh.__call__': ( '02_activations/activations_dev.html#tanh.__call__',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Tanh.backward': ( '02_activations/activations_dev.html#tanh.backward',
'tinytorch/core/activations.py'),
'tinytorch.core.activations.Tanh.forward': ( '02_activations/activations_dev.html#tanh.forward',
'tinytorch/core/activations.py')},
'tinytorch.core.autograd': {},
'tinytorch.core.layers': { 'tinytorch.core.layers.Dropout': ('03_layers/layers_dev.html#dropout', 'tinytorch/core/layers.py'),
'tinytorch.core.layers.Dropout.__call__': ( '03_layers/layers_dev.html#dropout.__call__',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Dropout.__init__': ( '03_layers/layers_dev.html#dropout.__init__',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Dropout.__repr__': ( '03_layers/layers_dev.html#dropout.__repr__',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Dropout.forward': ( '03_layers/layers_dev.html#dropout.forward',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Dropout.parameters': ( '03_layers/layers_dev.html#dropout.parameters',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Linear': ('03_layers/layers_dev.html#linear', 'tinytorch/core/layers.py'),
'tinytorch.core.layers.Linear.__call__': ( '03_layers/layers_dev.html#linear.__call__',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Linear.__init__': ( '03_layers/layers_dev.html#linear.__init__',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Linear.__repr__': ( '03_layers/layers_dev.html#linear.__repr__',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Linear.forward': ( '03_layers/layers_dev.html#linear.forward',
'tinytorch/core/layers.py'),
'tinytorch.core.layers.Linear.parameters': ( '03_layers/layers_dev.html#linear.parameters',
'tinytorch/core/layers.py')},
'tinytorch.core.losses': { 'tinytorch.core.losses.BinaryCrossEntropyLoss': ( '04_losses/losses_dev.html#binarycrossentropyloss',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.BinaryCrossEntropyLoss.__call__': ( '04_losses/losses_dev.html#binarycrossentropyloss.__call__',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.BinaryCrossEntropyLoss.__init__': ( '04_losses/losses_dev.html#binarycrossentropyloss.__init__',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.BinaryCrossEntropyLoss.backward': ( '04_losses/losses_dev.html#binarycrossentropyloss.backward',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.BinaryCrossEntropyLoss.forward': ( '04_losses/losses_dev.html#binarycrossentropyloss.forward',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.CrossEntropyLoss': ( '04_losses/losses_dev.html#crossentropyloss',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.CrossEntropyLoss.__call__': ( '04_losses/losses_dev.html#crossentropyloss.__call__',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.CrossEntropyLoss.__init__': ( '04_losses/losses_dev.html#crossentropyloss.__init__',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.CrossEntropyLoss.backward': ( '04_losses/losses_dev.html#crossentropyloss.backward',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.CrossEntropyLoss.forward': ( '04_losses/losses_dev.html#crossentropyloss.forward',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.MSELoss': ('04_losses/losses_dev.html#mseloss', 'tinytorch/core/losses.py'),
'tinytorch.core.losses.MSELoss.__call__': ( '04_losses/losses_dev.html#mseloss.__call__',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.MSELoss.__init__': ( '04_losses/losses_dev.html#mseloss.__init__',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.MSELoss.backward': ( '04_losses/losses_dev.html#mseloss.backward',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.MSELoss.forward': ( '04_losses/losses_dev.html#mseloss.forward',
'tinytorch/core/losses.py'),
'tinytorch.core.losses.import_previous_module': ( '04_losses/losses_dev.html#import_previous_module',
'tinytorch/core/losses.py')},
'tinytorch.core.optimizers': { 'tinytorch.core.optimizers.Adam': ( '06_optimizers/optimizers_dev.html#adam',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.Adam.__init__': ( '06_optimizers/optimizers_dev.html#adam.__init__',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.Adam.step': ( '06_optimizers/optimizers_dev.html#adam.step',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.AdamW': ( '06_optimizers/optimizers_dev.html#adamw',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.AdamW.__init__': ( '06_optimizers/optimizers_dev.html#adamw.__init__',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.AdamW.step': ( '06_optimizers/optimizers_dev.html#adamw.step',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.Optimizer': ( '06_optimizers/optimizers_dev.html#optimizer',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.Optimizer.__init__': ( '06_optimizers/optimizers_dev.html#optimizer.__init__',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.Optimizer.step': ( '06_optimizers/optimizers_dev.html#optimizer.step',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.Optimizer.zero_grad': ( '06_optimizers/optimizers_dev.html#optimizer.zero_grad',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.SGD': ( '06_optimizers/optimizers_dev.html#sgd',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.SGD.__init__': ( '06_optimizers/optimizers_dev.html#sgd.__init__',
'tinytorch/core/optimizers.py'),
'tinytorch.core.optimizers.SGD.step': ( '06_optimizers/optimizers_dev.html#sgd.step',
'tinytorch/core/optimizers.py')},
'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('01_tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.__add__': ( '01_tensor/tensor_dev.html#tensor.__add__',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.__init__': ( '01_tensor/tensor_dev.html#tensor.__init__',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.__mul__': ( '01_tensor/tensor_dev.html#tensor.__mul__',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.__repr__': ( '01_tensor/tensor_dev.html#tensor.__repr__',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.__str__': ( '01_tensor/tensor_dev.html#tensor.__str__',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.__sub__': ( '01_tensor/tensor_dev.html#tensor.__sub__',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.__truediv__': ( '01_tensor/tensor_dev.html#tensor.__truediv__',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.backward': ( '01_tensor/tensor_dev.html#tensor.backward',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.matmul': ( '01_tensor/tensor_dev.html#tensor.matmul',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.max': ( '01_tensor/tensor_dev.html#tensor.max',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.mean': ( '01_tensor/tensor_dev.html#tensor.mean',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.numpy': ( '01_tensor/tensor_dev.html#tensor.numpy',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.reshape': ( '01_tensor/tensor_dev.html#tensor.reshape',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.sum': ( '01_tensor/tensor_dev.html#tensor.sum',
'tinytorch/core/tensor.py'),
'tinytorch.core.tensor.Tensor.transpose': ( '01_tensor/tensor_dev.html#tensor.transpose',
'tinytorch/core/tensor.py')},
'tinytorch.core.training': { 'tinytorch.core.training.CosineSchedule': ( '07_training/training_dev.html#cosineschedule',
'tinytorch/core/training.py'),
'tinytorch.core.training.CosineSchedule.__init__': ( '07_training/training_dev.html#cosineschedule.__init__',
'tinytorch/core/training.py'),
'tinytorch.core.training.CosineSchedule.get_lr': ( '07_training/training_dev.html#cosineschedule.get_lr',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer': ( '07_training/training_dev.html#trainer',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer.__init__': ( '07_training/training_dev.html#trainer.__init__',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer._get_model_state': ( '07_training/training_dev.html#trainer._get_model_state',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer._get_optimizer_state': ( '07_training/training_dev.html#trainer._get_optimizer_state',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer._get_scheduler_state': ( '07_training/training_dev.html#trainer._get_scheduler_state',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer._set_model_state': ( '07_training/training_dev.html#trainer._set_model_state',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer._set_optimizer_state': ( '07_training/training_dev.html#trainer._set_optimizer_state',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer._set_scheduler_state': ( '07_training/training_dev.html#trainer._set_scheduler_state',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer.evaluate': ( '07_training/training_dev.html#trainer.evaluate',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer.load_checkpoint': ( '07_training/training_dev.html#trainer.load_checkpoint',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer.save_checkpoint': ( '07_training/training_dev.html#trainer.save_checkpoint',
'tinytorch/core/training.py'),
'tinytorch.core.training.Trainer.train_epoch': ( '07_training/training_dev.html#trainer.train_epoch',
'tinytorch/core/training.py')}}}

View File

@@ -1,8 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/20_capstone/capstone_dev.ipynb.
# %% auto 0
__all__ = []
# %% ../../modules/source/20_capstone/capstone_dev.ipynb 2
#| default_exp applications.tinygpt
#| export

View File

@@ -1,8 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/19_benchmarking/benchmarking_dev.ipynb.
# %% auto 0
__all__ = []
# %% ../../modules/source/19_benchmarking/benchmarking_dev.ipynb 0
#| default_exp benchmarking.benchmark
#| export

View File

@@ -1,5 +1,19 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb.
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/03_activations/activations_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax']

View File

@@ -1,8 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/12_attention/attention_dev.ipynb.
# %% auto 0
__all__ = []
# %% ../../modules/source/12_attention/attention_dev.ipynb 0
#| default_exp core.attention
#| export

View File

@@ -1,5 +1,19 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_layers/layers_dev.ipynb.
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/04_layers/layers_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['Linear', 'Dropout']
@@ -194,6 +208,10 @@ class Dropout:
return Tensor(output_data)
### END SOLUTION
def __call__(self, x, training=True):
"""Allows the layer to be called like a function."""
return self.forward(x, training)
def parameters(self):
"""Dropout has no parameters."""
return []

View File

@@ -1,5 +1,19 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_losses/losses_dev.ipynb.
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/XX_losses/losses_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['import_previous_module', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss']

View File

@@ -1,5 +1,19 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/06_optimizers/optimizers_dev.ipynb.
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/10_optimizers/optimizers_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW']
@@ -7,10 +21,10 @@ __all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW']
import numpy as np
from typing import List, Union, Optional, Dict, Any
# Import Tensor from Module 01
from tinytorch.core.tensor import Tensor
# Import Tensor from Module 01 (now with gradient support from Module 05)
from .tensor import Tensor
# %% Base Optimizer class
# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 5
class Optimizer:
"""
Base class for all optimizers.
@@ -37,6 +51,7 @@ class Optimizer:
HINT: Check that each parameter has requires_grad=True
"""
### BEGIN SOLUTION
# Validate and store parameters
if not isinstance(params, list):
params = list(params)
@@ -50,6 +65,7 @@ class Optimizer:
self.params = params
self.step_count = 0 # For algorithms that need step counting
### END SOLUTION
def zero_grad(self):
"""
@@ -67,8 +83,10 @@ class Optimizer:
WHY: Gradients accumulate by default, so we need to clear them between batches
"""
### BEGIN SOLUTION
for param in self.params:
param.grad = None
### END SOLUTION
def step(self):
"""
@@ -78,9 +96,7 @@ class Optimizer:
"""
raise NotImplementedError("Subclasses must implement step()")
# %% SGD Optimizer
# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 9
class SGD(Optimizer):
"""
Stochastic Gradient Descent with momentum.
@@ -108,6 +124,7 @@ class SGD(Optimizer):
- Momentum buffers should be initialized as None
- They'll be created lazily on first step
"""
### BEGIN SOLUTION
super().__init__(params)
self.lr = lr
@@ -116,6 +133,7 @@ class SGD(Optimizer):
# Initialize momentum buffers (created lazily)
self.momentum_buffers = [None for _ in self.params]
### END SOLUTION
def step(self):
"""
@@ -139,6 +157,7 @@ class SGD(Optimizer):
- Initialize momentum buffers on first use
- Use in-place operations to save memory
"""
### BEGIN SOLUTION
for i, param in enumerate(self.params):
if param.grad is None:
continue
@@ -165,10 +184,9 @@ class SGD(Optimizer):
# Increment step counter
self.step_count += 1
### END SOLUTION
# %% Adam Optimizer
# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 13
class Adam(Optimizer):
"""
Adam optimizer with adaptive learning rates.
@@ -198,6 +216,7 @@ class Adam(Optimizer):
EXAMPLE:
>>> optimizer = Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
"""
### BEGIN SOLUTION
super().__init__(params)
self.lr = lr
@@ -208,6 +227,7 @@ class Adam(Optimizer):
# Initialize moment buffers (created lazily)
self.m_buffers = [None for _ in self.params] # First moment (mean)
self.v_buffers = [None for _ in self.params] # Second moment (variance)
### END SOLUTION
def step(self):
"""
@@ -235,6 +255,7 @@ class Adam(Optimizer):
- Use step_count for bias correction
- Square gradients element-wise for second moment
"""
### BEGIN SOLUTION
# Increment step counter first (needed for bias correction)
self.step_count += 1
@@ -270,10 +291,9 @@ class Adam(Optimizer):
# Update parameter
param.data = param.data - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
### END SOLUTION
# %% AdamW Optimizer
# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 17
class AdamW(Optimizer):
"""
AdamW optimizer with decoupled weight decay.
@@ -301,6 +321,7 @@ class AdamW(Optimizer):
EXAMPLE:
>>> optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
"""
### BEGIN SOLUTION
super().__init__(params)
self.lr = lr
@@ -311,6 +332,7 @@ class AdamW(Optimizer):
# Initialize moment buffers (same as Adam)
self.m_buffers = [None for _ in self.params]
self.v_buffers = [None for _ in self.params]
### END SOLUTION
def step(self):
"""
@@ -336,6 +358,7 @@ class AdamW(Optimizer):
HINT: Apply weight decay after gradient update for proper decoupling
"""
### BEGIN SOLUTION
# Increment step counter first
self.step_count += 1
@@ -369,4 +392,4 @@ class AdamW(Optimizer):
# Apply decoupled weight decay
if self.weight_decay != 0:
param.data = param.data * (1 - self.lr * self.weight_decay)
### END SOLUTION

View File

@@ -1,64 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_spatial/spatial_dev.ipynb.
# %% auto 0
__all__ = []
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 1
import numpy as np
import sys
import os
import time
# Import dependencies from other modules
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
from tensor_dev import Tensor
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
from layers_dev import Module
# Note: Keeping simplified implementations for reference during development
class _SimplifiedTensor:
"""Simplified tensor for spatial operations development."""
def __init__(self, data, requires_grad=False):
self.data = np.array(data, dtype=np.float32)
self.shape = self.data.shape
self.requires_grad = requires_grad
self.grad = None
def __repr__(self):
return f"Tensor(shape={self.shape}, data=\n{self.data})"
def __add__(self, other):
if isinstance(other, Tensor):
return Tensor(self.data + other.data)
return Tensor(self.data + other)
def __mul__(self, other):
if isinstance(other, Tensor):
return Tensor(self.data * other.data)
return Tensor(self.data * other)
def sum(self):
return Tensor(np.sum(self.data))
def mean(self):
return Tensor(np.mean(self.data))
# Create a simple Module base class for inheritance
class Module:
"""Simple base class for neural network modules."""
def __init__(self):
pass
def forward(self, x):
raise NotImplementedError("Subclasses must implement forward()")
def parameters(self):
"""Return list of parameters for this module."""
params = []
for attr_name in dir(self):
attr = getattr(self, attr_name)
if hasattr(attr, 'data') and hasattr(attr, 'requires_grad'):
params.append(attr)
return params

View File

@@ -1,5 +1,19 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_tensor/tensor_dev.ipynb.
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/02_tensor/tensor_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['Tensor']

View File

@@ -1,7 +1,21 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/07_training/training_dev.ipynb.
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/11_training/training_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = []
__all__ = ['CosineSchedule', 'Trainer']
# %% ../../modules/source/07_training/training_dev.ipynb 1
import numpy as np
@@ -13,14 +27,310 @@ import sys
import os
# Import dependencies from other modules
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
from tensor_dev import Tensor
from .tensor import Tensor
from .layers import Linear
from .losses import MSELoss, CrossEntropyLoss
from .optimizers import SGD, AdamW
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
from layers_dev import Linear
# %% ../../modules/source/07_training/training_dev.ipynb 6
class CosineSchedule:
"""
Cosine annealing learning rate schedule.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '04_losses'))
from losses_dev import MSELoss, CrossEntropyLoss
Starts at max_lr, decreases following a cosine curve to min_lr over T epochs.
This provides aggressive learning initially, then fine-tuning at the end.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_optimizers'))
from optimizers_dev import SGD, AdamW
TODO: Implement cosine annealing schedule
APPROACH:
1. Store max_lr, min_lr, and total_epochs
2. In get_lr(), compute cosine factor: (1 + cos(π * epoch / total_epochs)) / 2
3. Interpolate: min_lr + (max_lr - min_lr) * cosine_factor
EXAMPLE:
>>> schedule = CosineSchedule(max_lr=0.1, min_lr=0.01, total_epochs=100)
>>> print(schedule.get_lr(0)) # Start: 0.1
>>> print(schedule.get_lr(50)) # Middle: ~0.055
>>> print(schedule.get_lr(100)) # End: 0.01
HINT: Use np.cos() and np.pi for the cosine calculation
"""
### BEGIN SOLUTION
def __init__(self, max_lr: float = 0.1, min_lr: float = 0.01, total_epochs: int = 100):
self.max_lr = max_lr
self.min_lr = min_lr
self.total_epochs = total_epochs
def get_lr(self, epoch: int) -> float:
"""Get learning rate for current epoch."""
if epoch >= self.total_epochs:
return self.min_lr
# Cosine annealing formula
cosine_factor = (1 + np.cos(np.pi * epoch / self.total_epochs)) / 2
return self.min_lr + (self.max_lr - self.min_lr) * cosine_factor
### END SOLUTION
# %% ../../modules/source/07_training/training_dev.ipynb 14
class Trainer:
"""
Complete training orchestrator for neural networks.
Handles the full training lifecycle: forward pass, loss computation,
backward pass, optimization, scheduling, checkpointing, and evaluation.
This is the central class that brings together all the components
you've built in previous modules.
TODO: Implement complete Trainer class
APPROACH:
1. Store model, optimizer, loss function, and optional scheduler
2. train_epoch(): Loop through data, compute loss, update parameters
3. evaluate(): Similar loop but without gradient updates
4. save/load_checkpoint(): Persist training state for resumption
DESIGN PATTERNS:
- Context managers for train/eval modes
- Gradient accumulation for effective large batch sizes
- Progress tracking for monitoring
- Flexible scheduling integration
"""
### BEGIN SOLUTION
def __init__(self, model, optimizer, loss_fn, scheduler=None, grad_clip_norm=None):
"""
Initialize trainer with model and training components.
Args:
model: Neural network to train
optimizer: Parameter update strategy (SGD, Adam, etc.)
loss_fn: Loss function (CrossEntropy, MSE, etc.)
scheduler: Optional learning rate scheduler
grad_clip_norm: Optional gradient clipping threshold
"""
self.model = model
self.optimizer = optimizer
self.loss_fn = loss_fn
self.scheduler = scheduler
self.grad_clip_norm = grad_clip_norm
# Training state
self.epoch = 0
self.step = 0
self.training_mode = True
# History tracking
self.history = {
'train_loss': [],
'eval_loss': [],
'learning_rates': []
}
def train_epoch(self, dataloader, accumulation_steps=1):
"""
Train for one epoch through the dataset.
Args:
dataloader: Iterable yielding (inputs, targets) batches
accumulation_steps: Number of batches to accumulate before update
Returns:
Average loss for the epoch
"""
self.model.training = True
self.training_mode = True
total_loss = 0.0
num_batches = 0
accumulated_loss = 0.0
for batch_idx, (inputs, targets) in enumerate(dataloader):
# Forward pass
outputs = self.model.forward(inputs)
loss = self.loss_fn.forward(outputs, targets)
# Scale loss for accumulation
scaled_loss = loss.data / accumulation_steps
accumulated_loss += scaled_loss
# Backward pass
if hasattr(loss, 'backward'):
loss.backward()
# Update parameters every accumulation_steps
if (batch_idx + 1) % accumulation_steps == 0:
# Gradient clipping
if self.grad_clip_norm is not None:
params = []
if hasattr(self.model, 'parameters'):
params = self.model.parameters()
clip_grad_norm(params, self.grad_clip_norm)
# Optimizer step
self.optimizer.step()
self.optimizer.zero_grad()
total_loss += accumulated_loss
accumulated_loss = 0.0
num_batches += 1
self.step += 1
# Handle remaining accumulated gradients
if accumulated_loss > 0:
if self.grad_clip_norm is not None:
params = []
if hasattr(self.model, 'parameters'):
params = self.model.parameters()
clip_grad_norm(params, self.grad_clip_norm)
self.optimizer.step()
self.optimizer.zero_grad()
total_loss += accumulated_loss
num_batches += 1
avg_loss = total_loss / max(num_batches, 1)
self.history['train_loss'].append(avg_loss)
# Update scheduler
if self.scheduler is not None:
current_lr = self.scheduler.get_lr(self.epoch)
# Update optimizer learning rate
if hasattr(self.optimizer, 'lr'):
self.optimizer.lr = current_lr
self.history['learning_rates'].append(current_lr)
self.epoch += 1
return avg_loss
def evaluate(self, dataloader):
"""
Evaluate model on dataset without updating parameters.
Args:
dataloader: Iterable yielding (inputs, targets) batches
Returns:
Average loss and accuracy
"""
self.model.training = False
self.training_mode = False
total_loss = 0.0
correct = 0
total = 0
for inputs, targets in dataloader:
# Forward pass only
outputs = self.model.forward(inputs)
loss = self.loss_fn.forward(outputs, targets)
total_loss += loss.data
# Calculate accuracy (for classification)
if hasattr(outputs, 'data') and hasattr(targets, 'data'):
if len(outputs.data.shape) > 1: # Multi-class
predictions = np.argmax(outputs.data, axis=1)
if len(targets.data.shape) == 1: # Integer targets
correct += np.sum(predictions == targets.data)
else: # One-hot targets
correct += np.sum(predictions == np.argmax(targets.data, axis=1))
total += len(predictions)
avg_loss = total_loss / len(dataloader) if len(dataloader) > 0 else 0.0
accuracy = correct / total if total > 0 else 0.0
self.history['eval_loss'].append(avg_loss)
return avg_loss, accuracy
def save_checkpoint(self, path: str):
    """
    Persist the complete training state to disk for later resumption.

    Args:
        path: Destination file path for the pickled checkpoint.
    """
    state = {
        'epoch': self.epoch,
        'step': self.step,
        'model_state': self._get_model_state(),
        'optimizer_state': self._get_optimizer_state(),
        'scheduler_state': self._get_scheduler_state(),
        'history': self.history,
        'training_mode': self.training_mode,
    }
    # Create any missing parent directories so fresh paths just work.
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'wb') as handle:
        pickle.dump(state, handle)
def load_checkpoint(self, path: str):
    """
    Restore training state previously written by ``save_checkpoint``.

    Args:
        path: File path of the pickled checkpoint to read.
    """
    with open(path, 'rb') as handle:
        checkpoint = pickle.load(handle)
    self.epoch = checkpoint['epoch']
    self.step = checkpoint['step']
    self.history = checkpoint['history']
    self.training_mode = checkpoint['training_mode']
    # Restore component states only when present (simplified for
    # educational purposes).
    restorers = (
        ('model_state', self._set_model_state),
        ('optimizer_state', self._set_optimizer_state),
        ('scheduler_state', self._set_scheduler_state),
    )
    for key, restore in restorers:
        if key in checkpoint:
            restore(checkpoint[key])
def _get_model_state(self):
"""Extract model parameters for checkpointing."""
if hasattr(self.model, 'parameters'):
return {i: param.data.copy() for i, param in enumerate(self.model.parameters())}
return {}
def _set_model_state(self, state):
"""Restore model parameters from checkpoint."""
if hasattr(self.model, 'parameters'):
for i, param in enumerate(self.model.parameters()):
if i in state:
param.data = state[i].copy()
def _get_optimizer_state(self):
"""Extract optimizer state for checkpointing."""
state = {}
if hasattr(self.optimizer, 'lr'):
state['lr'] = self.optimizer.lr
if hasattr(self.optimizer, 'momentum_buffers'):
state['momentum_buffers'] = self.optimizer.momentum_buffers.copy()
return state
def _set_optimizer_state(self, state):
"""Restore optimizer state from checkpoint."""
if 'lr' in state and hasattr(self.optimizer, 'lr'):
self.optimizer.lr = state['lr']
if 'momentum_buffers' in state and hasattr(self.optimizer, 'momentum_buffers'):
self.optimizer.momentum_buffers = state['momentum_buffers']
def _get_scheduler_state(self):
"""Extract scheduler state for checkpointing."""
if self.scheduler is None:
return None
return {
'max_lr': getattr(self.scheduler, 'max_lr', None),
'min_lr': getattr(self.scheduler, 'min_lr', None),
'total_epochs': getattr(self.scheduler, 'total_epochs', None)
}
def _set_scheduler_state(self, state):
"""Restore scheduler state from checkpoint."""
if state is None or self.scheduler is None:
return
for key, value in state.items():
if hasattr(self.scheduler, key):
setattr(self.scheduler, key, value)
### END SOLUTION

View File

@@ -1,57 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/14_kvcaching/kvcaching_dev.ipynb.
# %% auto 0
__all__ = ['Tensor']
# %% ../../modules/source/14_kvcaching/kvcaching_dev.ipynb 1
import numpy as np
import time
from typing import Tuple, Optional, Dict, List
from dataclasses import dataclass
# Import our TinyTorch components (Modules 01-13)
### BEGIN SOLUTION
# Note: In real implementation, these would import from previous modules
# For now, we'll implement minimal versions to focus on caching concepts
class Tensor:
    """Minimal Tensor for KV Caching focus (from Module 01)"""

    def __init__(self, data, requires_grad=False):
        # Everything is backed by a NumPy array; gradients start unset.
        self.data = np.array(data)
        self.shape = self.data.shape
        self.requires_grad = requires_grad
        self.grad = None

    def __getitem__(self, key):
        # Indexing yields a fresh Tensor (grad tracking not propagated).
        return Tensor(self.data[key])

    def __setitem__(self, key, value):
        # Accept either another Tensor or a raw array/scalar on assignment.
        payload = value.data if isinstance(value, Tensor) else value
        self.data[key] = payload

    def size(self, dim=None):
        # Torch-style size(): full shape without an arg, one dim with.
        return self.shape if dim is None else self.shape[dim]

    def view(self, *shape):
        # Reinterpret the data with a new shape (element count preserved).
        return Tensor(self.data.reshape(shape))

    def transpose(self, dim0, dim1):
        # Swap exactly two axes, leaving all others in place.
        order = list(range(len(self.shape)))
        order[dim0], order[dim1] = order[dim1], order[dim0]
        return Tensor(np.transpose(self.data, order))

    @staticmethod
    def cat(tensors, dim=0):
        """Concatenate tensors along dimension"""
        return Tensor(np.concatenate([t.data for t in tensors], axis=dim))

    @staticmethod
    def zeros(*shape):
        """Create zero tensor"""
        return Tensor(np.zeros(shape))
### END SOLUTION

View File

@@ -1,148 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_transformers/transformers_dev.ipynb.
# %% auto 0
__all__ = ['Tensor', 'Linear', 'MultiHeadAttention', 'Embedding', 'gelu']
# %% ../../modules/source/13_transformers/transformers_dev.ipynb 1
import numpy as np
import math
from typing import Optional, List
# Minimal implementations for development - in practice these import from previous modules
class Tensor:
    """Minimal Tensor class for transformer development - imports from Module 01 in practice."""

    def __init__(self, data, requires_grad=False):
        self.data = np.array(data)
        self.shape = self.data.shape
        self.size = self.data.size  # total element count (NumPy convention)
        self.requires_grad = requires_grad
        self.grad = None

    def __add__(self, other):
        # Element-wise addition; the operand may be a Tensor or a scalar/array.
        rhs = other.data if isinstance(other, Tensor) else other
        return Tensor(self.data + rhs)

    def __mul__(self, other):
        # Element-wise multiplication, same operand rules as __add__.
        rhs = other.data if isinstance(other, Tensor) else other
        return Tensor(self.data * rhs)

    def matmul(self, other):
        # Matrix product against another Tensor.
        return Tensor(np.dot(self.data, other.data))

    def sum(self, axis=None, keepdims=False):
        return Tensor(self.data.sum(axis=axis, keepdims=keepdims))

    def mean(self, axis=None, keepdims=False):
        return Tensor(self.data.mean(axis=axis, keepdims=keepdims))

    def reshape(self, *shape):
        return Tensor(self.data.reshape(shape))

    def __repr__(self):
        return f"Tensor(data={self.data}, shape={self.shape})"
class Linear:
    """Minimal Linear layer - imports from Module 03 in practice."""

    def __init__(self, in_features, out_features, bias=True):
        # Xavier/Glorot initialization
        scale = math.sqrt(2.0 / (in_features + out_features))
        self.weight = Tensor(np.random.normal(0, scale, (in_features, out_features)))
        self.bias = Tensor(np.zeros(out_features)) if bias else None

    def forward(self, x):
        # y = x @ W (+ b when a bias term exists)
        result = x.matmul(self.weight)
        return result if self.bias is None else result + self.bias

    def parameters(self):
        # Weight always; bias only when configured.
        if self.bias is None:
            return [self.weight]
        return [self.weight, self.bias]
class MultiHeadAttention:
    """Minimal MultiHeadAttention - imports from Module 12 in practice.

    Splits the embedding into ``num_heads`` independent heads, runs scaled
    dot-product attention per head, then recombines and projects.
    """
    def __init__(self, embed_dim, num_heads):
        # The embedding width must split evenly across heads.
        assert embed_dim % num_heads == 0
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads  # per-head feature width
        # Separate Q/K/V projections plus the final output projection.
        self.q_proj = Linear(embed_dim, embed_dim)
        self.k_proj = Linear(embed_dim, embed_dim)
        self.v_proj = Linear(embed_dim, embed_dim)
        self.out_proj = Linear(embed_dim, embed_dim)
    def forward(self, x, mask=None):
        """Run self-attention over x.

        Args:
            x: Input of shape (batch_size, seq_len, embed_dim).
            mask: Optional additive mask added onto the raw attention
                scores (e.g. large negatives for causal masking).

        Returns:
            Tensor of shape (batch_size, seq_len, embed_dim).
        """
        batch_size, seq_len, embed_dim = x.shape
        # Linear projections
        Q = self.q_proj.forward(x)
        K = self.k_proj.forward(x)
        V = self.v_proj.forward(x)
        # Reshape for multi-head attention: split embed_dim into heads
        Q = Q.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
        K = K.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
        V = V.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
        # Transpose to (batch_size, num_heads, seq_len, head_dim)
        Q = Tensor(np.transpose(Q.data, (0, 2, 1, 3)))
        K = Tensor(np.transpose(K.data, (0, 2, 1, 3)))
        V = Tensor(np.transpose(V.data, (0, 2, 1, 3)))
        # Scaled dot-product attention: scores = Q K^T / sqrt(head_dim)
        scores = Tensor(np.matmul(Q.data, np.transpose(K.data, (0, 1, 3, 2))))
        scores = scores * (1.0 / math.sqrt(self.head_dim))
        # Apply causal mask for autoregressive generation (additive)
        if mask is not None:
            scores = Tensor(scores.data + mask.data)
        # Softmax over the key dimension turns scores into weights
        attention_weights = self._softmax(scores)
        # Apply attention to values
        out = Tensor(np.matmul(attention_weights.data, V.data))
        # Transpose back and reshape to (batch_size, seq_len, embed_dim)
        out = Tensor(np.transpose(out.data, (0, 2, 1, 3)))
        out = out.reshape(batch_size, seq_len, embed_dim)
        # Final linear projection
        return self.out_proj.forward(out)
    def _softmax(self, x):
        """Numerically stable softmax (subtracts the per-row max first)."""
        exp_x = Tensor(np.exp(x.data - np.max(x.data, axis=-1, keepdims=True)))
        return Tensor(exp_x.data / np.sum(exp_x.data, axis=-1, keepdims=True))
    def parameters(self):
        # All learnable parameters come from the four Linear projections.
        params = []
        params.extend(self.q_proj.parameters())
        params.extend(self.k_proj.parameters())
        params.extend(self.v_proj.parameters())
        params.extend(self.out_proj.parameters())
        return params
class Embedding:
    """Minimal Embedding layer - imports from Module 11 in practice."""

    def __init__(self, vocab_size, embed_dim):
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        # Initialize with small random values
        self.weight = Tensor(np.random.normal(0, 0.02, (vocab_size, embed_dim)))

    def forward(self, indices):
        # Row lookup: each token id selects its embedding vector.
        return Tensor(self.weight.data[indices.data])

    def parameters(self):
        return [self.weight]
def gelu(x):
    """GELU activation function."""
    # tanh approximation: 0.5*x*(1 + tanh(sqrt(2/pi)*(x + 0.044715*x^3)))
    inner = np.sqrt(2 / np.pi) * (x.data + 0.044715 * x.data ** 3)
    return Tensor(0.5 * x.data * (1 + np.tanh(inner)))

View File

@@ -1,8 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/16_acceleration/acceleration_dev.ipynb.
# %% auto 0
__all__ = []
# %% ../../modules/source/16_acceleration/acceleration_dev.ipynb 0
#| default_exp optimization.acceleration
#| export

View File

@@ -1,85 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/18_compression/compression_dev.ipynb.
# %% auto 0
__all__ = ['Tensor', 'Linear', 'Sequential']
# %% ../../modules/source/18_compression/compression_dev.ipynb 1
import numpy as np
import copy
from typing import List, Dict, Any, Tuple, Optional
import time
# Import from previous modules
# Note: In the full package, these would be imports like:
# from tinytorch.core.tensor import Tensor
# from tinytorch.core.layers import Linear
# For development, we'll create minimal implementations
class Tensor:
    """Minimal Tensor class for compression development - imports from Module 01 in practice."""

    def __init__(self, data, requires_grad=False):
        self.data = np.array(data)
        self.shape = self.data.shape
        self.size = self.data.size  # total element count
        self.requires_grad = requires_grad
        self.grad = None

    def __add__(self, other):
        # Element-wise add; operand may be a Tensor or a scalar/array.
        rhs = other.data if isinstance(other, Tensor) else other
        return Tensor(self.data + rhs)

    def __mul__(self, other):
        # Element-wise multiply, same operand rules as __add__.
        rhs = other.data if isinstance(other, Tensor) else other
        return Tensor(self.data * rhs)

    def matmul(self, other):
        return Tensor(np.dot(self.data, other.data))

    def abs(self):
        # Element-wise magnitude (useful for magnitude-based pruning).
        return Tensor(np.abs(self.data))

    def sum(self, axis=None):
        return Tensor(self.data.sum(axis=axis))

    def __repr__(self):
        return f"Tensor(shape={self.shape})"
class Linear:
    """Minimal Linear layer for compression development - imports from Module 03 in practice."""

    def __init__(self, in_features, out_features, bias=True):
        self.in_features = in_features
        self.out_features = out_features
        # He initialization: scale by sqrt(2 / fan_in).
        he_scale = np.sqrt(2.0 / in_features)
        self.weight = Tensor(np.random.randn(in_features, out_features) * he_scale)
        self.bias = Tensor(np.zeros(out_features)) if bias else None

    def forward(self, x):
        # y = x @ W (+ b when a bias term exists)
        result = x.matmul(self.weight)
        return result if self.bias is None else result + self.bias

    def parameters(self):
        if self.bias is None:
            return [self.weight]
        return [self.weight, self.bias]
class Sequential:
    """Minimal Sequential container for model compression."""

    def __init__(self, *layers):
        self.layers = list(layers)

    def forward(self, x):
        # Thread the input through each layer in order.
        out = x
        for layer in self.layers:
            out = layer.forward(out)
        return out

    def parameters(self):
        # Collect parameters from every layer that exposes them.
        return [
            p
            for layer in self.layers
            if hasattr(layer, 'parameters')
            for p in layer.parameters()
        ]

View File

@@ -1,8 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/17_quantization/quantization_dev.ipynb.
# %% auto 0
__all__ = []
# %% ../../modules/source/17_quantization/quantization_dev.ipynb 0
#| default_exp optimization.quantization
#| export

View File

@@ -1,35 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/15_profiling/profiling_dev.ipynb.
# %% auto 0
__all__ = []
# %% ../../modules/source/15_profiling/profiling_dev.ipynb 1
import time
import numpy as np
import tracemalloc
from typing import Dict, List, Any, Optional, Tuple
from collections import defaultdict
import gc
# Import our TinyTorch components for profiling
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '09_spatial'))
# For testing purposes - in real package these would be proper imports
try:
    # Prefer the real dev-module implementations (the sys.path entries
    # appended above are presumably what makes these importable —
    # TODO confirm against the module layout).
    from tensor_dev import Tensor
    from layers_dev import Linear, Sequential
    from spatial_dev import Conv2d
except ImportError:
    # Fallback - create minimal implementations for testing
    class Tensor:
        # Minimal stand-in: wraps a NumPy array and supports only the
        # operations used here (element-wise * and sum).
        def __init__(self, data):
            self.data = np.array(data)
            self.shape = self.data.shape
        def __mul__(self, other):
            # Element-wise product with another Tensor.
            return Tensor(self.data * other.data)
        def sum(self):
            return Tensor(np.sum(self.data))

View File

@@ -1,8 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/10_tokenization/tokenization_dev.ipynb.
# %% auto 0
__all__ = []
# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 0
#| default_exp text.tokenization
#| export