mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 21:14:17 -05:00
Reset package and export modules 01-07 only (skip broken spatial module)
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ca4b9f5",
|
||||
"id": "22bf7b48",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -51,7 +51,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3dcaaffc",
|
||||
"id": "7d24677b",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -69,7 +69,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e70ae12a",
|
||||
"id": "447a0b7a",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -116,7 +116,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7a1e48b5",
|
||||
"id": "c2b4bc17",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -175,7 +175,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "42f2279e",
|
||||
"id": "1dc8a950",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -214,7 +214,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cb1e99f0",
|
||||
"id": "334562a5",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -252,7 +252,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4a090be0",
|
||||
"id": "27b3b08d",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -724,7 +724,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a49cddfd",
|
||||
"id": "345f0782",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -742,7 +742,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "79195fe8",
|
||||
"id": "503244d4",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -791,7 +791,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7cbed527",
|
||||
"id": "5176cde0",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -839,7 +839,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "30f53e64",
|
||||
"id": "45461424",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -882,7 +882,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e13b5c91",
|
||||
"id": "7ba6f505",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -900,7 +900,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "26ab9e58",
|
||||
"id": "9471ca95",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -957,7 +957,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8ab4eb75",
|
||||
"id": "453ed0e5",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -1057,7 +1057,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "75e72654",
|
||||
"id": "ae6dca6f",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1075,7 +1075,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "434f6550",
|
||||
"id": "d1bf193f",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -1132,7 +1132,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "de04fa2e",
|
||||
"id": "23a70fb2",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -1235,7 +1235,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2f4cd90a",
|
||||
"id": "a320a34f",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1253,7 +1253,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e497f3d1",
|
||||
"id": "04a65af9",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -1323,7 +1323,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c944cd8b",
|
||||
"id": "509140c2",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -1417,7 +1417,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e8312574",
|
||||
"id": "21664f47",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1435,7 +1435,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "66d6beb6",
|
||||
"id": "5f9edd66",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -1508,7 +1508,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71042cd1",
|
||||
"id": "8b900870",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -1583,7 +1583,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9d5518b2",
|
||||
"id": "a98400bf",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -1644,7 +1644,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23b79c43",
|
||||
"id": "c74f78e6",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1
|
||||
},
|
||||
@@ -1666,7 +1666,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c8fdde9c",
|
||||
"id": "6766dc8a",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 2,
|
||||
"nbgrader": {
|
||||
@@ -1794,7 +1794,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f0f02362",
|
||||
"id": "602da67a",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f7f092b",
|
||||
"id": "41637b5b",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -34,7 +34,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ba7543b3",
|
||||
"id": "eb80f71c",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -59,7 +59,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f04cb4a",
|
||||
"id": "ad445b19",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -78,7 +78,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3042497e",
|
||||
"id": "7fc4b3ae",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -102,7 +102,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "609861d1",
|
||||
"id": "6c49b0a7",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -144,7 +144,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9b7b4834",
|
||||
"id": "a82d5ffc",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -166,7 +166,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29845a4a",
|
||||
"id": "d954190f",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -190,7 +190,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d5fc598c",
|
||||
"id": "1d26aa84",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -228,7 +228,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b6cb596e",
|
||||
"id": "cd112f28",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -287,7 +287,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84674501",
|
||||
"id": "87407a56",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -303,7 +303,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3fbc497a",
|
||||
"id": "8599e53a",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -344,7 +344,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a076a2f1",
|
||||
"id": "96438263",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -386,7 +386,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a079c21f",
|
||||
"id": "6bdad44d",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -442,7 +442,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8cf41efa",
|
||||
"id": "853265df",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -458,7 +458,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9bf84e40",
|
||||
"id": "e3f2e5fd",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -505,7 +505,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "26a36cf2",
|
||||
"id": "d137e456",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -544,7 +544,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a93086db",
|
||||
"id": "3a3ec4c5",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -600,7 +600,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4488836b",
|
||||
"id": "b2ad2baa",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -616,7 +616,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "46388fa5",
|
||||
"id": "b92572ae",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -664,7 +664,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f341ff48",
|
||||
"id": "d1cdd503",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -707,7 +707,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a9684ba3",
|
||||
"id": "90f15779",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -768,7 +768,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7c8ef48f",
|
||||
"id": "eb655b3b",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -784,7 +784,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c42d7ec8",
|
||||
"id": "838060ac",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -832,7 +832,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ba4edcdd",
|
||||
"id": "a8047ea8",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -870,7 +870,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2821ef9e",
|
||||
"id": "aa266bb7",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -942,7 +942,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "70c31533",
|
||||
"id": "80e6ad27",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -958,7 +958,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "852f5832",
|
||||
"id": "f3db3810",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -1016,7 +1016,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b0107716",
|
||||
"id": "2db83cef",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -1029,7 +1029,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "87c16b51",
|
||||
"id": "428eaa1b",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -1049,7 +1049,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0a812659",
|
||||
"id": "fe7666b9",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1063,7 +1063,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0015101e",
|
||||
"id": "fac9ee55",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 2,
|
||||
"nbgrader": {
|
||||
@@ -1162,7 +1162,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d0575067",
|
||||
"id": "6a9cc930",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cfa2aae",
|
||||
"id": "46b4a258",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -53,7 +53,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c7a84c05",
|
||||
"id": "bc3a80e9",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -77,7 +77,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e52c72c2",
|
||||
"id": "76d31667",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -101,7 +101,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e05eee85",
|
||||
"id": "e0421bae",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -139,7 +139,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f489f983",
|
||||
"id": "6670b0b1",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -160,7 +160,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fff4865c",
|
||||
"id": "2dc8d8c8",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -211,7 +211,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "da931144",
|
||||
"id": "a973eb44",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -335,7 +335,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77988775",
|
||||
"id": "d4cbdf9d",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -351,7 +351,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c2e0b2e",
|
||||
"id": "174fe10a",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -411,7 +411,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2fa31b7",
|
||||
"id": "e961f791",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -473,7 +473,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "88715659",
|
||||
"id": "b924d865",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -556,6 +556,10 @@
|
||||
" return Tensor(output_data)\n",
|
||||
" ### END SOLUTION\n",
|
||||
"\n",
|
||||
" def __call__(self, x, training=True):\n",
|
||||
" \"\"\"Allows the layer to be called like a function.\"\"\"\n",
|
||||
" return self.forward(x, training)\n",
|
||||
"\n",
|
||||
" def parameters(self):\n",
|
||||
" \"\"\"Dropout has no parameters.\"\"\"\n",
|
||||
" return []\n",
|
||||
@@ -566,7 +570,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0d33ff6f",
|
||||
"id": "ee0bc9a1",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -582,7 +586,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb0b7ae8",
|
||||
"id": "c76974a1",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -658,7 +662,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b4aac2f",
|
||||
"id": "231dae31",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -718,7 +722,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f4f3b7d",
|
||||
"id": "bbc4aad9",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -777,7 +781,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "99df0451",
|
||||
"id": "0ca58dc7",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -832,7 +836,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5e82e08c",
|
||||
"id": "a9b7ae8a",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -873,7 +877,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "85ea5db5",
|
||||
"id": "5570a366",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1
|
||||
},
|
||||
@@ -895,7 +899,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ebddc165",
|
||||
"id": "b2e11bf8",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 2,
|
||||
"nbgrader": {
|
||||
@@ -986,7 +990,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "31c54ee3",
|
||||
"id": "4c9212f9",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7a46b4ac",
|
||||
"id": "9d798b1c",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -35,7 +35,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "95565c7e",
|
||||
"id": "91804987",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -59,7 +59,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1c9b95bd",
|
||||
"id": "c09dc686",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -80,7 +80,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "054331fd",
|
||||
"id": "51189bc1",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -113,7 +113,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e60f5944",
|
||||
"id": "cc227c2d",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -189,7 +189,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2a8ac601",
|
||||
"id": "49e5039b",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -235,7 +235,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3e628237",
|
||||
"id": "b1e1cbd0",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -247,7 +247,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84a9e420",
|
||||
"id": "820e9937",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -297,7 +297,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c570112b",
|
||||
"id": "854758b3",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -348,7 +348,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7e637f17",
|
||||
"id": "6b57e650",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -389,7 +389,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54c1f877",
|
||||
"id": "b8be9f2c",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -459,7 +459,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "24575e1c",
|
||||
"id": "aca5154a",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -531,7 +531,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f7e6aa15",
|
||||
"id": "7391538b",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -577,7 +577,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cde7a8f4",
|
||||
"id": "0b9b254c",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -670,7 +670,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0e3a2600",
|
||||
"id": "eb59fb50",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -746,7 +746,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "45e2f990",
|
||||
"id": "c59fbbfd",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -797,7 +797,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "30fd6c76",
|
||||
"id": "599727d1",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -906,7 +906,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3bd407b8",
|
||||
"id": "54a20f3f",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -982,7 +982,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "985dd530",
|
||||
"id": "1bab9d23",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -1033,7 +1033,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d7c0a96b",
|
||||
"id": "ca40b581",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1090,7 +1090,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c5baee20",
|
||||
"id": "76b4eb81",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -1146,7 +1146,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e2216e97",
|
||||
"id": "b90c91f0",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -1211,7 +1211,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "435562a9",
|
||||
"id": "e2fc1aa7",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1286,7 +1286,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0e2b66af",
|
||||
"id": "573fa75d",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -1336,7 +1336,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "518ead17",
|
||||
"id": "b7f12c78",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -1393,7 +1393,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7d0c2aa4",
|
||||
"id": "4c6ebac9",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1457,7 +1457,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7345a14f",
|
||||
"id": "d0b635c1",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -1513,7 +1513,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73d7096f",
|
||||
"id": "d770e887",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -1526,7 +1526,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a5f3a29",
|
||||
"id": "55fd411d",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -1606,7 +1606,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e91917e",
|
||||
"id": "b66f2370",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1619,7 +1619,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "16461a93",
|
||||
"id": "ce0d9c33",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3d8f1c62",
|
||||
"id": "518b6ae0",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -51,7 +51,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a2b41da9",
|
||||
"id": "30bbc6f8",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -68,15 +68,12 @@
|
||||
"from typing import List, Union, Optional, Dict, Any\n",
|
||||
"\n",
|
||||
"# Import Tensor from Module 01 (now with gradient support from Module 05)\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n",
|
||||
"from tensor_dev import Tensor"
|
||||
"from tinytorch.core.tensor import Tensor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3169e215",
|
||||
"id": "9057f3bf",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -133,7 +130,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "baec0321",
|
||||
"id": "3b2f074e",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -219,7 +216,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "49716b34",
|
||||
"id": "3000c581",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -247,7 +244,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "06d956dd",
|
||||
"id": "d9343aa4",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -258,6 +255,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#| export\n",
|
||||
"class Optimizer:\n",
|
||||
" \"\"\"\n",
|
||||
" Base class for all optimizers.\n",
|
||||
@@ -332,7 +330,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "82015c9d",
|
||||
"id": "0ded4383",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -348,7 +346,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c0f57a08",
|
||||
"id": "25d61648",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -401,7 +399,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7d9b8ceb",
|
||||
"id": "bf5adabc",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -473,7 +471,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ae4679bb",
|
||||
"id": "12f0f4b6",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -484,6 +482,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#| export\n",
|
||||
"class SGD(Optimizer):\n",
|
||||
" \"\"\"\n",
|
||||
" Stochastic Gradient Descent with momentum.\n",
|
||||
@@ -576,7 +575,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ced264d8",
|
||||
"id": "815d0bab",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -592,7 +591,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "68ae4ccf",
|
||||
"id": "c01ebc69",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -659,7 +658,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "480929e4",
|
||||
"id": "c656b1b4",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -733,7 +732,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2d7e339f",
|
||||
"id": "b545ed16",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -744,6 +743,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#| export\n",
|
||||
"class Adam(Optimizer):\n",
|
||||
" \"\"\"\n",
|
||||
" Adam optimizer with adaptive learning rates.\n",
|
||||
@@ -853,7 +853,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6f114c5b",
|
||||
"id": "b688bced",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -869,7 +869,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7f64abcc",
|
||||
"id": "61fa7116",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -945,7 +945,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "16ccfeaa",
|
||||
"id": "7cb028b2",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1019,7 +1019,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23c16f99",
|
||||
"id": "277056cc",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -1030,6 +1030,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#| export\n",
|
||||
"class AdamW(Optimizer):\n",
|
||||
" \"\"\"\n",
|
||||
" AdamW optimizer with decoupled weight decay.\n",
|
||||
@@ -1133,7 +1134,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0269f86a",
|
||||
"id": "d59b1b2b",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1149,7 +1150,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "016d7b36",
|
||||
"id": "619464ee",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -1224,7 +1225,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "295d5ee6",
|
||||
"id": "29f5ad7b",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -1251,7 +1252,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "47d676c3",
|
||||
"id": "9dd160f5",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1297,7 +1298,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "67290db6",
|
||||
"id": "ab882d12",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -1355,7 +1356,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "21136a44",
|
||||
"id": "abac74aa",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -1434,7 +1435,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b171c224",
|
||||
"id": "146f209d",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1
|
||||
},
|
||||
@@ -1456,7 +1457,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "46ae99ae",
|
||||
"id": "1726f746",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -1607,7 +1608,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "896f4c69",
|
||||
"id": "7328ac69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1618,7 +1619,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35b39338",
|
||||
"id": "c662a5f7",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "765eea82",
|
||||
"id": "78521710",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -52,7 +52,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38b1402a",
|
||||
"id": "d912bff5",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -75,22 +75,15 @@
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Import dependencies from other modules\n",
|
||||
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n",
|
||||
"from tensor_dev import Tensor\n",
|
||||
"\n",
|
||||
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))\n",
|
||||
"from layers_dev import Linear\n",
|
||||
"\n",
|
||||
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '04_losses'))\n",
|
||||
"from losses_dev import MSELoss, CrossEntropyLoss\n",
|
||||
"\n",
|
||||
"sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_optimizers'))\n",
|
||||
"from optimizers_dev import SGD, AdamW"
|
||||
"from tinytorch.core.tensor import Tensor\n",
|
||||
"from tinytorch.core.layers import Linear\n",
|
||||
"from tinytorch.core.losses import MSELoss, CrossEntropyLoss\n",
|
||||
"from tinytorch.core.optimizers import SGD, AdamW"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "89550fb8",
|
||||
"id": "2f4fc27e",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -119,7 +112,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d0b48f7a",
|
||||
"id": "4fa19758",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -166,7 +159,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4ed8a995",
|
||||
"id": "8599a0f1",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
@@ -180,7 +173,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ebfa93fc",
|
||||
"id": "ed5a85db",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -221,7 +214,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "347b09da",
|
||||
"id": "9dac2b34",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -233,6 +226,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#| export\n",
|
||||
"class CosineSchedule:\n",
|
||||
" \"\"\"\n",
|
||||
" Cosine annealing learning rate schedule.\n",
|
||||
@@ -274,7 +268,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1db4e03",
|
||||
"id": "c146074f",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -290,7 +284,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c27f6878",
|
||||
"id": "ee33397e",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -334,7 +328,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "81fc482c",
|
||||
"id": "da8efa9f",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -380,7 +374,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "db99efd3",
|
||||
"id": "29a5573c",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -457,7 +451,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b0b188d",
|
||||
"id": "7c1510f3",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -473,7 +467,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "91bf937f",
|
||||
"id": "754c9cd5",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -540,7 +534,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dde7833e",
|
||||
"id": "a827fb93",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -597,7 +591,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fa8339e1",
|
||||
"id": "63354dd4",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -609,6 +603,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#| export\n",
|
||||
"class Trainer:\n",
|
||||
" \"\"\"\n",
|
||||
" Complete training orchestrator for neural networks.\n",
|
||||
@@ -875,7 +870,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "529dfcf5",
|
||||
"id": "9266bc60",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -891,7 +886,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "03510440",
|
||||
"id": "8ce52aba",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": true,
|
||||
@@ -972,7 +967,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "905180bd",
|
||||
"id": "7ad86345",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 2
|
||||
@@ -985,7 +980,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3c631938",
|
||||
"id": "9953bcd4",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1
|
||||
},
|
||||
@@ -1009,7 +1004,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8b65c5ab",
|
||||
"id": "2eab95b6",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\"",
|
||||
"lines_to_next_cell": 1
|
||||
@@ -1023,7 +1018,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "29eea538",
|
||||
"id": "0580d838",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 1,
|
||||
"nbgrader": {
|
||||
@@ -1151,7 +1146,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ae1bc4b9",
|
||||
"id": "62eadf89",
|
||||
"metadata": {
|
||||
"nbgrader": {
|
||||
"grade": false,
|
||||
@@ -1169,7 +1164,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ad8ae396",
|
||||
"id": "ebe885e5",
|
||||
"metadata": {
|
||||
"cell_marker": "\"\"\""
|
||||
},
|
||||
|
||||
204
tinytorch/_modidx.py
generated
Normal file
204
tinytorch/_modidx.py
generated
Normal file
@@ -0,0 +1,204 @@
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/<NN_module>/<module>_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# Autogenerated by nbdev
|
||||
|
||||
d = { 'settings': { 'branch': 'main',
|
||||
'doc_baseurl': '/TinyTorch/',
|
||||
'doc_host': 'https://tinytorch.github.io',
|
||||
'git_url': 'https://github.com/tinytorch/TinyTorch/',
|
||||
'lib_path': 'tinytorch'},
|
||||
'syms': { 'tinytorch.core.activations': { 'tinytorch.core.activations.GELU': ( '02_activations/activations_dev.html#gelu',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.GELU.__call__': ( '02_activations/activations_dev.html#gelu.__call__',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.GELU.backward': ( '02_activations/activations_dev.html#gelu.backward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.GELU.forward': ( '02_activations/activations_dev.html#gelu.forward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.ReLU': ( '02_activations/activations_dev.html#relu',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.ReLU.__call__': ( '02_activations/activations_dev.html#relu.__call__',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.ReLU.backward': ( '02_activations/activations_dev.html#relu.backward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.ReLU.forward': ( '02_activations/activations_dev.html#relu.forward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Sigmoid': ( '02_activations/activations_dev.html#sigmoid',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Sigmoid.__call__': ( '02_activations/activations_dev.html#sigmoid.__call__',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Sigmoid.backward': ( '02_activations/activations_dev.html#sigmoid.backward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Sigmoid.forward': ( '02_activations/activations_dev.html#sigmoid.forward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Softmax': ( '02_activations/activations_dev.html#softmax',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Softmax.__call__': ( '02_activations/activations_dev.html#softmax.__call__',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Softmax.backward': ( '02_activations/activations_dev.html#softmax.backward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Softmax.forward': ( '02_activations/activations_dev.html#softmax.forward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Tanh': ( '02_activations/activations_dev.html#tanh',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Tanh.__call__': ( '02_activations/activations_dev.html#tanh.__call__',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Tanh.backward': ( '02_activations/activations_dev.html#tanh.backward',
|
||||
'tinytorch/core/activations.py'),
|
||||
'tinytorch.core.activations.Tanh.forward': ( '02_activations/activations_dev.html#tanh.forward',
|
||||
'tinytorch/core/activations.py')},
|
||||
'tinytorch.core.autograd': {},
|
||||
'tinytorch.core.layers': { 'tinytorch.core.layers.Dropout': ('03_layers/layers_dev.html#dropout', 'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Dropout.__call__': ( '03_layers/layers_dev.html#dropout.__call__',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Dropout.__init__': ( '03_layers/layers_dev.html#dropout.__init__',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Dropout.__repr__': ( '03_layers/layers_dev.html#dropout.__repr__',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Dropout.forward': ( '03_layers/layers_dev.html#dropout.forward',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Dropout.parameters': ( '03_layers/layers_dev.html#dropout.parameters',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Linear': ('03_layers/layers_dev.html#linear', 'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Linear.__call__': ( '03_layers/layers_dev.html#linear.__call__',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Linear.__init__': ( '03_layers/layers_dev.html#linear.__init__',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Linear.__repr__': ( '03_layers/layers_dev.html#linear.__repr__',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Linear.forward': ( '03_layers/layers_dev.html#linear.forward',
|
||||
'tinytorch/core/layers.py'),
|
||||
'tinytorch.core.layers.Linear.parameters': ( '03_layers/layers_dev.html#linear.parameters',
|
||||
'tinytorch/core/layers.py')},
|
||||
'tinytorch.core.losses': { 'tinytorch.core.losses.BinaryCrossEntropyLoss': ( '04_losses/losses_dev.html#binarycrossentropyloss',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.BinaryCrossEntropyLoss.__call__': ( '04_losses/losses_dev.html#binarycrossentropyloss.__call__',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.BinaryCrossEntropyLoss.__init__': ( '04_losses/losses_dev.html#binarycrossentropyloss.__init__',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.BinaryCrossEntropyLoss.backward': ( '04_losses/losses_dev.html#binarycrossentropyloss.backward',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.BinaryCrossEntropyLoss.forward': ( '04_losses/losses_dev.html#binarycrossentropyloss.forward',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.CrossEntropyLoss': ( '04_losses/losses_dev.html#crossentropyloss',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.CrossEntropyLoss.__call__': ( '04_losses/losses_dev.html#crossentropyloss.__call__',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.CrossEntropyLoss.__init__': ( '04_losses/losses_dev.html#crossentropyloss.__init__',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.CrossEntropyLoss.backward': ( '04_losses/losses_dev.html#crossentropyloss.backward',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.CrossEntropyLoss.forward': ( '04_losses/losses_dev.html#crossentropyloss.forward',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.MSELoss': ('04_losses/losses_dev.html#mseloss', 'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.MSELoss.__call__': ( '04_losses/losses_dev.html#mseloss.__call__',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.MSELoss.__init__': ( '04_losses/losses_dev.html#mseloss.__init__',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.MSELoss.backward': ( '04_losses/losses_dev.html#mseloss.backward',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.MSELoss.forward': ( '04_losses/losses_dev.html#mseloss.forward',
|
||||
'tinytorch/core/losses.py'),
|
||||
'tinytorch.core.losses.import_previous_module': ( '04_losses/losses_dev.html#import_previous_module',
|
||||
'tinytorch/core/losses.py')},
|
||||
'tinytorch.core.optimizers': { 'tinytorch.core.optimizers.Adam': ( '06_optimizers/optimizers_dev.html#adam',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.Adam.__init__': ( '06_optimizers/optimizers_dev.html#adam.__init__',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.Adam.step': ( '06_optimizers/optimizers_dev.html#adam.step',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.AdamW': ( '06_optimizers/optimizers_dev.html#adamw',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.AdamW.__init__': ( '06_optimizers/optimizers_dev.html#adamw.__init__',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.AdamW.step': ( '06_optimizers/optimizers_dev.html#adamw.step',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.Optimizer': ( '06_optimizers/optimizers_dev.html#optimizer',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.Optimizer.__init__': ( '06_optimizers/optimizers_dev.html#optimizer.__init__',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.Optimizer.step': ( '06_optimizers/optimizers_dev.html#optimizer.step',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.Optimizer.zero_grad': ( '06_optimizers/optimizers_dev.html#optimizer.zero_grad',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.SGD': ( '06_optimizers/optimizers_dev.html#sgd',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.SGD.__init__': ( '06_optimizers/optimizers_dev.html#sgd.__init__',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.SGD.step': ( '06_optimizers/optimizers_dev.html#sgd.step',
|
||||
'tinytorch/core/optimizers.py')},
|
||||
'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('01_tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.__add__': ( '01_tensor/tensor_dev.html#tensor.__add__',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.__init__': ( '01_tensor/tensor_dev.html#tensor.__init__',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.__mul__': ( '01_tensor/tensor_dev.html#tensor.__mul__',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.__repr__': ( '01_tensor/tensor_dev.html#tensor.__repr__',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.__str__': ( '01_tensor/tensor_dev.html#tensor.__str__',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.__sub__': ( '01_tensor/tensor_dev.html#tensor.__sub__',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.__truediv__': ( '01_tensor/tensor_dev.html#tensor.__truediv__',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.backward': ( '01_tensor/tensor_dev.html#tensor.backward',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.matmul': ( '01_tensor/tensor_dev.html#tensor.matmul',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.max': ( '01_tensor/tensor_dev.html#tensor.max',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.mean': ( '01_tensor/tensor_dev.html#tensor.mean',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.numpy': ( '01_tensor/tensor_dev.html#tensor.numpy',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.reshape': ( '01_tensor/tensor_dev.html#tensor.reshape',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.sum': ( '01_tensor/tensor_dev.html#tensor.sum',
|
||||
'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.transpose': ( '01_tensor/tensor_dev.html#tensor.transpose',
|
||||
'tinytorch/core/tensor.py')},
|
||||
'tinytorch.core.training': { 'tinytorch.core.training.CosineSchedule': ( '07_training/training_dev.html#cosineschedule',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.CosineSchedule.__init__': ( '07_training/training_dev.html#cosineschedule.__init__',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.CosineSchedule.get_lr': ( '07_training/training_dev.html#cosineschedule.get_lr',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer': ( '07_training/training_dev.html#trainer',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer.__init__': ( '07_training/training_dev.html#trainer.__init__',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer._get_model_state': ( '07_training/training_dev.html#trainer._get_model_state',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer._get_optimizer_state': ( '07_training/training_dev.html#trainer._get_optimizer_state',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer._get_scheduler_state': ( '07_training/training_dev.html#trainer._get_scheduler_state',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer._set_model_state': ( '07_training/training_dev.html#trainer._set_model_state',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer._set_optimizer_state': ( '07_training/training_dev.html#trainer._set_optimizer_state',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer._set_scheduler_state': ( '07_training/training_dev.html#trainer._set_scheduler_state',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer.evaluate': ( '07_training/training_dev.html#trainer.evaluate',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer.load_checkpoint': ( '07_training/training_dev.html#trainer.load_checkpoint',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer.save_checkpoint': ( '07_training/training_dev.html#trainer.save_checkpoint',
|
||||
'tinytorch/core/training.py'),
|
||||
'tinytorch.core.training.Trainer.train_epoch': ( '07_training/training_dev.html#trainer.train_epoch',
|
||||
'tinytorch/core/training.py')}}}
|
||||
8
tinytorch/applications/tinygpt.py
generated
8
tinytorch/applications/tinygpt.py
generated
@@ -1,8 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/20_capstone/capstone_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/20_capstone/capstone_dev.ipynb 2
|
||||
#| default_exp applications.tinygpt
|
||||
#| export
|
||||
8
tinytorch/benchmarking/benchmark.py
generated
8
tinytorch/benchmarking/benchmark.py
generated
@@ -1,8 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/19_benchmarking/benchmarking_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/19_benchmarking/benchmarking_dev.ipynb 0
|
||||
#| default_exp benchmarking.benchmark
|
||||
#| export
|
||||
18
tinytorch/core/activations.py
generated
18
tinytorch/core/activations.py
generated
@@ -1,5 +1,19 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb.
|
||||
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/02_activations/activations_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax']
|
||||
|
||||
|
||||
8
tinytorch/core/attention.py
generated
8
tinytorch/core/attention.py
generated
@@ -1,8 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/12_attention/attention_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/12_attention/attention_dev.ipynb 0
|
||||
#| default_exp core.attention
|
||||
#| export
|
||||
22
tinytorch/core/layers.py
generated
22
tinytorch/core/layers.py
generated
@@ -1,5 +1,19 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_layers/layers_dev.ipynb.
|
||||
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/03_layers/layers_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['Linear', 'Dropout']
|
||||
|
||||
@@ -194,6 +208,10 @@ class Dropout:
|
||||
return Tensor(output_data)
|
||||
### END SOLUTION
|
||||
|
||||
def __call__(self, x, training=True):
    """Make the layer callable by delegating to ``forward``.

    Args:
        x: Input passed through to ``forward`` unchanged.
        training: Forwarded positionally as the second argument of
            ``forward``; defaults to ``True``.

    Returns:
        Whatever ``self.forward(x, training)`` returns.
    """
    result = self.forward(x, training)
    return result
|
||||
|
||||
def parameters(self):
    """Return the layer's parameters; this layer has none.

    Returns:
        A new empty list on every call.
    """
    no_params = []
    return no_params
|
||||
|
||||
18
tinytorch/core/losses.py
generated
18
tinytorch/core/losses.py
generated
@@ -1,5 +1,19 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_losses/losses_dev.ipynb.
|
||||
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/04_losses/losses_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['import_previous_module', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss']
|
||||
|
||||
|
||||
53
tinytorch/core/optimizers.py
generated
53
tinytorch/core/optimizers.py
generated
@@ -1,5 +1,19 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/06_optimizers/optimizers_dev.ipynb.
|
||||
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/06_optimizers/optimizers_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW']
|
||||
|
||||
@@ -7,10 +21,10 @@ __all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW']
|
||||
import numpy as np
|
||||
from typing import List, Union, Optional, Dict, Any
|
||||
|
||||
# Import Tensor from Module 01
|
||||
from tinytorch.core.tensor import Tensor
|
||||
# Import Tensor from Module 01 (now with gradient support from Module 05)
|
||||
from .tensor import Tensor
|
||||
|
||||
# %% Base Optimizer class
|
||||
# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 5
|
||||
class Optimizer:
|
||||
"""
|
||||
Base class for all optimizers.
|
||||
@@ -37,6 +51,7 @@ class Optimizer:
|
||||
|
||||
HINT: Check that each parameter has requires_grad=True
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
# Validate and store parameters
|
||||
if not isinstance(params, list):
|
||||
params = list(params)
|
||||
@@ -50,6 +65,7 @@ class Optimizer:
|
||||
|
||||
self.params = params
|
||||
self.step_count = 0 # For algorithms that need step counting
|
||||
### END SOLUTION
|
||||
|
||||
def zero_grad(self):
|
||||
"""
|
||||
@@ -67,8 +83,10 @@ class Optimizer:
|
||||
|
||||
WHY: Gradients accumulate by default, so we need to clear them between batches
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
for param in self.params:
|
||||
param.grad = None
|
||||
### END SOLUTION
|
||||
|
||||
def step(self):
|
||||
"""
|
||||
@@ -78,9 +96,7 @@ class Optimizer:
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement step()")
|
||||
|
||||
|
||||
|
||||
# %% SGD Optimizer
|
||||
# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 9
|
||||
class SGD(Optimizer):
|
||||
"""
|
||||
Stochastic Gradient Descent with momentum.
|
||||
@@ -108,6 +124,7 @@ class SGD(Optimizer):
|
||||
- Momentum buffers should be initialized as None
|
||||
- They'll be created lazily on first step
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
super().__init__(params)
|
||||
|
||||
self.lr = lr
|
||||
@@ -116,6 +133,7 @@ class SGD(Optimizer):
|
||||
|
||||
# Initialize momentum buffers (created lazily)
|
||||
self.momentum_buffers = [None for _ in self.params]
|
||||
### END SOLUTION
|
||||
|
||||
def step(self):
|
||||
"""
|
||||
@@ -139,6 +157,7 @@ class SGD(Optimizer):
|
||||
- Initialize momentum buffers on first use
|
||||
- Use in-place operations to save memory
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
for i, param in enumerate(self.params):
|
||||
if param.grad is None:
|
||||
continue
|
||||
@@ -165,10 +184,9 @@ class SGD(Optimizer):
|
||||
|
||||
# Increment step counter
|
||||
self.step_count += 1
|
||||
### END SOLUTION
|
||||
|
||||
|
||||
|
||||
# %% Adam Optimizer
|
||||
# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 13
|
||||
class Adam(Optimizer):
|
||||
"""
|
||||
Adam optimizer with adaptive learning rates.
|
||||
@@ -198,6 +216,7 @@ class Adam(Optimizer):
|
||||
EXAMPLE:
|
||||
>>> optimizer = Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
super().__init__(params)
|
||||
|
||||
self.lr = lr
|
||||
@@ -208,6 +227,7 @@ class Adam(Optimizer):
|
||||
# Initialize moment buffers (created lazily)
|
||||
self.m_buffers = [None for _ in self.params] # First moment (mean)
|
||||
self.v_buffers = [None for _ in self.params] # Second moment (variance)
|
||||
### END SOLUTION
|
||||
|
||||
def step(self):
|
||||
"""
|
||||
@@ -235,6 +255,7 @@ class Adam(Optimizer):
|
||||
- Use step_count for bias correction
|
||||
- Square gradients element-wise for second moment
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
# Increment step counter first (needed for bias correction)
|
||||
self.step_count += 1
|
||||
|
||||
@@ -270,10 +291,9 @@ class Adam(Optimizer):
|
||||
|
||||
# Update parameter
|
||||
param.data = param.data - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
|
||||
### END SOLUTION
|
||||
|
||||
|
||||
|
||||
# %% AdamW Optimizer
|
||||
# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 17
|
||||
class AdamW(Optimizer):
|
||||
"""
|
||||
AdamW optimizer with decoupled weight decay.
|
||||
@@ -301,6 +321,7 @@ class AdamW(Optimizer):
|
||||
EXAMPLE:
|
||||
>>> optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
super().__init__(params)
|
||||
|
||||
self.lr = lr
|
||||
@@ -311,6 +332,7 @@ class AdamW(Optimizer):
|
||||
# Initialize moment buffers (same as Adam)
|
||||
self.m_buffers = [None for _ in self.params]
|
||||
self.v_buffers = [None for _ in self.params]
|
||||
### END SOLUTION
|
||||
|
||||
def step(self):
|
||||
"""
|
||||
@@ -336,6 +358,7 @@ class AdamW(Optimizer):
|
||||
|
||||
HINT: Apply weight decay after gradient update for proper decoupling
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
# Increment step counter first
|
||||
self.step_count += 1
|
||||
|
||||
@@ -369,4 +392,4 @@ class AdamW(Optimizer):
|
||||
# Apply decoupled weight decay
|
||||
if self.weight_decay != 0:
|
||||
param.data = param.data * (1 - self.lr * self.weight_decay)
|
||||
|
||||
### END SOLUTION
|
||||
|
||||
64
tinytorch/core/spatial.py
generated
64
tinytorch/core/spatial.py
generated
@@ -1,64 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_spatial/spatial_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 1
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
# Import dependencies from other modules
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
|
||||
from tensor_dev import Tensor
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
|
||||
from layers_dev import Module
|
||||
|
||||
# Note: Keeping simplified implementations for reference during development
|
||||
class _SimplifiedTensor:
    """Simplified tensor for spatial operations development.

    Wraps a float32 NumPy array plus a few bookkeeping attributes.
    Arithmetic and reductions wrap their result in the module-level
    ``Tensor`` type (imported from ``tensor_dev``), not in this class.
    """

    def __init__(self, data, requires_grad=False):
        """Store ``data`` as a float32 array.

        Args:
            data: Anything ``np.array`` can convert.
            requires_grad: Stored on the instance as-is; no method of this
                class reads it.
        """
        self.data = np.array(data, dtype=np.float32)
        self.shape = self.data.shape
        self.requires_grad = requires_grad
        self.grad = None

    def __repr__(self):
        """Return a string showing the shape and the underlying array."""
        return f"Tensor(shape={self.shape}, data=\n{self.data})"

    @staticmethod
    def _raw(other):
        """Return the array behind a tensor operand, or ``other`` unchanged."""
        # The original only unwrapped ``Tensor``; an operand of this class's
        # own type was handed to NumPy as a wrapper object instead of its
        # array. Own-type check comes first so it needs no external name.
        if isinstance(other, _SimplifiedTensor) or isinstance(other, Tensor):
            return other.data
        return other

    def __add__(self, other):
        """Element-wise ``self + other``; ``other`` may be a tensor or a raw value."""
        return Tensor(self.data + self._raw(other))

    def __mul__(self, other):
        """Element-wise ``self * other``; ``other`` may be a tensor or a raw value."""
        return Tensor(self.data * self._raw(other))

    def sum(self):
        """Return the sum of all elements wrapped in a ``Tensor``."""
        return Tensor(np.sum(self.data))

    def mean(self):
        """Return the mean of all elements wrapped in a ``Tensor``."""
        return Tensor(np.mean(self.data))
|
||||
|
||||
# Create a simple Module base class for inheritance
|
||||
class Module:
    """Minimal base class that neural-network modules inherit from."""

    def __init__(self):
        pass

    def forward(self, x):
        """Compute the module output; concrete subclasses must override this."""
        raise NotImplementedError("Subclasses must implement forward()")

    def parameters(self):
        """Return the attributes of this module that look like parameters.

        An attribute counts as a parameter when it exposes both ``data`` and
        ``requires_grad``.  Attributes are visited in ``dir()`` order.
        """
        candidates = (getattr(self, name) for name in dir(self))
        return [
            value
            for value in candidates
            if hasattr(value, 'data') and hasattr(value, 'requires_grad')
        ]
|
||||
18
tinytorch/core/tensor.py
generated
18
tinytorch/core/tensor.py
generated
@@ -1,5 +1,19 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_tensor/tensor_dev.ipynb.
|
||||
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/01_tensor/tensor_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['Tensor']
|
||||
|
||||
|
||||
332
tinytorch/core/training.py
generated
332
tinytorch/core/training.py
generated
@@ -1,7 +1,21 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/07_training/training_dev.ipynb.
|
||||
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/07_training/training_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
__all__ = ['CosineSchedule', 'Trainer']
|
||||
|
||||
# %% ../../modules/source/07_training/training_dev.ipynb 1
|
||||
import numpy as np
|
||||
@@ -13,14 +27,310 @@ import sys
|
||||
import os
|
||||
|
||||
# Import dependencies from other modules
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
|
||||
from tensor_dev import Tensor
|
||||
from .tensor import Tensor
|
||||
from .layers import Linear
|
||||
from .losses import MSELoss, CrossEntropyLoss
|
||||
from .optimizers import SGD, AdamW
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
|
||||
from layers_dev import Linear
|
||||
# %% ../../modules/source/07_training/training_dev.ipynb 6
|
||||
class CosineSchedule:
    """
    Cosine annealing learning rate schedule.

    The rate starts at ``max_lr`` and follows half a cosine wave down to
    ``min_lr`` over ``total_epochs`` epochs: large steps early on, gentle
    fine-tuning towards the end.

    Formula:
        lr(epoch) = min_lr + (max_lr - min_lr) * (1 + cos(pi * epoch / total_epochs)) / 2

    EXAMPLE:
    >>> schedule = CosineSchedule(max_lr=0.1, min_lr=0.01, total_epochs=100)
    >>> print(schedule.get_lr(0))    # Start: 0.1
    >>> print(schedule.get_lr(50))   # Middle: ~0.055
    >>> print(schedule.get_lr(100))  # End: 0.01
    """
    ### BEGIN SOLUTION
    def __init__(self, max_lr: float = 0.1, min_lr: float = 0.01, total_epochs: int = 100):
        self.max_lr = max_lr
        self.min_lr = min_lr
        self.total_epochs = total_epochs

    def get_lr(self, epoch: int) -> float:
        """Return the learning rate to use at ``epoch``.

        Once ``epoch`` reaches ``total_epochs`` the schedule stays at ``min_lr``.
        """
        # Past the end of the schedule: clamp to the floor.
        if epoch >= self.total_epochs:
            return self.min_lr

        # Half-cosine blend factor: 1.0 at epoch 0, approaching 0.0 at the end.
        phase = np.pi * epoch / self.total_epochs
        blend = (1 + np.cos(phase)) / 2
        lr_span = self.max_lr - self.min_lr
        return self.min_lr + lr_span * blend
    ### END SOLUTION
|
||||
|
||||
# %% ../../modules/source/07_training/training_dev.ipynb 14
|
||||
class Trainer:
    """
    Complete training orchestrator for neural networks.

    Handles the full training lifecycle: forward pass, loss computation,
    backward pass, optimization, scheduling, checkpointing, and evaluation.

    Collaborators are duck-typed:
    - ``model`` needs ``forward(inputs)`` and, optionally, ``parameters()``
    - ``loss_fn`` needs ``forward(outputs, targets)`` returning an object with
      ``.data`` and, optionally, ``.backward()``
    - ``optimizer`` needs ``step()`` and ``zero_grad()`` (``lr`` is optional)
    - ``scheduler`` (optional) needs ``get_lr(epoch)``
    """
    ### BEGIN SOLUTION
    def __init__(self, model, optimizer, loss_fn, scheduler=None, grad_clip_norm=None):
        """
        Initialize trainer with model and training components.

        Args:
            model: Neural network to train
            optimizer: Parameter update strategy (SGD, Adam, etc.)
            loss_fn: Loss function (CrossEntropy, MSE, etc.)
            scheduler: Optional learning rate scheduler
            grad_clip_norm: Optional gradient clipping threshold
        """
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.scheduler = scheduler
        self.grad_clip_norm = grad_clip_norm

        # Training state
        self.epoch = 0
        self.step = 0
        self.training_mode = True

        # History tracking
        self.history = {
            'train_loss': [],
            'eval_loss': [],
            'learning_rates': []
        }

    def _apply_update(self):
        """Clip gradients if configured, take one optimizer step, reset gradients."""
        if self.grad_clip_norm is not None:
            params = []
            if hasattr(self.model, 'parameters'):
                params = self.model.parameters()
            # NOTE(review): clip_grad_norm is looked up in module scope; make
            # sure it is defined/exported alongside this class.
            clip_grad_norm(params, self.grad_clip_norm)

        self.optimizer.step()
        self.optimizer.zero_grad()

    def train_epoch(self, dataloader, accumulation_steps=1):
        """
        Train for one epoch through the dataset.

        Args:
            dataloader: Iterable yielding (inputs, targets) batches
            accumulation_steps: Number of batches to accumulate before update

        Returns:
            Average loss for the epoch

        Raises:
            ValueError: If accumulation_steps is smaller than 1
        """
        if accumulation_steps < 1:
            raise ValueError("accumulation_steps must be >= 1")

        self.model.training = True
        self.training_mode = True

        total_loss = 0.0
        num_batches = 0
        accumulated_loss = 0.0
        pending = 0  # batches whose gradients have not been applied yet

        for inputs, targets in dataloader:
            # Forward pass
            outputs = self.model.forward(inputs)
            loss = self.loss_fn.forward(outputs, targets)

            # Scale loss for accumulation (reporting only).
            # NOTE(review): backward() below runs on the unscaled loss, so the
            # accumulated gradient is a sum over the window - confirm intended.
            accumulated_loss += loss.data / accumulation_steps

            # Backward pass
            if hasattr(loss, 'backward'):
                loss.backward()
            pending += 1

            # Update parameters every accumulation_steps
            if pending == accumulation_steps:
                self._apply_update()
                total_loss += accumulated_loss
                accumulated_loss = 0.0
                pending = 0
                num_batches += 1
                self.step += 1

        # Flush a trailing partial window. This is keyed on the batch count,
        # not on the sign of the loss, so zero/negative losses are not dropped.
        if pending:
            self._apply_update()
            total_loss += accumulated_loss
            num_batches += 1

        avg_loss = total_loss / max(num_batches, 1)
        self.history['train_loss'].append(avg_loss)

        # Update scheduler
        if self.scheduler is not None:
            current_lr = self.scheduler.get_lr(self.epoch)
            # Update optimizer learning rate
            if hasattr(self.optimizer, 'lr'):
                self.optimizer.lr = current_lr
            self.history['learning_rates'].append(current_lr)

        self.epoch += 1
        return avg_loss

    def evaluate(self, dataloader):
        """
        Evaluate model on dataset without updating parameters.

        Args:
            dataloader: Iterable yielding (inputs, targets) batches; it does
                not need to support ``len()``

        Returns:
            Average loss and accuracy
        """
        self.model.training = False
        self.training_mode = False

        total_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0  # counted here so generators work too

        for inputs, targets in dataloader:
            # Forward pass only
            outputs = self.model.forward(inputs)
            loss = self.loss_fn.forward(outputs, targets)

            total_loss += loss.data
            num_batches += 1

            # Calculate accuracy (for classification)
            if hasattr(outputs, 'data') and hasattr(targets, 'data'):
                if len(outputs.data.shape) > 1:  # Multi-class
                    predictions = np.argmax(outputs.data, axis=1)
                    if len(targets.data.shape) == 1:  # Integer targets
                        correct += np.sum(predictions == targets.data)
                    else:  # One-hot targets
                        correct += np.sum(predictions == np.argmax(targets.data, axis=1))
                    total += len(predictions)

        avg_loss = total_loss / num_batches if num_batches > 0 else 0.0
        accuracy = correct / total if total > 0 else 0.0

        self.history['eval_loss'].append(avg_loss)

        return avg_loss, accuracy

    def save_checkpoint(self, path: str):
        """
        Save complete training state for resumption.

        Args:
            path: File path to save checkpoint
        """
        checkpoint = {
            'epoch': self.epoch,
            'step': self.step,
            'model_state': self._get_model_state(),
            'optimizer_state': self._get_optimizer_state(),
            'scheduler_state': self._get_scheduler_state(),
            'history': self.history,
            'training_mode': self.training_mode
        }

        Path(path).parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(checkpoint, f)

    def load_checkpoint(self, path: str):
        """
        Load training state from checkpoint.

        Args:
            path: File path to load checkpoint from
        """
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load checkpoints that you created or otherwise trust.
        with open(path, 'rb') as f:
            checkpoint = pickle.load(f)

        self.epoch = checkpoint['epoch']
        self.step = checkpoint['step']
        self.history = checkpoint['history']
        self.training_mode = checkpoint['training_mode']

        # Restore states (simplified for educational purposes)
        if 'model_state' in checkpoint:
            self._set_model_state(checkpoint['model_state'])
        if 'optimizer_state' in checkpoint:
            self._set_optimizer_state(checkpoint['optimizer_state'])
        if 'scheduler_state' in checkpoint:
            self._set_scheduler_state(checkpoint['scheduler_state'])

    def _get_model_state(self):
        """Extract model parameters for checkpointing, keyed by position."""
        if hasattr(self.model, 'parameters'):
            return {i: param.data.copy() for i, param in enumerate(self.model.parameters())}
        return {}

    def _set_model_state(self, state):
        """Restore model parameters from checkpoint (matched by position)."""
        if hasattr(self.model, 'parameters'):
            for i, param in enumerate(self.model.parameters()):
                if i in state:
                    param.data = state[i].copy()

    def _get_optimizer_state(self):
        """Extract optimizer state for checkpointing."""
        state = {}
        if hasattr(self.optimizer, 'lr'):
            state['lr'] = self.optimizer.lr
        if hasattr(self.optimizer, 'momentum_buffers'):
            state['momentum_buffers'] = self.optimizer.momentum_buffers.copy()
        return state

    def _set_optimizer_state(self, state):
        """Restore optimizer state from checkpoint."""
        if 'lr' in state and hasattr(self.optimizer, 'lr'):
            self.optimizer.lr = state['lr']
        if 'momentum_buffers' in state and hasattr(self.optimizer, 'momentum_buffers'):
            self.optimizer.momentum_buffers = state['momentum_buffers']

    def _get_scheduler_state(self):
        """Extract scheduler state for checkpointing."""
        if self.scheduler is None:
            return None
        return {
            'max_lr': getattr(self.scheduler, 'max_lr', None),
            'min_lr': getattr(self.scheduler, 'min_lr', None),
            'total_epochs': getattr(self.scheduler, 'total_epochs', None)
        }

    def _set_scheduler_state(self, state):
        """Restore scheduler state from checkpoint."""
        if state is None or self.scheduler is None:
            return
        for key, value in state.items():
            if hasattr(self.scheduler, key):
                setattr(self.scheduler, key, value)
    ### END SOLUTION
|
||||
|
||||
57
tinytorch/generation/kv_cache.py
generated
57
tinytorch/generation/kv_cache.py
generated
@@ -1,57 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/14_kvcaching/kvcaching_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = ['Tensor']
|
||||
|
||||
# %% ../../modules/source/14_kvcaching/kvcaching_dev.ipynb 1
|
||||
import numpy as np
|
||||
import time
|
||||
from typing import Tuple, Optional, Dict, List
|
||||
from dataclasses import dataclass
|
||||
|
||||
# Import our TinyTorch components (Modules 01-13)
|
||||
### BEGIN SOLUTION
|
||||
# Note: In real implementation, these would import from previous modules
|
||||
# For now, we'll implement minimal versions to focus on caching concepts
|
||||
|
||||
class Tensor:
    """Bare-bones NumPy-backed tensor used while focusing on KV caching."""

    def __init__(self, data, requires_grad=False):
        self.data = np.array(data)
        self.shape = self.data.shape
        self.requires_grad = requires_grad
        self.grad = None

    def __getitem__(self, key):
        """Index/slice the underlying array and wrap the result."""
        selected = self.data[key]
        return Tensor(selected)

    def __setitem__(self, key, value):
        """Assign into the underlying array, unwrapping Tensor values."""
        self.data[key] = value.data if isinstance(value, Tensor) else value

    def size(self, dim=None):
        """Return the full shape, or the extent of one dimension."""
        return self.shape if dim is None else self.shape[dim]

    def view(self, *shape):
        """Return a tensor holding the same elements in a new shape."""
        reshaped = self.data.reshape(shape)
        return Tensor(reshaped)

    def transpose(self, dim0, dim1):
        """Return a tensor with dimensions ``dim0`` and ``dim1`` exchanged."""
        order = list(range(len(self.shape)))
        order[dim0], order[dim1] = order[dim1], order[dim0]
        return Tensor(np.transpose(self.data, order))

    @staticmethod
    def cat(tensors, dim=0):
        """Concatenate tensors along dimension"""
        return Tensor(np.concatenate([item.data for item in tensors], axis=dim))

    @staticmethod
    def zeros(*shape):
        """Create zero tensor"""
        blank = np.zeros(shape)
        return Tensor(blank)
|
||||
### END SOLUTION
|
||||
148
tinytorch/models/transformer.py
generated
148
tinytorch/models/transformer.py
generated
@@ -1,148 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_transformers/transformers_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = ['Tensor', 'Linear', 'MultiHeadAttention', 'Embedding', 'gelu']
|
||||
|
||||
# %% ../../modules/source/13_transformers/transformers_dev.ipynb 1
|
||||
import numpy as np
|
||||
import math
|
||||
from typing import Optional, List
|
||||
|
||||
# Minimal implementations for development - in practice these import from previous modules
|
||||
class Tensor:
    """Minimal Tensor class for transformer development - imports from Module 01 in practice.

    Thin wrapper around a NumPy array.  Every operation returns a new
    ``Tensor``; no gradient tracking is performed even though the
    ``requires_grad`` / ``grad`` slots exist.
    """

    def __init__(self, data, requires_grad=False):
        self.data = np.array(data)
        self.shape = self.data.shape
        self.size = self.data.size
        self.requires_grad = requires_grad
        self.grad = None

    def __add__(self, other):
        """Element-wise addition with a Tensor, array or scalar."""
        if isinstance(other, Tensor):
            return Tensor(self.data + other.data)
        return Tensor(self.data + other)

    def __mul__(self, other):
        """Element-wise multiplication with a Tensor, array or scalar."""
        if isinstance(other, Tensor):
            return Tensor(self.data * other.data)
        return Tensor(self.data * other)

    def matmul(self, other):
        """Matrix-multiply with ``other`` (an object exposing ``.data``).

        Uses ``np.matmul`` so that stacked (batched) operands are multiplied
        matrix-by-matrix; ``np.dot`` would instead combine every pair of
        matrices for N-D x N-D inputs.  Results are unchanged for 1-D/2-D
        operands and for N-D inputs times a 2-D weight.
        """
        lhs, rhs = self.data, other.data
        if np.ndim(lhs) == 0 or np.ndim(rhs) == 0:
            # np.matmul rejects scalars; keep the scaling behaviour np.dot had.
            return Tensor(lhs * rhs)
        return Tensor(np.matmul(lhs, rhs))

    def sum(self, axis=None, keepdims=False):
        """Sum over ``axis`` (all elements when ``axis`` is None)."""
        return Tensor(self.data.sum(axis=axis, keepdims=keepdims))

    def mean(self, axis=None, keepdims=False):
        """Mean over ``axis`` (all elements when ``axis`` is None)."""
        return Tensor(self.data.mean(axis=axis, keepdims=keepdims))

    def reshape(self, *shape):
        """Return a reshaped tensor; accepts ``reshape(2, 3)`` or ``reshape((2, 3))``."""
        if len(shape) == 1 and isinstance(shape[0], (tuple, list)):
            shape = tuple(shape[0])
        return Tensor(self.data.reshape(shape))

    def __repr__(self):
        return f"Tensor(data={self.data}, shape={self.shape})"
|
||||
|
||||
class Linear:
    """Minimal fully connected layer (``x @ W + b``) - imports from Module 03 in practice."""

    def __init__(self, in_features, out_features, bias=True):
        # Xavier/Glorot scaling: variance shrinks with the combined fan-in/out.
        fan_total = in_features + out_features
        std = math.sqrt(2.0 / fan_total)
        self.weight = Tensor(np.random.normal(0, std, (in_features, out_features)))
        if bias:
            self.bias = Tensor(np.zeros(out_features))
        else:
            self.bias = None

    def forward(self, x):
        """Project ``x`` through the weight matrix and add the bias if present."""
        projected = x.matmul(self.weight)
        if self.bias is None:
            return projected
        return projected + self.bias

    def parameters(self):
        """Return the weight, followed by the bias when the layer has one."""
        if self.bias is None:
            return [self.weight]
        return [self.weight, self.bias]
|
||||
|
||||
class MultiHeadAttention:
    """Minimal multi-head self-attention - imports from Module 12 in practice."""

    def __init__(self, embed_dim, num_heads):
        assert embed_dim % num_heads == 0
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        self.q_proj = Linear(embed_dim, embed_dim)
        self.k_proj = Linear(embed_dim, embed_dim)
        self.v_proj = Linear(embed_dim, embed_dim)
        self.out_proj = Linear(embed_dim, embed_dim)

    def _split_heads(self, projected, batch_size, seq_len):
        """Reshape to (batch, seq, heads, head_dim), then move heads ahead of seq."""
        per_head = projected.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
        return Tensor(np.transpose(per_head.data, (0, 2, 1, 3)))

    def forward(self, x, mask=None):
        """Apply self-attention to ``x``.

        ``x.shape`` is unpacked as (batch_size, seq_len, embed_dim).  When
        ``mask`` is given, ``mask.data`` is added to the attention scores
        before the softmax.
        """
        batch_size, seq_len, embed_dim = x.shape

        # Project the input, then split each projection into heads.
        Q = self._split_heads(self.q_proj.forward(x), batch_size, seq_len)
        K = self._split_heads(self.k_proj.forward(x), batch_size, seq_len)
        V = self._split_heads(self.v_proj.forward(x), batch_size, seq_len)

        # Scaled dot-product scores: Q @ K^T / sqrt(head_dim).
        keys_t = np.transpose(K.data, (0, 1, 3, 2))
        scores = Tensor(np.matmul(Q.data, keys_t))
        scores = scores * (1.0 / math.sqrt(self.head_dim))

        # Additive mask (e.g. causal masking for autoregressive generation).
        if mask is not None:
            scores = Tensor(scores.data + mask.data)

        attention_weights = self._softmax(scores)

        # Weighted sum of the values per head.
        context = Tensor(np.matmul(attention_weights.data, V.data))

        # Put seq back ahead of heads and merge heads into the embedding axis.
        merged = Tensor(np.transpose(context.data, (0, 2, 1, 3)))
        merged = merged.reshape(batch_size, seq_len, embed_dim)

        return self.out_proj.forward(merged)

    def _softmax(self, x):
        """Numerically stable softmax over the last axis."""
        shifted = x.data - np.max(x.data, axis=-1, keepdims=True)
        exp_x = Tensor(np.exp(shifted))
        normaliser = np.sum(exp_x.data, axis=-1, keepdims=True)
        return Tensor(exp_x.data / normaliser)

    def parameters(self):
        """Return the parameters of the q, k, v and output projections, in that order."""
        params = []
        for projection in (self.q_proj, self.k_proj, self.v_proj, self.out_proj):
            params += projection.parameters()
        return params
|
||||
|
||||
class Embedding:
    """Minimal lookup-table embedding layer - imports from Module 11 in practice."""

    def __init__(self, vocab_size, embed_dim):
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        # Small random starting values (normal, std 0.02).
        table = np.random.normal(0, 0.02, (vocab_size, embed_dim))
        self.weight = Tensor(table)

    def forward(self, indices):
        """Look up the rows of the weight table selected by ``indices.data``."""
        rows = self.weight.data[indices.data]
        return Tensor(rows)

    def parameters(self):
        """Return the embedding table as the only parameter."""
        return [self.weight]
|
||||
|
||||
def gelu(x):
    """GELU activation (tanh approximation) applied element-wise to ``x.data``."""
    values = x.data
    inner = np.sqrt(2 / np.pi) * (values + 0.044715 * values**3)
    return Tensor(0.5 * values * (1 + np.tanh(inner)))
|
||||
8
tinytorch/optimization/acceleration.py
generated
8
tinytorch/optimization/acceleration.py
generated
@@ -1,8 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/16_acceleration/acceleration_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/16_acceleration/acceleration_dev.ipynb 0
|
||||
#| default_exp optimization.acceleration
|
||||
#| export
|
||||
85
tinytorch/optimization/compression.py
generated
85
tinytorch/optimization/compression.py
generated
@@ -1,85 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/18_compression/compression_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = ['Tensor', 'Linear', 'Sequential']
|
||||
|
||||
# %% ../../modules/source/18_compression/compression_dev.ipynb 1
|
||||
import numpy as np
|
||||
import copy
|
||||
from typing import List, Dict, Any, Tuple, Optional
|
||||
import time
|
||||
|
||||
# Import from previous modules
|
||||
# Note: In the full package, these would be imports like:
|
||||
# from tinytorch.core.tensor import Tensor
|
||||
# from tinytorch.core.layers import Linear
|
||||
# For development, we'll create minimal implementations
|
||||
|
||||
class Tensor:
    """Minimal Tensor class for compression development - imports from Module 01 in practice.

    Thin wrapper around a NumPy array; every operation returns a new
    ``Tensor`` and no gradients are tracked.
    """

    def __init__(self, data, requires_grad=False):
        self.data = np.array(data)
        self.shape = self.data.shape
        self.size = self.data.size
        self.requires_grad = requires_grad
        self.grad = None

    def __add__(self, other):
        """Element-wise addition with a Tensor, array or scalar."""
        if isinstance(other, Tensor):
            return Tensor(self.data + other.data)
        return Tensor(self.data + other)

    def __mul__(self, other):
        """Element-wise multiplication with a Tensor, array or scalar."""
        if isinstance(other, Tensor):
            return Tensor(self.data * other.data)
        return Tensor(self.data * other)

    def matmul(self, other):
        """Matrix-multiply with ``other`` (an object exposing ``.data``).

        ``np.matmul`` is used so that stacked (batched) operands are
        multiplied matrix-by-matrix; results are identical to the former
        ``np.dot`` for 1-D/2-D operands and for N-D inputs times a 2-D weight.
        """
        lhs, rhs = self.data, other.data
        if np.ndim(lhs) == 0 or np.ndim(rhs) == 0:
            # np.matmul rejects scalars; keep the scaling behaviour np.dot had.
            return Tensor(lhs * rhs)
        return Tensor(np.matmul(lhs, rhs))

    def abs(self):
        """Element-wise absolute value."""
        return Tensor(np.abs(self.data))

    def sum(self, axis=None):
        """Sum over ``axis`` (all elements when ``axis`` is None)."""
        return Tensor(self.data.sum(axis=axis))

    def __repr__(self):
        return f"Tensor(shape={self.shape})"
|
||||
|
||||
class Linear:
    """Minimal fully connected layer for compression development - imports from Module 03 in practice."""

    def __init__(self, in_features, out_features, bias=True):
        self.in_features = in_features
        self.out_features = out_features
        # He initialization: standard normal draws scaled by sqrt(2 / fan_in).
        raw_weights = np.random.randn(in_features, out_features)
        self.weight = Tensor(raw_weights * np.sqrt(2.0 / in_features))
        if bias:
            self.bias = Tensor(np.zeros(out_features))
        else:
            self.bias = None

    def forward(self, x):
        """Project ``x`` through the weight matrix and add the bias if present."""
        projected = x.matmul(self.weight)
        if self.bias is None:
            return projected
        return projected + self.bias

    def parameters(self):
        """Return the weight, followed by the bias when the layer has one."""
        if self.bias is None:
            return [self.weight]
        return [self.weight, self.bias]
|
||||
|
||||
class Sequential:
    """Minimal container that chains layers for model compression."""

    def __init__(self, *layers):
        self.layers = list(layers)

    def forward(self, x):
        """Feed ``x`` through each layer's ``forward`` in order."""
        activation = x
        for stage in self.layers:
            activation = stage.forward(activation)
        return activation

    def parameters(self):
        """Gather parameters from every layer that defines ``parameters()``."""
        collected = []
        for stage in self.layers:
            if not hasattr(stage, 'parameters'):
                continue
            collected.extend(stage.parameters())
        return collected
|
||||
8
tinytorch/optimization/quantization.py
generated
8
tinytorch/optimization/quantization.py
generated
@@ -1,8 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/17_quantization/quantization_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/17_quantization/quantization_dev.ipynb 0
|
||||
#| default_exp optimization.quantization
|
||||
#| export
|
||||
35
tinytorch/profiling/profiler.py
generated
35
tinytorch/profiling/profiler.py
generated
@@ -1,35 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/15_profiling/profiling_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/15_profiling/profiling_dev.ipynb 1
|
||||
import time
|
||||
import numpy as np
|
||||
import tracemalloc
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from collections import defaultdict
|
||||
import gc
|
||||
|
||||
# Import our TinyTorch components for profiling
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '09_spatial'))
|
||||
|
||||
# For testing purposes - in real package these would be proper imports
|
||||
try:
    from tensor_dev import Tensor
    from layers_dev import Linear, Sequential
    from spatial_dev import Conv2d
except ImportError:
    # Fallback - create minimal implementations for testing.
    # NOTE: only Tensor is provided here; Linear, Sequential and Conv2d stay
    # undefined when the development modules cannot be imported.
    class Tensor:
        """Tiny stand-in tensor: a NumPy array plus its shape."""

        def __init__(self, data):
            self.data = np.array(data)
            self.shape = self.data.shape

        def __mul__(self, other):
            """Element-wise product with a tensor-like object, array or number."""
            # Plain numbers/arrays are used directly; anything else is
            # expected to expose its array as `.data`, as before.
            if isinstance(other, (int, float, np.ndarray, np.generic)):
                operand = other
            else:
                operand = other.data
            return Tensor(self.data * operand)

        def sum(self):
            """Sum of all elements, wrapped in a Tensor."""
            return Tensor(np.sum(self.data))
|
||||
8
tinytorch/text/tokenization.py
generated
8
tinytorch/text/tokenization.py
generated
@@ -1,8 +0,0 @@
|
||||
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/10_tokenization/tokenization_dev.ipynb.
|
||||
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 0
|
||||
#| default_exp text.tokenization
|
||||
#| export
|
||||
Reference in New Issue
Block a user