diff --git a/modules/source/01_tensor/tensor_dev.ipynb b/modules/source/01_tensor/tensor_dev.ipynb index c6d011b6..9cad66bf 100644 --- a/modules/source/01_tensor/tensor_dev.ipynb +++ b/modules/source/01_tensor/tensor_dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "6ca4b9f5", + "id": "22bf7b48", "metadata": { "cell_marker": "\"\"\"" }, @@ -51,7 +51,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3dcaaffc", + "id": "7d24677b", "metadata": { "nbgrader": { "grade": false, @@ -69,7 +69,7 @@ }, { "cell_type": "markdown", - "id": "e70ae12a", + "id": "447a0b7a", "metadata": { "cell_marker": "\"\"\"" }, @@ -116,7 +116,7 @@ }, { "cell_type": "markdown", - "id": "7a1e48b5", + "id": "c2b4bc17", "metadata": { "cell_marker": "\"\"\"" }, @@ -175,7 +175,7 @@ }, { "cell_type": "markdown", - "id": "42f2279e", + "id": "1dc8a950", "metadata": { "cell_marker": "\"\"\"" }, @@ -214,7 +214,7 @@ }, { "cell_type": "markdown", - "id": "cb1e99f0", + "id": "334562a5", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -252,7 +252,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4a090be0", + "id": "27b3b08d", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -724,7 +724,7 @@ }, { "cell_type": "markdown", - "id": "a49cddfd", + "id": "345f0782", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -742,7 +742,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79195fe8", + "id": "503244d4", "metadata": { "nbgrader": { "grade": true, @@ -791,7 +791,7 @@ }, { "cell_type": "markdown", - "id": "7cbed527", + "id": "5176cde0", "metadata": { "cell_marker": "\"\"\"" }, @@ -839,7 +839,7 @@ }, { "cell_type": "markdown", - "id": "30f53e64", + "id": "45461424", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -882,7 +882,7 @@ }, { "cell_type": "markdown", - "id": "e13b5c91", + "id": "7ba6f505", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -900,7 +900,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26ab9e58", + "id": "9471ca95", "metadata": { "nbgrader": { "grade": true, @@ -957,7 +957,7 @@ }, { "cell_type": "markdown", - "id": "8ab4eb75", + "id": "453ed0e5", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -1057,7 +1057,7 @@ }, { "cell_type": "markdown", - "id": "75e72654", + "id": "ae6dca6f", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1075,7 +1075,7 @@ { "cell_type": "code", "execution_count": null, - "id": "434f6550", + "id": "d1bf193f", "metadata": { "nbgrader": { "grade": true, @@ -1132,7 +1132,7 @@ }, { "cell_type": "markdown", - "id": "de04fa2e", + "id": "23a70fb2", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -1235,7 +1235,7 @@ }, { "cell_type": "markdown", - "id": "2f4cd90a", + "id": "a320a34f", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1253,7 +1253,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e497f3d1", + "id": "04a65af9", "metadata": { "nbgrader": { "grade": true, @@ -1323,7 +1323,7 @@ }, { "cell_type": "markdown", - "id": "c944cd8b", + "id": "509140c2", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -1417,7 +1417,7 @@ }, { "cell_type": "markdown", - "id": "e8312574", + "id": "21664f47", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1435,7 +1435,7 @@ { "cell_type": "code", "execution_count": null, - "id": "66d6beb6", + "id": "5f9edd66", "metadata": { "nbgrader": { "grade": true, @@ -1508,7 +1508,7 @@ }, { "cell_type": "markdown", - "id": "71042cd1", + "id": "8b900870", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -1583,7 +1583,7 @@ }, { "cell_type": "markdown", - "id": "9d5518b2", + "id": "a98400bf", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -1644,7 +1644,7 @@ }, { "cell_type": "markdown", - "id": "23b79c43", + "id": "c74f78e6", "metadata": { "lines_to_next_cell": 1 }, @@ -1666,7 +1666,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c8fdde9c", + "id": "6766dc8a", "metadata": { "lines_to_next_cell": 2, "nbgrader": { @@ -1794,7 +1794,7 @@ }, { "cell_type": "markdown", - "id": "f0f02362", + "id": "602da67a", "metadata": { "cell_marker": "\"\"\"" }, diff --git a/modules/source/02_activations/activations_dev.ipynb b/modules/source/02_activations/activations_dev.ipynb index 91358874..f3dc5445 100644 --- a/modules/source/02_activations/activations_dev.ipynb +++ b/modules/source/02_activations/activations_dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "8f7f092b", + "id": "41637b5b", "metadata": { "cell_marker": "\"\"\"" }, @@ -34,7 +34,7 @@ }, { "cell_type": "markdown", - "id": "ba7543b3", + "id": "eb80f71c", "metadata": { "cell_marker": "\"\"\"" }, @@ -59,7 +59,7 @@ }, { "cell_type": "markdown", - "id": "5f04cb4a", + "id": "ad445b19", "metadata": { "cell_marker": "\"\"\"" }, @@ -78,7 +78,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3042497e", + "id": "7fc4b3ae", "metadata": { "nbgrader": { "grade": false, @@ -102,7 +102,7 @@ }, { "cell_type": "markdown", - "id": "609861d1", + "id": "6c49b0a7", "metadata": { "cell_marker": "\"\"\"" }, @@ -144,7 +144,7 @@ }, { "cell_type": "markdown", - "id": "9b7b4834", + "id": "a82d5ffc", "metadata": { "cell_marker": "\"\"\"" }, @@ -166,7 +166,7 @@ }, { "cell_type": "markdown", - "id": "29845a4a", + "id": "d954190f", "metadata": { "cell_marker": "\"\"\"" }, @@ -190,7 +190,7 @@ }, { "cell_type": "markdown", - "id": "d5fc598c", + "id": "1d26aa84", "metadata": { "cell_marker": "\"\"\"" }, @@ -228,7 +228,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b6cb596e", + "id": "cd112f28", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -287,7 +287,7 @@ }, { "cell_type": "markdown", - "id": "84674501", + "id": "87407a56", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -303,7 +303,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3fbc497a", + "id": "8599e53a", "metadata": { "nbgrader": { "grade": true, @@ -344,7 +344,7 @@ }, { "cell_type": "markdown", - "id": "a076a2f1", + "id": "96438263", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -386,7 +386,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a079c21f", + "id": "6bdad44d", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -442,7 +442,7 @@ }, { "cell_type": "markdown", - "id": "8cf41efa", + "id": "853265df", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -458,7 +458,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9bf84e40", + "id": "e3f2e5fd", "metadata": { "nbgrader": { "grade": true, @@ -505,7 +505,7 @@ }, { "cell_type": "markdown", - "id": "26a36cf2", + "id": "d137e456", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -544,7 +544,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a93086db", + "id": "3a3ec4c5", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -600,7 +600,7 @@ }, { "cell_type": "markdown", - "id": "4488836b", + "id": "b2ad2baa", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -616,7 +616,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46388fa5", + "id": "b92572ae", "metadata": { "nbgrader": { "grade": true, @@ -664,7 +664,7 @@ }, { "cell_type": "markdown", - "id": "f341ff48", + "id": "d1cdd503", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -707,7 +707,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a9684ba3", + "id": "90f15779", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -768,7 +768,7 @@ }, { "cell_type": "markdown", - "id": "7c8ef48f", + "id": "eb655b3b", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -784,7 +784,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c42d7ec8", + "id": "838060ac", "metadata": { "nbgrader": { "grade": true, @@ -832,7 +832,7 @@ }, { "cell_type": "markdown", - "id": "ba4edcdd", + "id": "a8047ea8", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -870,7 +870,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2821ef9e", + "id": "aa266bb7", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -942,7 +942,7 @@ }, { "cell_type": "markdown", - "id": "70c31533", + "id": "80e6ad27", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -958,7 +958,7 @@ { "cell_type": "code", "execution_count": null, - "id": "852f5832", + "id": "f3db3810", "metadata": { "nbgrader": { "grade": true, @@ -1016,7 +1016,7 @@ }, { "cell_type": "markdown", - "id": "b0107716", + "id": "2db83cef", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -1029,7 +1029,7 @@ }, { "cell_type": "markdown", - "id": "87c16b51", + "id": "428eaa1b", "metadata": { "cell_marker": "\"\"\"" }, @@ -1049,7 +1049,7 @@ }, { "cell_type": "markdown", - "id": "0a812659", + "id": "fe7666b9", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1063,7 +1063,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0015101e", + "id": "fac9ee55", "metadata": { "lines_to_next_cell": 2, "nbgrader": { @@ -1162,7 +1162,7 @@ }, { "cell_type": "markdown", - "id": "d0575067", + "id": "6a9cc930", "metadata": { "cell_marker": "\"\"\"" }, diff --git a/modules/source/03_layers/layers_dev.ipynb b/modules/source/03_layers/layers_dev.ipynb index 76e3f822..2007b4b5 100644 --- a/modules/source/03_layers/layers_dev.ipynb +++ b/modules/source/03_layers/layers_dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "2cfa2aae", + "id": "46b4a258", "metadata": { "cell_marker": "\"\"\"" }, @@ -53,7 +53,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c7a84c05", + "id": "bc3a80e9", "metadata": { "nbgrader": { "grade": false, @@ -77,7 +77,7 @@ }, { "cell_type": "markdown", - "id": "e52c72c2", + "id": "76d31667", "metadata": { "cell_marker": "\"\"\"" }, @@ -101,7 +101,7 @@ }, { "cell_type": "markdown", - "id": "e05eee85", + "id": "e0421bae", "metadata": { "cell_marker": "\"\"\"" }, @@ -139,7 +139,7 @@ }, { "cell_type": "markdown", - "id": "f489f983", + "id": "6670b0b1", "metadata": { "cell_marker": "\"\"\"" }, @@ -160,7 +160,7 @@ }, { "cell_type": "markdown", - "id": "fff4865c", + "id": "2dc8d8c8", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -211,7 +211,7 @@ { "cell_type": "code", "execution_count": null, - "id": "da931144", + "id": "a973eb44", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -335,7 +335,7 @@ }, { "cell_type": "markdown", - "id": "77988775", + "id": "d4cbdf9d", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -351,7 +351,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c2e0b2e", + "id": "174fe10a", "metadata": { "nbgrader": { "grade": true, @@ -411,7 +411,7 @@ }, { "cell_type": "markdown", - "id": "d2fa31b7", + "id": "e961f791", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -473,7 +473,7 @@ { "cell_type": "code", "execution_count": null, - "id": "88715659", + "id": "b924d865", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -556,6 +556,10 @@ " return Tensor(output_data)\n", " ### END SOLUTION\n", "\n", + " def __call__(self, x, training=True):\n", + " \"\"\"Allows the layer to be called like a function.\"\"\"\n", + " return self.forward(x, training)\n", + "\n", " def parameters(self):\n", " \"\"\"Dropout has no parameters.\"\"\"\n", " return []\n", @@ -566,7 +570,7 @@ }, { "cell_type": "markdown", - "id": "0d33ff6f", + "id": "ee0bc9a1", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -582,7 +586,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cb0b7ae8", + "id": "c76974a1", "metadata": { "nbgrader": { "grade": true, @@ -658,7 +662,7 @@ }, { "cell_type": "markdown", - "id": "4b4aac2f", + "id": "231dae31", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -718,7 +722,7 @@ }, { "cell_type": "markdown", - "id": "0f4f3b7d", + "id": "bbc4aad9", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -777,7 +781,7 @@ { "cell_type": "code", "execution_count": null, - "id": "99df0451", + "id": "0ca58dc7", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -832,7 +836,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e82e08c", + "id": "a9b7ae8a", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -873,7 +877,7 @@ }, { "cell_type": "markdown", - "id": "85ea5db5", + "id": "5570a366", "metadata": { "lines_to_next_cell": 1 }, @@ -895,7 +899,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ebddc165", + "id": "b2e11bf8", "metadata": { "lines_to_next_cell": 2, "nbgrader": { @@ -986,7 +990,7 @@ }, { "cell_type": "markdown", - "id": "31c54ee3", + "id": "4c9212f9", "metadata": { "cell_marker": "\"\"\"" }, diff --git a/modules/source/04_losses/losses_dev.ipynb b/modules/source/04_losses/losses_dev.ipynb index e0301d50..39f53cf2 100644 --- a/modules/source/04_losses/losses_dev.ipynb +++ b/modules/source/04_losses/losses_dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "7a46b4ac", + "id": "9d798b1c", "metadata": { "cell_marker": "\"\"\"" }, @@ -35,7 +35,7 @@ }, { "cell_type": "markdown", - "id": "95565c7e", + "id": "91804987", "metadata": { "cell_marker": "\"\"\"" }, @@ -59,7 +59,7 @@ }, { "cell_type": "markdown", - "id": "1c9b95bd", + "id": "c09dc686", "metadata": { "cell_marker": "\"\"\"" }, @@ -80,7 +80,7 @@ { "cell_type": "code", "execution_count": null, - "id": "054331fd", + "id": "51189bc1", "metadata": { "nbgrader": { "grade": false, @@ -113,7 +113,7 @@ }, { "cell_type": "markdown", - "id": "e60f5944", + "id": "cc227c2d", "metadata": { "cell_marker": "\"\"\"" }, @@ -189,7 +189,7 @@ }, { "cell_type": "markdown", - "id": "2a8ac601", + "id": "49e5039b", "metadata": { "cell_marker": "\"\"\"" }, @@ -235,7 +235,7 @@ }, { "cell_type": "markdown", - "id": "3e628237", + "id": "b1e1cbd0", "metadata": { "cell_marker": "\"\"\"" }, @@ -247,7 +247,7 @@ }, { "cell_type": "markdown", - "id": "84a9e420", + "id": "820e9937", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -297,7 +297,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c570112b", + "id": "854758b3", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -348,7 +348,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7e637f17", + "id": "6b57e650", "metadata": { "nbgrader": { "grade": true, @@ -389,7 +389,7 @@ }, { "cell_type": "markdown", - "id": "54c1f877", + "id": "b8be9f2c", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -459,7 +459,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24575e1c", + "id": "aca5154a", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -531,7 +531,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f7e6aa15", + "id": "7391538b", "metadata": { "nbgrader": { "grade": true, @@ -577,7 +577,7 @@ }, { "cell_type": "markdown", - "id": "cde7a8f4", + "id": "0b9b254c", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -670,7 +670,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0e3a2600", + "id": "eb59fb50", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -746,7 +746,7 @@ { "cell_type": "code", "execution_count": null, - "id": "45e2f990", + "id": "c59fbbfd", "metadata": { "nbgrader": { "grade": true, @@ -797,7 +797,7 @@ }, { "cell_type": "markdown", - "id": "30fd6c76", + "id": "599727d1", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -906,7 +906,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3bd407b8", + "id": "54a20f3f", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -982,7 +982,7 @@ { "cell_type": "code", "execution_count": null, - "id": "985dd530", + "id": "1bab9d23", "metadata": { "nbgrader": { "grade": true, @@ -1033,7 +1033,7 @@ }, { "cell_type": "markdown", - "id": "d7c0a96b", + "id": "ca40b581", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1090,7 +1090,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c5baee20", + "id": "76b4eb81", "metadata": { "nbgrader": { "grade": false, @@ -1146,7 +1146,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e2216e97", + "id": "b90c91f0", "metadata": { "nbgrader": { "grade": false, @@ -1211,7 +1211,7 @@ }, { "cell_type": "markdown", - "id": "435562a9", + "id": "e2fc1aa7", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1286,7 +1286,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0e2b66af", + "id": "573fa75d", "metadata": { "nbgrader": { "grade": false, @@ -1336,7 +1336,7 @@ { "cell_type": "code", "execution_count": null, - "id": "518ead17", + "id": "b7f12c78", "metadata": { "nbgrader": { "grade": false, @@ -1393,7 +1393,7 @@ }, { "cell_type": "markdown", - "id": "7d0c2aa4", + "id": "4c6ebac9", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1457,7 +1457,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7345a14f", + "id": "d0b635c1", "metadata": { "nbgrader": { "grade": false, @@ -1513,7 +1513,7 @@ }, { "cell_type": "markdown", - "id": "73d7096f", + "id": "d770e887", "metadata": { "cell_marker": "\"\"\"" }, @@ -1526,7 +1526,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5a5f3a29", + "id": "55fd411d", "metadata": { "nbgrader": { "grade": true, @@ -1606,7 +1606,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6e91917e", + "id": "b66f2370", "metadata": { "lines_to_next_cell": 2 }, @@ -1619,7 +1619,7 @@ }, { "cell_type": "markdown", - "id": "16461a93", + "id": "ce0d9c33", "metadata": { "cell_marker": "\"\"\"" }, diff --git a/modules/source/06_optimizers/optimizers_dev.ipynb b/modules/source/06_optimizers/optimizers_dev.ipynb index dfd5ac71..7ef0e46e 100644 --- a/modules/source/06_optimizers/optimizers_dev.ipynb +++ b/modules/source/06_optimizers/optimizers_dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "3d8f1c62", + "id": "518b6ae0", "metadata": { "cell_marker": "\"\"\"" }, @@ -51,7 +51,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a2b41da9", + "id": "30bbc6f8", "metadata": { "nbgrader": { "grade": false, @@ -68,15 +68,12 @@ "from typing import List, Union, Optional, Dict, Any\n", "\n", "# Import Tensor from Module 01 (now with gradient support from Module 05)\n", - "import sys\n", - "import os\n", - "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n", - "from tensor_dev import Tensor" + "from tinytorch.core.tensor import Tensor" ] }, { "cell_type": "markdown", - "id": "3169e215", + "id": "9057f3bf", "metadata": { "cell_marker": "\"\"\"" }, @@ -133,7 +130,7 @@ }, { "cell_type": "markdown", - "id": "baec0321", + "id": "3b2f074e", "metadata": { "cell_marker": "\"\"\"" }, @@ -219,7 +216,7 @@ }, { "cell_type": "markdown", - "id": "49716b34", + "id": "3000c581", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -247,7 +244,7 @@ { "cell_type": "code", "execution_count": null, - "id": "06d956dd", + "id": "d9343aa4", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -258,6 +255,7 @@ }, "outputs": [], "source": [ + "#| export\n", "class Optimizer:\n", " \"\"\"\n", " Base class for all optimizers.\n", @@ -332,7 +330,7 @@ }, { "cell_type": "markdown", - "id": "82015c9d", + "id": "0ded4383", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -348,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c0f57a08", + "id": "25d61648", "metadata": { "nbgrader": { "grade": true, @@ -401,7 +399,7 @@ }, { "cell_type": "markdown", - "id": "7d9b8ceb", + "id": "bf5adabc", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -473,7 +471,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae4679bb", + "id": "12f0f4b6", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -484,6 +482,7 @@ }, "outputs": [], "source": [ + "#| export\n", "class SGD(Optimizer):\n", " \"\"\"\n", " Stochastic Gradient Descent with momentum.\n", @@ -576,7 +575,7 @@ }, { "cell_type": "markdown", - "id": "ced264d8", + "id": "815d0bab", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -592,7 +591,7 @@ { "cell_type": "code", "execution_count": null, - "id": "68ae4ccf", + "id": "c01ebc69", "metadata": { "nbgrader": { "grade": true, @@ -659,7 +658,7 @@ }, { "cell_type": "markdown", - "id": "480929e4", + "id": "c656b1b4", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -733,7 +732,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2d7e339f", + "id": "b545ed16", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -744,6 +743,7 @@ }, "outputs": [], "source": [ + "#| export\n", "class Adam(Optimizer):\n", " \"\"\"\n", " Adam optimizer with adaptive learning rates.\n", @@ -853,7 +853,7 @@ }, { "cell_type": "markdown", - "id": "6f114c5b", + "id": "b688bced", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -869,7 +869,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7f64abcc", + "id": "61fa7116", "metadata": { "nbgrader": { "grade": true, @@ -945,7 +945,7 @@ }, { "cell_type": "markdown", - "id": "16ccfeaa", + "id": "7cb028b2", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1019,7 +1019,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23c16f99", + "id": "277056cc", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -1030,6 +1030,7 @@ }, "outputs": [], "source": [ + "#| export\n", "class AdamW(Optimizer):\n", " \"\"\"\n", " AdamW optimizer with decoupled weight decay.\n", @@ -1133,7 +1134,7 @@ }, { "cell_type": "markdown", - "id": "0269f86a", + "id": "d59b1b2b", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1149,7 +1150,7 @@ { "cell_type": "code", "execution_count": null, - "id": "016d7b36", + "id": "619464ee", "metadata": { "nbgrader": { "grade": true, @@ -1224,7 +1225,7 @@ }, { "cell_type": "markdown", - "id": "295d5ee6", + "id": "29f5ad7b", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -1251,7 +1252,7 @@ }, { "cell_type": "markdown", - "id": "47d676c3", + "id": "9dd160f5", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1297,7 +1298,7 @@ { "cell_type": "code", "execution_count": null, - "id": "67290db6", + "id": "ab882d12", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -1355,7 +1356,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21136a44", + "id": "abac74aa", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -1434,7 +1435,7 @@ }, { "cell_type": "markdown", - "id": "b171c224", + "id": "146f209d", "metadata": { "lines_to_next_cell": 1 }, @@ -1456,7 +1457,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46ae99ae", + "id": "1726f746", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -1607,7 +1608,7 @@ { "cell_type": "code", "execution_count": null, - "id": "896f4c69", + "id": "7328ac69", "metadata": {}, "outputs": [], "source": [ @@ -1618,7 +1619,7 @@ }, { "cell_type": "markdown", - "id": "35b39338", + "id": "c662a5f7", "metadata": { "cell_marker": "\"\"\"" }, diff --git a/modules/source/07_training/training_dev.ipynb b/modules/source/07_training/training_dev.ipynb index 06cc7480..9fe44d21 100644 --- a/modules/source/07_training/training_dev.ipynb +++ b/modules/source/07_training/training_dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "765eea82", + "id": "78521710", "metadata": { "cell_marker": "\"\"\"" }, @@ -52,7 +52,7 @@ { "cell_type": "code", "execution_count": null, - "id": "38b1402a", + "id": "d912bff5", "metadata": { "nbgrader": { "grade": false, @@ -75,22 +75,15 @@ "import os\n", "\n", "# Import dependencies from other modules\n", - "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n", - "from tensor_dev import Tensor\n", - "\n", - "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))\n", - "from layers_dev import Linear\n", - "\n", - "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '04_losses'))\n", - "from losses_dev import MSELoss, CrossEntropyLoss\n", - "\n", - "sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_optimizers'))\n", - "from optimizers_dev import SGD, AdamW" + "from tinytorch.core.tensor import Tensor\n", + "from tinytorch.core.layers import Linear\n", + "from tinytorch.core.losses import MSELoss, CrossEntropyLoss\n", + "from tinytorch.core.optimizers import SGD, AdamW" ] }, { "cell_type": "markdown", - "id": "89550fb8", + "id": "2f4fc27e", "metadata": { "cell_marker": "\"\"\"" }, @@ -119,7 +112,7 @@ }, { "cell_type": "markdown", - "id": "d0b48f7a", + "id": "4fa19758", "metadata": { "cell_marker": "\"\"\"" }, @@ -166,7 +159,7 @@ }, { "cell_type": "markdown", - "id": "4ed8a995", + "id": "8599a0f1", "metadata": { "cell_marker": "\"\"\"" }, @@ -180,7 +173,7 @@ }, { "cell_type": "markdown", - "id": "ebfa93fc", + "id": "ed5a85db", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -221,7 +214,7 @@ { "cell_type": "code", "execution_count": null, - "id": "347b09da", + "id": "9dac2b34", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -233,6 +226,7 @@ }, "outputs": [], "source": [ + "#| export\n", "class CosineSchedule:\n", " \"\"\"\n", " Cosine annealing learning rate schedule.\n", @@ -274,7 +268,7 @@ }, { "cell_type": "markdown", - "id": "c1db4e03", + "id": "c146074f", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -290,7 +284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c27f6878", + "id": "ee33397e", "metadata": { "nbgrader": { "grade": true, @@ -334,7 +328,7 @@ }, { "cell_type": "markdown", - "id": "81fc482c", + "id": "da8efa9f", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -380,7 +374,7 @@ { "cell_type": "code", "execution_count": null, - "id": "db99efd3", + "id": "29a5573c", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -457,7 +451,7 @@ }, { "cell_type": "markdown", - "id": "3b0b188d", + "id": "7c1510f3", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -473,7 +467,7 @@ { "cell_type": "code", "execution_count": null, - "id": "91bf937f", + "id": "754c9cd5", "metadata": { "nbgrader": { "grade": true, @@ -540,7 +534,7 @@ }, { "cell_type": "markdown", - "id": "dde7833e", + "id": "a827fb93", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -597,7 +591,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fa8339e1", + "id": "63354dd4", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -609,6 +603,7 @@ }, "outputs": [], "source": [ + "#| export\n", "class Trainer:\n", " \"\"\"\n", " Complete training orchestrator for neural networks.\n", @@ -875,7 +870,7 @@ }, { "cell_type": "markdown", - "id": "529dfcf5", + "id": "9266bc60", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -891,7 +886,7 @@ { "cell_type": "code", "execution_count": null, - "id": "03510440", + "id": "8ce52aba", "metadata": { "nbgrader": { "grade": true, @@ -972,7 +967,7 @@ }, { "cell_type": "markdown", - "id": "905180bd", + "id": "7ad86345", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 2 @@ -985,7 +980,7 @@ }, { "cell_type": "markdown", - "id": "3c631938", + "id": "9953bcd4", "metadata": { "lines_to_next_cell": 1 }, @@ -1009,7 +1004,7 @@ }, { "cell_type": "markdown", - "id": "8b65c5ab", + "id": "2eab95b6", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1023,7 +1018,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29eea538", + "id": "0580d838", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -1151,7 +1146,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae1bc4b9", + "id": "62eadf89", "metadata": { "nbgrader": { "grade": false, @@ -1169,7 +1164,7 @@ }, { "cell_type": "markdown", - "id": "ad8ae396", + "id": "ebe885e5", "metadata": { "cell_marker": "\"\"\"" }, diff --git a/tinytorch/_modidx.py b/tinytorch/_modidx.py new file mode 100644 index 00000000..8ad72781 --- /dev/null +++ b/tinytorch/_modidx.py @@ -0,0 +1,204 @@ +# ╔═══════════════════════════════════════════════════════════════════════════════╗ +# ║ 🚨 CRITICAL WARNING 🚨 ║ +# ║ AUTOGENERATED! DO NOT EDIT! ║ +# ║ ║ +# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ +# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ +# ║ ║ +# ║ ✅ TO EDIT: modules/source/[unknown]/[unknown]_dev.py ║ +# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ +# ║ ║ +# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ +# ║ Editing it directly may break module functionality and training. ║ +# ║ ║ +# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ +# ║ happens! The tinytorch/ directory is just the compiled output. ║ +# ╚═══════════════════════════════════════════════════════════════════════════════╝ +# Autogenerated by nbdev + +d = { 'settings': { 'branch': 'main', + 'doc_baseurl': '/TinyTorch/', + 'doc_host': 'https://tinytorch.github.io', + 'git_url': 'https://github.com/tinytorch/TinyTorch/', + 'lib_path': 'tinytorch'}, + 'syms': { 'tinytorch.core.activations': { 'tinytorch.core.activations.GELU': ( '02_activations/activations_dev.html#gelu', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.GELU.__call__': ( '02_activations/activations_dev.html#gelu.__call__', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.GELU.backward': ( '02_activations/activations_dev.html#gelu.backward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.GELU.forward': ( '02_activations/activations_dev.html#gelu.forward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.ReLU': ( '02_activations/activations_dev.html#relu', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.ReLU.__call__': ( '02_activations/activations_dev.html#relu.__call__', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.ReLU.backward': ( '02_activations/activations_dev.html#relu.backward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.ReLU.forward': ( '02_activations/activations_dev.html#relu.forward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Sigmoid': ( '02_activations/activations_dev.html#sigmoid', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Sigmoid.__call__': ( '02_activations/activations_dev.html#sigmoid.__call__', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Sigmoid.backward': ( '02_activations/activations_dev.html#sigmoid.backward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Sigmoid.forward': ( '02_activations/activations_dev.html#sigmoid.forward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Softmax': ( '02_activations/activations_dev.html#softmax', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Softmax.__call__': ( '02_activations/activations_dev.html#softmax.__call__', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Softmax.backward': ( '02_activations/activations_dev.html#softmax.backward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Softmax.forward': ( '02_activations/activations_dev.html#softmax.forward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Tanh': ( '02_activations/activations_dev.html#tanh', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Tanh.__call__': ( '02_activations/activations_dev.html#tanh.__call__', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Tanh.backward': ( '02_activations/activations_dev.html#tanh.backward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Tanh.forward': ( '02_activations/activations_dev.html#tanh.forward', + 'tinytorch/core/activations.py')}, + 'tinytorch.core.autograd': {}, + 'tinytorch.core.layers': { 'tinytorch.core.layers.Dropout': ('03_layers/layers_dev.html#dropout', 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Dropout.__call__': ( '03_layers/layers_dev.html#dropout.__call__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Dropout.__init__': ( '03_layers/layers_dev.html#dropout.__init__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Dropout.__repr__': ( '03_layers/layers_dev.html#dropout.__repr__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Dropout.forward': ( '03_layers/layers_dev.html#dropout.forward', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Dropout.parameters': ( '03_layers/layers_dev.html#dropout.parameters', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Linear': ('03_layers/layers_dev.html#linear', 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Linear.__call__': ( '03_layers/layers_dev.html#linear.__call__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Linear.__init__': ( '03_layers/layers_dev.html#linear.__init__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Linear.__repr__': ( '03_layers/layers_dev.html#linear.__repr__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Linear.forward': ( '03_layers/layers_dev.html#linear.forward', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Linear.parameters': ( '03_layers/layers_dev.html#linear.parameters', + 'tinytorch/core/layers.py')}, + 'tinytorch.core.losses': { 'tinytorch.core.losses.BinaryCrossEntropyLoss': ( '04_losses/losses_dev.html#binarycrossentropyloss', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.BinaryCrossEntropyLoss.__call__': ( '04_losses/losses_dev.html#binarycrossentropyloss.__call__', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.BinaryCrossEntropyLoss.__init__': ( '04_losses/losses_dev.html#binarycrossentropyloss.__init__', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.BinaryCrossEntropyLoss.backward': ( '04_losses/losses_dev.html#binarycrossentropyloss.backward', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.BinaryCrossEntropyLoss.forward': ( '04_losses/losses_dev.html#binarycrossentropyloss.forward', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.CrossEntropyLoss': ( '04_losses/losses_dev.html#crossentropyloss', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.CrossEntropyLoss.__call__': ( '04_losses/losses_dev.html#crossentropyloss.__call__', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.CrossEntropyLoss.__init__': ( '04_losses/losses_dev.html#crossentropyloss.__init__', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.CrossEntropyLoss.backward': ( '04_losses/losses_dev.html#crossentropyloss.backward', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.CrossEntropyLoss.forward': ( '04_losses/losses_dev.html#crossentropyloss.forward', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.MSELoss': ('04_losses/losses_dev.html#mseloss', 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.MSELoss.__call__': ( '04_losses/losses_dev.html#mseloss.__call__', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.MSELoss.__init__': ( '04_losses/losses_dev.html#mseloss.__init__', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.MSELoss.backward': ( '04_losses/losses_dev.html#mseloss.backward', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.MSELoss.forward': ( '04_losses/losses_dev.html#mseloss.forward', + 'tinytorch/core/losses.py'), + 'tinytorch.core.losses.import_previous_module': ( '04_losses/losses_dev.html#import_previous_module', + 'tinytorch/core/losses.py')}, + 'tinytorch.core.optimizers': { 'tinytorch.core.optimizers.Adam': ( '06_optimizers/optimizers_dev.html#adam', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.Adam.__init__': ( '06_optimizers/optimizers_dev.html#adam.__init__', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.Adam.step': ( '06_optimizers/optimizers_dev.html#adam.step', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.AdamW': ( '06_optimizers/optimizers_dev.html#adamw', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.AdamW.__init__': ( '06_optimizers/optimizers_dev.html#adamw.__init__', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.AdamW.step': ( '06_optimizers/optimizers_dev.html#adamw.step', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.Optimizer': ( '06_optimizers/optimizers_dev.html#optimizer', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.Optimizer.__init__': ( '06_optimizers/optimizers_dev.html#optimizer.__init__', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.Optimizer.step': ( '06_optimizers/optimizers_dev.html#optimizer.step', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.Optimizer.zero_grad': ( '06_optimizers/optimizers_dev.html#optimizer.zero_grad', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.SGD': ( '06_optimizers/optimizers_dev.html#sgd', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.SGD.__init__': ( '06_optimizers/optimizers_dev.html#sgd.__init__', + 'tinytorch/core/optimizers.py'), + 'tinytorch.core.optimizers.SGD.step': ( '06_optimizers/optimizers_dev.html#sgd.step', + 'tinytorch/core/optimizers.py')}, + 'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('01_tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__add__': ( '01_tensor/tensor_dev.html#tensor.__add__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__init__': ( '01_tensor/tensor_dev.html#tensor.__init__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__mul__': ( '01_tensor/tensor_dev.html#tensor.__mul__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__repr__': ( '01_tensor/tensor_dev.html#tensor.__repr__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__str__': ( '01_tensor/tensor_dev.html#tensor.__str__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__sub__': ( '01_tensor/tensor_dev.html#tensor.__sub__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__truediv__': ( '01_tensor/tensor_dev.html#tensor.__truediv__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.backward': ( '01_tensor/tensor_dev.html#tensor.backward', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.matmul': ( '01_tensor/tensor_dev.html#tensor.matmul', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.max': ( '01_tensor/tensor_dev.html#tensor.max', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.mean': ( '01_tensor/tensor_dev.html#tensor.mean', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.numpy': ( '01_tensor/tensor_dev.html#tensor.numpy', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.reshape': ( '01_tensor/tensor_dev.html#tensor.reshape', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.sum': ( '01_tensor/tensor_dev.html#tensor.sum', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.transpose': ( '01_tensor/tensor_dev.html#tensor.transpose', + 'tinytorch/core/tensor.py')}, + 'tinytorch.core.training': { 'tinytorch.core.training.CosineSchedule': ( '07_training/training_dev.html#cosineschedule', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.CosineSchedule.__init__': ( '07_training/training_dev.html#cosineschedule.__init__', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.CosineSchedule.get_lr': ( '07_training/training_dev.html#cosineschedule.get_lr', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer': ( '07_training/training_dev.html#trainer', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer.__init__': ( '07_training/training_dev.html#trainer.__init__', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer._get_model_state': ( '07_training/training_dev.html#trainer._get_model_state', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer._get_optimizer_state': ( '07_training/training_dev.html#trainer._get_optimizer_state', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer._get_scheduler_state': ( '07_training/training_dev.html#trainer._get_scheduler_state', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer._set_model_state': ( '07_training/training_dev.html#trainer._set_model_state', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer._set_optimizer_state': ( '07_training/training_dev.html#trainer._set_optimizer_state', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer._set_scheduler_state': ( '07_training/training_dev.html#trainer._set_scheduler_state', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer.evaluate': ( '07_training/training_dev.html#trainer.evaluate', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer.load_checkpoint': ( '07_training/training_dev.html#trainer.load_checkpoint', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer.save_checkpoint': ( '07_training/training_dev.html#trainer.save_checkpoint', + 'tinytorch/core/training.py'), + 'tinytorch.core.training.Trainer.train_epoch': ( '07_training/training_dev.html#trainer.train_epoch', + 'tinytorch/core/training.py')}}} diff --git a/tinytorch/applications/tinygpt.py b/tinytorch/applications/tinygpt.py deleted file mode 100644 index 80dabc9a..00000000 --- a/tinytorch/applications/tinygpt.py +++ /dev/null @@ -1,8 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/20_capstone/capstone_dev.ipynb. - -# %% auto 0 -__all__ = [] - -# %% ../../modules/source/20_capstone/capstone_dev.ipynb 2 -#| default_exp applications.tinygpt -#| export diff --git a/tinytorch/benchmarking/benchmark.py b/tinytorch/benchmarking/benchmark.py deleted file mode 100644 index 59888381..00000000 --- a/tinytorch/benchmarking/benchmark.py +++ /dev/null @@ -1,8 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/19_benchmarking/benchmarking_dev.ipynb. - -# %% auto 0 -__all__ = [] - -# %% ../../modules/source/19_benchmarking/benchmarking_dev.ipynb 0 -#| default_exp benchmarking.benchmark -#| export diff --git a/tinytorch/core/activations.py b/tinytorch/core/activations.py index e9c19589..fff7d636 100644 --- a/tinytorch/core/activations.py +++ b/tinytorch/core/activations.py @@ -1,5 +1,19 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb. - +# ╔═══════════════════════════════════════════════════════════════════════════════╗ +# ║ 🚨 CRITICAL WARNING 🚨 ║ +# ║ AUTOGENERATED! DO NOT EDIT! ║ +# ║ ║ +# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ +# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ +# ║ ║ +# ║ ✅ TO EDIT: modules/source/03_activations/activations_dev.py ║ +# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ +# ║ ║ +# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ +# ║ Editing it directly may break module functionality and training. ║ +# ║ ║ +# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ +# ║ happens! The tinytorch/ directory is just the compiled output. ║ +# ╚═══════════════════════════════════════════════════════════════════════════════╝ # %% auto 0 __all__ = ['Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax'] diff --git a/tinytorch/core/attention.py b/tinytorch/core/attention.py deleted file mode 100644 index 7d6df426..00000000 --- a/tinytorch/core/attention.py +++ /dev/null @@ -1,8 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/12_attention/attention_dev.ipynb. - -# %% auto 0 -__all__ = [] - -# %% ../../modules/source/12_attention/attention_dev.ipynb 0 -#| default_exp core.attention -#| export diff --git a/tinytorch/core/layers.py b/tinytorch/core/layers.py index 7ea028c4..f2729cd9 100644 --- a/tinytorch/core/layers.py +++ b/tinytorch/core/layers.py @@ -1,5 +1,19 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_layers/layers_dev.ipynb. - +# ╔═══════════════════════════════════════════════════════════════════════════════╗ +# ║ 🚨 CRITICAL WARNING 🚨 ║ +# ║ AUTOGENERATED! DO NOT EDIT! ║ +# ║ ║ +# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ +# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ +# ║ ║ +# ║ ✅ TO EDIT: modules/source/04_layers/layers_dev.py ║ +# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ +# ║ ║ +# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ +# ║ Editing it directly may break module functionality and training. ║ +# ║ ║ +# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ +# ║ happens! The tinytorch/ directory is just the compiled output. ║ +# ╚═══════════════════════════════════════════════════════════════════════════════╝ # %% auto 0 __all__ = ['Linear', 'Dropout'] @@ -194,6 +208,10 @@ class Dropout: return Tensor(output_data) ### END SOLUTION + def __call__(self, x, training=True): + """Allows the layer to be called like a function.""" + return self.forward(x, training) + def parameters(self): """Dropout has no parameters.""" return [] diff --git a/tinytorch/core/losses.py b/tinytorch/core/losses.py index 68c47944..348bed68 100644 --- a/tinytorch/core/losses.py +++ b/tinytorch/core/losses.py @@ -1,5 +1,19 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_losses/losses_dev.ipynb. - +# ╔═══════════════════════════════════════════════════════════════════════════════╗ +# ║ 🚨 CRITICAL WARNING 🚨 ║ +# ║ AUTOGENERATED! DO NOT EDIT! ║ +# ║ ║ +# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ +# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ +# ║ ║ +# ║ ✅ TO EDIT: modules/source/XX_losses/losses_dev.py ║ +# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ +# ║ ║ +# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ +# ║ Editing it directly may break module functionality and training. ║ +# ║ ║ +# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ +# ║ happens! The tinytorch/ directory is just the compiled output. ║ +# ╚═══════════════════════════════════════════════════════════════════════════════╝ # %% auto 0 __all__ = ['import_previous_module', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss'] diff --git a/tinytorch/core/optimizers.py b/tinytorch/core/optimizers.py index eec75d99..3698541c 100644 --- a/tinytorch/core/optimizers.py +++ b/tinytorch/core/optimizers.py @@ -1,5 +1,19 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/06_optimizers/optimizers_dev.ipynb. - +# ╔═══════════════════════════════════════════════════════════════════════════════╗ +# ║ 🚨 CRITICAL WARNING 🚨 ║ +# ║ AUTOGENERATED! DO NOT EDIT! ║ +# ║ ║ +# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ +# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ +# ║ ║ +# ║ ✅ TO EDIT: modules/source/10_optimizers/optimizers_dev.py ║ +# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ +# ║ ║ +# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ +# ║ Editing it directly may break module functionality and training. ║ +# ║ ║ +# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ +# ║ happens! The tinytorch/ directory is just the compiled output. ║ +# ╚═══════════════════════════════════════════════════════════════════════════════╝ # %% auto 0 __all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW'] @@ -7,10 +21,10 @@ __all__ = ['Optimizer', 'SGD', 'Adam', 'AdamW'] import numpy as np from typing import List, Union, Optional, Dict, Any -# Import Tensor from Module 01 -from tinytorch.core.tensor import Tensor +# Import Tensor from Module 01 (now with gradient support from Module 05) +from .tensor import Tensor -# %% Base Optimizer class +# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 5 class Optimizer: """ Base class for all optimizers. @@ -37,6 +51,7 @@ class Optimizer: HINT: Check that each parameter has requires_grad=True """ + ### BEGIN SOLUTION # Validate and store parameters if not isinstance(params, list): params = list(params) @@ -50,6 +65,7 @@ class Optimizer: self.params = params self.step_count = 0 # For algorithms that need step counting + ### END SOLUTION def zero_grad(self): """ @@ -67,8 +83,10 @@ class Optimizer: WHY: Gradients accumulate by default, so we need to clear them between batches """ + ### BEGIN SOLUTION for param in self.params: param.grad = None + ### END SOLUTION def step(self): """ @@ -78,9 +96,7 @@ class Optimizer: """ raise NotImplementedError("Subclasses must implement step()") - - -# %% SGD Optimizer +# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 9 class SGD(Optimizer): """ Stochastic Gradient Descent with momentum. @@ -108,6 +124,7 @@ class SGD(Optimizer): - Momentum buffers should be initialized as None - They'll be created lazily on first step """ + ### BEGIN SOLUTION super().__init__(params) self.lr = lr @@ -116,6 +133,7 @@ class SGD(Optimizer): # Initialize momentum buffers (created lazily) self.momentum_buffers = [None for _ in self.params] + ### END SOLUTION def step(self): """ @@ -139,6 +157,7 @@ class SGD(Optimizer): - Initialize momentum buffers on first use - Use in-place operations to save memory """ + ### BEGIN SOLUTION for i, param in enumerate(self.params): if param.grad is None: continue @@ -165,10 +184,9 @@ class SGD(Optimizer): # Increment step counter self.step_count += 1 + ### END SOLUTION - - -# %% Adam Optimizer +# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 13 class Adam(Optimizer): """ Adam optimizer with adaptive learning rates. @@ -198,6 +216,7 @@ class Adam(Optimizer): EXAMPLE: >>> optimizer = Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999)) """ + ### BEGIN SOLUTION super().__init__(params) self.lr = lr @@ -208,6 +227,7 @@ class Adam(Optimizer): # Initialize moment buffers (created lazily) self.m_buffers = [None for _ in self.params] # First moment (mean) self.v_buffers = [None for _ in self.params] # Second moment (variance) + ### END SOLUTION def step(self): """ @@ -235,6 +255,7 @@ class Adam(Optimizer): - Use step_count for bias correction - Square gradients element-wise for second moment """ + ### BEGIN SOLUTION # Increment step counter first (needed for bias correction) self.step_count += 1 @@ -270,10 +291,9 @@ class Adam(Optimizer): # Update parameter param.data = param.data - self.lr * m_hat / (np.sqrt(v_hat) + self.eps) + ### END SOLUTION - - -# %% AdamW Optimizer +# %% ../../modules/source/06_optimizers/optimizers_dev.ipynb 17 class AdamW(Optimizer): """ AdamW optimizer with decoupled weight decay. @@ -301,6 +321,7 @@ class AdamW(Optimizer): EXAMPLE: >>> optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.01) """ + ### BEGIN SOLUTION super().__init__(params) self.lr = lr @@ -311,6 +332,7 @@ class AdamW(Optimizer): # Initialize moment buffers (same as Adam) self.m_buffers = [None for _ in self.params] self.v_buffers = [None for _ in self.params] + ### END SOLUTION def step(self): """ @@ -336,6 +358,7 @@ class AdamW(Optimizer): HINT: Apply weight decay after gradient update for proper decoupling """ + ### BEGIN SOLUTION # Increment step counter first self.step_count += 1 @@ -369,4 +392,4 @@ class AdamW(Optimizer): # Apply decoupled weight decay if self.weight_decay != 0: param.data = param.data * (1 - self.lr * self.weight_decay) - + ### END SOLUTION diff --git a/tinytorch/core/spatial.py b/tinytorch/core/spatial.py deleted file mode 100644 index faa47403..00000000 --- a/tinytorch/core/spatial.py +++ /dev/null @@ -1,64 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_spatial/spatial_dev.ipynb. - -# %% auto 0 -__all__ = [] - -# %% ../../modules/source/09_spatial/spatial_dev.ipynb 1 -import numpy as np -import sys -import os -import time - -# Import dependencies from other modules -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor')) -from tensor_dev import Tensor - -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers')) -from layers_dev import Module - -# Note: Keeping simplified implementations for reference during development -class _SimplifiedTensor: - """Simplified tensor for spatial operations development.""" - - def __init__(self, data, requires_grad=False): - self.data = np.array(data, dtype=np.float32) - self.shape = self.data.shape - self.requires_grad = requires_grad - self.grad = None - - def __repr__(self): - return f"Tensor(shape={self.shape}, data=\n{self.data})" - - def __add__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data + other.data) - return Tensor(self.data + other) - - def __mul__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data * other.data) - return Tensor(self.data * other) - - def sum(self): - return Tensor(np.sum(self.data)) - - def mean(self): - return Tensor(np.mean(self.data)) - - # Create a simple Module base class for inheritance - class Module: - """Simple base class for neural network modules.""" - def __init__(self): - pass - - def forward(self, x): - raise NotImplementedError("Subclasses must implement forward()") - - def parameters(self): - """Return list of parameters for this module.""" - params = [] - for attr_name in dir(self): - attr = getattr(self, attr_name) - if hasattr(attr, 'data') and hasattr(attr, 'requires_grad'): - params.append(attr) - return params diff --git a/tinytorch/core/tensor.py b/tinytorch/core/tensor.py index 22b11dad..fb786066 100644 --- a/tinytorch/core/tensor.py +++ b/tinytorch/core/tensor.py @@ -1,5 +1,19 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_tensor/tensor_dev.ipynb. - +# ╔═══════════════════════════════════════════════════════════════════════════════╗ +# ║ 🚨 CRITICAL WARNING 🚨 ║ +# ║ AUTOGENERATED! DO NOT EDIT! ║ +# ║ ║ +# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ +# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ +# ║ ║ +# ║ ✅ TO EDIT: modules/source/02_tensor/tensor_dev.py ║ +# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ +# ║ ║ +# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ +# ║ Editing it directly may break module functionality and training. ║ +# ║ ║ +# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ +# ║ happens! The tinytorch/ directory is just the compiled output. ║ +# ╚═══════════════════════════════════════════════════════════════════════════════╝ # %% auto 0 __all__ = ['Tensor'] diff --git a/tinytorch/core/training.py b/tinytorch/core/training.py index 54947841..e4082b8f 100644 --- a/tinytorch/core/training.py +++ b/tinytorch/core/training.py @@ -1,7 +1,21 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/07_training/training_dev.ipynb. - +# ╔═══════════════════════════════════════════════════════════════════════════════╗ +# ║ 🚨 CRITICAL WARNING 🚨 ║ +# ║ AUTOGENERATED! DO NOT EDIT! ║ +# ║ ║ +# ║ This file is AUTOMATICALLY GENERATED from source modules. ║ +# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ +# ║ ║ +# ║ ✅ TO EDIT: modules/source/11_training/training_dev.py ║ +# ║ ✅ TO EXPORT: Run 'tito module complete ' ║ +# ║ ║ +# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ +# ║ Editing it directly may break module functionality and training. ║ +# ║ ║ +# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ +# ║ happens! The tinytorch/ directory is just the compiled output. ║ +# ╚═══════════════════════════════════════════════════════════════════════════════╝ # %% auto 0 -__all__ = [] +__all__ = ['CosineSchedule', 'Trainer'] # %% ../../modules/source/07_training/training_dev.ipynb 1 import numpy as np @@ -13,14 +27,310 @@ import sys import os # Import dependencies from other modules -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor')) -from tensor_dev import Tensor +from .tensor import Tensor +from .layers import Linear +from .losses import MSELoss, CrossEntropyLoss +from .optimizers import SGD, AdamW -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers')) -from layers_dev import Linear +# %% ../../modules/source/07_training/training_dev.ipynb 6 +class CosineSchedule: + """ + Cosine annealing learning rate schedule. -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '04_losses')) -from losses_dev import MSELoss, CrossEntropyLoss + Starts at max_lr, decreases following a cosine curve to min_lr over T epochs. + This provides aggressive learning initially, then fine-tuning at the end. -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '06_optimizers')) -from optimizers_dev import SGD, AdamW + TODO: Implement cosine annealing schedule + + APPROACH: + 1. Store max_lr, min_lr, and total_epochs + 2. In get_lr(), compute cosine factor: (1 + cos(π * epoch / total_epochs)) / 2 + 3. Interpolate: min_lr + (max_lr - min_lr) * cosine_factor + + EXAMPLE: + >>> schedule = CosineSchedule(max_lr=0.1, min_lr=0.01, total_epochs=100) + >>> print(schedule.get_lr(0)) # Start: 0.1 + >>> print(schedule.get_lr(50)) # Middle: ~0.055 + >>> print(schedule.get_lr(100)) # End: 0.01 + + HINT: Use np.cos() and np.pi for the cosine calculation + """ + ### BEGIN SOLUTION + def __init__(self, max_lr: float = 0.1, min_lr: float = 0.01, total_epochs: int = 100): + self.max_lr = max_lr + self.min_lr = min_lr + self.total_epochs = total_epochs + + def get_lr(self, epoch: int) -> float: + """Get learning rate for current epoch.""" + if epoch >= self.total_epochs: + return self.min_lr + + # Cosine annealing formula + cosine_factor = (1 + np.cos(np.pi * epoch / self.total_epochs)) / 2 + return self.min_lr + (self.max_lr - self.min_lr) * cosine_factor + ### END SOLUTION + +# %% ../../modules/source/07_training/training_dev.ipynb 14 +class Trainer: + """ + Complete training orchestrator for neural networks. + + Handles the full training lifecycle: forward pass, loss computation, + backward pass, optimization, scheduling, checkpointing, and evaluation. + + This is the central class that brings together all the components + you've built in previous modules. + + TODO: Implement complete Trainer class + + APPROACH: + 1. Store model, optimizer, loss function, and optional scheduler + 2. train_epoch(): Loop through data, compute loss, update parameters + 3. evaluate(): Similar loop but without gradient updates + 4. save/load_checkpoint(): Persist training state for resumption + + DESIGN PATTERNS: + - Context managers for train/eval modes + - Gradient accumulation for effective large batch sizes + - Progress tracking for monitoring + - Flexible scheduling integration + """ + ### BEGIN SOLUTION + def __init__(self, model, optimizer, loss_fn, scheduler=None, grad_clip_norm=None): + """ + Initialize trainer with model and training components. + + Args: + model: Neural network to train + optimizer: Parameter update strategy (SGD, Adam, etc.) + loss_fn: Loss function (CrossEntropy, MSE, etc.) + scheduler: Optional learning rate scheduler + grad_clip_norm: Optional gradient clipping threshold + """ + self.model = model + self.optimizer = optimizer + self.loss_fn = loss_fn + self.scheduler = scheduler + self.grad_clip_norm = grad_clip_norm + + # Training state + self.epoch = 0 + self.step = 0 + self.training_mode = True + + # History tracking + self.history = { + 'train_loss': [], + 'eval_loss': [], + 'learning_rates': [] + } + + def train_epoch(self, dataloader, accumulation_steps=1): + """ + Train for one epoch through the dataset. + + Args: + dataloader: Iterable yielding (inputs, targets) batches + accumulation_steps: Number of batches to accumulate before update + + Returns: + Average loss for the epoch + """ + self.model.training = True + self.training_mode = True + + total_loss = 0.0 + num_batches = 0 + accumulated_loss = 0.0 + + for batch_idx, (inputs, targets) in enumerate(dataloader): + # Forward pass + outputs = self.model.forward(inputs) + loss = self.loss_fn.forward(outputs, targets) + + # Scale loss for accumulation + scaled_loss = loss.data / accumulation_steps + accumulated_loss += scaled_loss + + # Backward pass + if hasattr(loss, 'backward'): + loss.backward() + + # Update parameters every accumulation_steps + if (batch_idx + 1) % accumulation_steps == 0: + # Gradient clipping + if self.grad_clip_norm is not None: + params = [] + if hasattr(self.model, 'parameters'): + params = self.model.parameters() + clip_grad_norm(params, self.grad_clip_norm) + + # Optimizer step + self.optimizer.step() + self.optimizer.zero_grad() + + total_loss += accumulated_loss + accumulated_loss = 0.0 + num_batches += 1 + self.step += 1 + + # Handle remaining accumulated gradients + if accumulated_loss > 0: + if self.grad_clip_norm is not None: + params = [] + if hasattr(self.model, 'parameters'): + params = self.model.parameters() + clip_grad_norm(params, self.grad_clip_norm) + + self.optimizer.step() + self.optimizer.zero_grad() + total_loss += accumulated_loss + num_batches += 1 + + avg_loss = total_loss / max(num_batches, 1) + self.history['train_loss'].append(avg_loss) + + # Update scheduler + if self.scheduler is not None: + current_lr = self.scheduler.get_lr(self.epoch) + # Update optimizer learning rate + if hasattr(self.optimizer, 'lr'): + self.optimizer.lr = current_lr + self.history['learning_rates'].append(current_lr) + + self.epoch += 1 + return avg_loss + + def evaluate(self, dataloader): + """ + Evaluate model on dataset without updating parameters. + + Args: + dataloader: Iterable yielding (inputs, targets) batches + + Returns: + Average loss and accuracy + """ + self.model.training = False + self.training_mode = False + + total_loss = 0.0 + correct = 0 + total = 0 + + for inputs, targets in dataloader: + # Forward pass only + outputs = self.model.forward(inputs) + loss = self.loss_fn.forward(outputs, targets) + + total_loss += loss.data + + # Calculate accuracy (for classification) + if hasattr(outputs, 'data') and hasattr(targets, 'data'): + if len(outputs.data.shape) > 1: # Multi-class + predictions = np.argmax(outputs.data, axis=1) + if len(targets.data.shape) == 1: # Integer targets + correct += np.sum(predictions == targets.data) + else: # One-hot targets + correct += np.sum(predictions == np.argmax(targets.data, axis=1)) + total += len(predictions) + + avg_loss = total_loss / len(dataloader) if len(dataloader) > 0 else 0.0 + accuracy = correct / total if total > 0 else 0.0 + + self.history['eval_loss'].append(avg_loss) + + return avg_loss, accuracy + + def save_checkpoint(self, path: str): + """ + Save complete training state for resumption. + + Args: + path: File path to save checkpoint + """ + checkpoint = { + 'epoch': self.epoch, + 'step': self.step, + 'model_state': self._get_model_state(), + 'optimizer_state': self._get_optimizer_state(), + 'scheduler_state': self._get_scheduler_state(), + 'history': self.history, + 'training_mode': self.training_mode + } + + Path(path).parent.mkdir(parents=True, exist_ok=True) + with open(path, 'wb') as f: + pickle.dump(checkpoint, f) + + def load_checkpoint(self, path: str): + """ + Load training state from checkpoint. + + Args: + path: File path to load checkpoint from + """ + with open(path, 'rb') as f: + checkpoint = pickle.load(f) + + self.epoch = checkpoint['epoch'] + self.step = checkpoint['step'] + self.history = checkpoint['history'] + self.training_mode = checkpoint['training_mode'] + + # Restore states (simplified for educational purposes) + if 'model_state' in checkpoint: + self._set_model_state(checkpoint['model_state']) + if 'optimizer_state' in checkpoint: + self._set_optimizer_state(checkpoint['optimizer_state']) + if 'scheduler_state' in checkpoint: + self._set_scheduler_state(checkpoint['scheduler_state']) + + def _get_model_state(self): + """Extract model parameters for checkpointing.""" + if hasattr(self.model, 'parameters'): + return {i: param.data.copy() for i, param in enumerate(self.model.parameters())} + return {} + + def _set_model_state(self, state): + """Restore model parameters from checkpoint.""" + if hasattr(self.model, 'parameters'): + for i, param in enumerate(self.model.parameters()): + if i in state: + param.data = state[i].copy() + + def _get_optimizer_state(self): + """Extract optimizer state for checkpointing.""" + state = {} + if hasattr(self.optimizer, 'lr'): + state['lr'] = self.optimizer.lr + if hasattr(self.optimizer, 'momentum_buffers'): + state['momentum_buffers'] = self.optimizer.momentum_buffers.copy() + return state + + def _set_optimizer_state(self, state): + """Restore optimizer state from checkpoint.""" + if 'lr' in state and hasattr(self.optimizer, 'lr'): + self.optimizer.lr = state['lr'] + if 'momentum_buffers' in state and hasattr(self.optimizer, 'momentum_buffers'): + self.optimizer.momentum_buffers = state['momentum_buffers'] + + def _get_scheduler_state(self): + """Extract scheduler state for checkpointing.""" + if self.scheduler is None: + return None + return { + 'max_lr': getattr(self.scheduler, 'max_lr', None), + 'min_lr': getattr(self.scheduler, 'min_lr', None), + 'total_epochs': getattr(self.scheduler, 'total_epochs', None) + } + + def _set_scheduler_state(self, state): + """Restore scheduler state from checkpoint.""" + if state is None or self.scheduler is None: + return + for key, value in state.items(): + if hasattr(self.scheduler, key): + setattr(self.scheduler, key, value) + ### END SOLUTION diff --git a/tinytorch/generation/kv_cache.py b/tinytorch/generation/kv_cache.py deleted file mode 100644 index ed623a38..00000000 --- a/tinytorch/generation/kv_cache.py +++ /dev/null @@ -1,57 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/14_kvcaching/kvcaching_dev.ipynb. - -# %% auto 0 -__all__ = ['Tensor'] - -# %% ../../modules/source/14_kvcaching/kvcaching_dev.ipynb 1 -import numpy as np -import time -from typing import Tuple, Optional, Dict, List -from dataclasses import dataclass - -# Import our TinyTorch components (Modules 01-13) -### BEGIN SOLUTION -# Note: In real implementation, these would import from previous modules -# For now, we'll implement minimal versions to focus on caching concepts - -class Tensor: - """Minimal Tensor for KV Caching focus (from Module 01)""" - def __init__(self, data, requires_grad=False): - self.data = np.array(data) - self.shape = self.data.shape - self.requires_grad = requires_grad - self.grad = None - - def __getitem__(self, key): - return Tensor(self.data[key]) - - def __setitem__(self, key, value): - if isinstance(value, Tensor): - self.data[key] = value.data - else: - self.data[key] = value - - def size(self, dim=None): - if dim is None: - return self.shape - return self.shape[dim] - - def view(self, *shape): - return Tensor(self.data.reshape(shape)) - - def transpose(self, dim0, dim1): - axes = list(range(len(self.shape))) - axes[dim0], axes[dim1] = axes[dim1], axes[dim0] - return Tensor(np.transpose(self.data, axes)) - - @staticmethod - def cat(tensors, dim=0): - """Concatenate tensors along dimension""" - arrays = [t.data for t in tensors] - return Tensor(np.concatenate(arrays, axis=dim)) - - @staticmethod - def zeros(*shape): - """Create zero tensor""" - return Tensor(np.zeros(shape)) -### END SOLUTION diff --git a/tinytorch/models/transformer.py b/tinytorch/models/transformer.py deleted file mode 100644 index 6e63d29b..00000000 --- a/tinytorch/models/transformer.py +++ /dev/null @@ -1,148 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_transformers/transformers_dev.ipynb. - -# %% auto 0 -__all__ = ['Tensor', 'Linear', 'MultiHeadAttention', 'Embedding', 'gelu'] - -# %% ../../modules/source/13_transformers/transformers_dev.ipynb 1 -import numpy as np -import math -from typing import Optional, List - -# Minimal implementations for development - in practice these import from previous modules -class Tensor: - """Minimal Tensor class for transformer development - imports from Module 01 in practice.""" - def __init__(self, data, requires_grad=False): - self.data = np.array(data) - self.shape = self.data.shape - self.size = self.data.size - self.requires_grad = requires_grad - self.grad = None - - def __add__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data + other.data) - return Tensor(self.data + other) - - def __mul__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data * other.data) - return Tensor(self.data * other) - - def matmul(self, other): - return Tensor(np.dot(self.data, other.data)) - - def sum(self, axis=None, keepdims=False): - return Tensor(self.data.sum(axis=axis, keepdims=keepdims)) - - def mean(self, axis=None, keepdims=False): - return Tensor(self.data.mean(axis=axis, keepdims=keepdims)) - - def reshape(self, *shape): - return Tensor(self.data.reshape(shape)) - - def __repr__(self): - return f"Tensor(data={self.data}, shape={self.shape})" - -class Linear: - """Minimal Linear layer - imports from Module 03 in practice.""" - def __init__(self, in_features, out_features, bias=True): - # Xavier/Glorot initialization - std = math.sqrt(2.0 / (in_features + out_features)) - self.weight = Tensor(np.random.normal(0, std, (in_features, out_features))) - self.bias = Tensor(np.zeros(out_features)) if bias else None - - def forward(self, x): - output = x.matmul(self.weight) - if self.bias is not None: - output = output + self.bias - return output - - def parameters(self): - params = [self.weight] - if self.bias is not None: - params.append(self.bias) - return params - -class MultiHeadAttention: - """Minimal MultiHeadAttention - imports from Module 12 in practice.""" - def __init__(self, embed_dim, num_heads): - assert embed_dim % num_heads == 0 - self.embed_dim = embed_dim - self.num_heads = num_heads - self.head_dim = embed_dim // num_heads - - self.q_proj = Linear(embed_dim, embed_dim) - self.k_proj = Linear(embed_dim, embed_dim) - self.v_proj = Linear(embed_dim, embed_dim) - self.out_proj = Linear(embed_dim, embed_dim) - - def forward(self, x, mask=None): - batch_size, seq_len, embed_dim = x.shape - - # Linear projections - Q = self.q_proj.forward(x) - K = self.k_proj.forward(x) - V = self.v_proj.forward(x) - - # Reshape for multi-head attention - Q = Q.reshape(batch_size, seq_len, self.num_heads, self.head_dim) - K = K.reshape(batch_size, seq_len, self.num_heads, self.head_dim) - V = V.reshape(batch_size, seq_len, self.num_heads, self.head_dim) - - # Transpose to (batch_size, num_heads, seq_len, head_dim) - Q = Tensor(np.transpose(Q.data, (0, 2, 1, 3))) - K = Tensor(np.transpose(K.data, (0, 2, 1, 3))) - V = Tensor(np.transpose(V.data, (0, 2, 1, 3))) - - # Scaled dot-product attention - scores = Tensor(np.matmul(Q.data, np.transpose(K.data, (0, 1, 3, 2)))) - scores = scores * (1.0 / math.sqrt(self.head_dim)) - - # Apply causal mask for autoregressive generation - if mask is not None: - scores = Tensor(scores.data + mask.data) - - # Softmax - attention_weights = self._softmax(scores) - - # Apply attention to values - out = Tensor(np.matmul(attention_weights.data, V.data)) - - # Transpose back and reshape - out = Tensor(np.transpose(out.data, (0, 2, 1, 3))) - out = out.reshape(batch_size, seq_len, embed_dim) - - # Final linear projection - return self.out_proj.forward(out) - - def _softmax(self, x): - """Numerically stable softmax.""" - exp_x = Tensor(np.exp(x.data - np.max(x.data, axis=-1, keepdims=True))) - return Tensor(exp_x.data / np.sum(exp_x.data, axis=-1, keepdims=True)) - - def parameters(self): - params = [] - params.extend(self.q_proj.parameters()) - params.extend(self.k_proj.parameters()) - params.extend(self.v_proj.parameters()) - params.extend(self.out_proj.parameters()) - return params - -class Embedding: - """Minimal Embedding layer - imports from Module 11 in practice.""" - def __init__(self, vocab_size, embed_dim): - self.vocab_size = vocab_size - self.embed_dim = embed_dim - # Initialize with small random values - self.weight = Tensor(np.random.normal(0, 0.02, (vocab_size, embed_dim))) - - def forward(self, indices): - # Simple embedding lookup - return Tensor(self.weight.data[indices.data]) - - def parameters(self): - return [self.weight] - -def gelu(x): - """GELU activation function.""" - return Tensor(0.5 * x.data * (1 + np.tanh(np.sqrt(2 / np.pi) * (x.data + 0.044715 * x.data**3)))) diff --git a/tinytorch/optimization/acceleration.py b/tinytorch/optimization/acceleration.py deleted file mode 100644 index d0ca1d13..00000000 --- a/tinytorch/optimization/acceleration.py +++ /dev/null @@ -1,8 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/16_acceleration/acceleration_dev.ipynb. - -# %% auto 0 -__all__ = [] - -# %% ../../modules/source/16_acceleration/acceleration_dev.ipynb 0 -#| default_exp optimization.acceleration -#| export diff --git a/tinytorch/optimization/compression.py b/tinytorch/optimization/compression.py deleted file mode 100644 index 01ef28ae..00000000 --- a/tinytorch/optimization/compression.py +++ /dev/null @@ -1,85 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/18_compression/compression_dev.ipynb. - -# %% auto 0 -__all__ = ['Tensor', 'Linear', 'Sequential'] - -# %% ../../modules/source/18_compression/compression_dev.ipynb 1 -import numpy as np -import copy -from typing import List, Dict, Any, Tuple, Optional -import time - -# Import from previous modules -# Note: In the full package, these would be imports like: -# from tinytorch.core.tensor import Tensor -# from tinytorch.core.layers import Linear -# For development, we'll create minimal implementations - -class Tensor: - """Minimal Tensor class for compression development - imports from Module 01 in practice.""" - def __init__(self, data, requires_grad=False): - self.data = np.array(data) - self.shape = self.data.shape - self.size = self.data.size - self.requires_grad = requires_grad - self.grad = None - - def __add__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data + other.data) - return Tensor(self.data + other) - - def __mul__(self, other): - if isinstance(other, Tensor): - return Tensor(self.data * other.data) - return Tensor(self.data * other) - - def matmul(self, other): - return Tensor(np.dot(self.data, other.data)) - - def abs(self): - return Tensor(np.abs(self.data)) - - def sum(self, axis=None): - return Tensor(self.data.sum(axis=axis)) - - def __repr__(self): - return f"Tensor(shape={self.shape})" - -class Linear: - """Minimal Linear layer for compression development - imports from Module 03 in practice.""" - def __init__(self, in_features, out_features, bias=True): - self.in_features = in_features - self.out_features = out_features - # Initialize with He initialization - self.weight = Tensor(np.random.randn(in_features, out_features) * np.sqrt(2.0 / in_features)) - self.bias = Tensor(np.zeros(out_features)) if bias else None - - def forward(self, x): - output = x.matmul(self.weight) - if self.bias is not None: - output = output + self.bias - return output - - def parameters(self): - params = [self.weight] - if self.bias is not None: - params.append(self.bias) - return params - -class Sequential: - """Minimal Sequential container for model compression.""" - def __init__(self, *layers): - self.layers = list(layers) - - def forward(self, x): - for layer in self.layers: - x = layer.forward(x) - return x - - def parameters(self): - params = [] - for layer in self.layers: - if hasattr(layer, 'parameters'): - params.extend(layer.parameters()) - return params diff --git a/tinytorch/optimization/quantization.py b/tinytorch/optimization/quantization.py deleted file mode 100644 index f5bc7cfd..00000000 --- a/tinytorch/optimization/quantization.py +++ /dev/null @@ -1,8 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/17_quantization/quantization_dev.ipynb. - -# %% auto 0 -__all__ = [] - -# %% ../../modules/source/17_quantization/quantization_dev.ipynb 0 -#| default_exp optimization.quantization -#| export diff --git a/tinytorch/profiling/profiler.py b/tinytorch/profiling/profiler.py deleted file mode 100644 index 82aca521..00000000 --- a/tinytorch/profiling/profiler.py +++ /dev/null @@ -1,35 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/15_profiling/profiling_dev.ipynb. - -# %% auto 0 -__all__ = [] - -# %% ../../modules/source/15_profiling/profiling_dev.ipynb 1 -import time -import numpy as np -import tracemalloc -from typing import Dict, List, Any, Optional, Tuple -from collections import defaultdict -import gc - -# Import our TinyTorch components for profiling -import sys -import os -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor')) -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers')) -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '09_spatial')) - -# For testing purposes - in real package these would be proper imports -try: - from tensor_dev import Tensor - from layers_dev import Linear, Sequential - from spatial_dev import Conv2d -except ImportError: - # Fallback - create minimal implementations for testing - class Tensor: - def __init__(self, data): - self.data = np.array(data) - self.shape = self.data.shape - def __mul__(self, other): - return Tensor(self.data * other.data) - def sum(self): - return Tensor(np.sum(self.data)) diff --git a/tinytorch/text/tokenization.py b/tinytorch/text/tokenization.py deleted file mode 100644 index 10a40e63..00000000 --- a/tinytorch/text/tokenization.py +++ /dev/null @@ -1,8 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/10_tokenization/tokenization_dev.ipynb. - -# %% auto 0 -__all__ = [] - -# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 0 -#| default_exp text.tokenization -#| export