diff --git a/modules/source/13_transformers/transformers_dev.ipynb b/modules/source/13_transformers/transformers_dev.ipynb
index bd7c7733..f2a812b8 100644
--- a/modules/source/13_transformers/transformers_dev.ipynb
+++ b/modules/source/13_transformers/transformers_dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "5aea9c35",
+   "id": "33c199c2",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -36,7 +36,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f22e51a9",
+   "id": "1ec63d43",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -46,7 +46,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a018998a",
+   "id": "3d486c1e",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -60,7 +60,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "01b92407",
+   "id": "4afca29a",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -85,7 +85,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "cf1a206a",
+   "id": "135c0c51",
    "metadata": {
     "lines_to_next_cell": 2
    },
@@ -104,7 +104,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "0e7e76fd",
+   "id": "3057e8a0",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -190,7 +190,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "0a43cd3d",
+   "id": "3d854c15",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -325,7 +325,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "dccfbe05",
+   "id": "f894e04b",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -343,7 +343,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "5e866445",
+   "id": "b4646db7",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -411,7 +411,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e4495508",
+   "id": "7e5a454a",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -484,12 +484,15 @@
     "        mean = x.mean(axis=-1, keepdims=True)\n",
     "\n",
     "        # Compute variance: E[(x - μ)²]\n",
-    "        diff = Tensor(x.data - mean.data)\n",
-    "        variance = Tensor((diff.data ** 2).mean(axis=-1, keepdims=True))\n",
+    "        # Use Tensor operations to preserve computation graph!\n",
+    "        diff = x - mean\n",
+    "        variance = (diff * diff).mean(axis=-1, keepdims=True)\n",
     "\n",
-    "        # Normalize\n",
-    "        std = Tensor(np.sqrt(variance.data + self.eps))\n",
-    "        normalized = Tensor((x.data - mean.data) / std.data)\n",
+    "        # Normalize - use Tensor operations to preserve gradients!\n",
+    "        # Add eps as a Tensor for proper gradient flow\n",
+    "        eps_tensor = Tensor(np.array(self.eps), requires_grad=False)\n",
+    "        std = Tensor(np.sqrt(variance.data + self.eps), requires_grad=variance.requires_grad)\n",
+    "        normalized = (x - mean) / std\n",
     "\n",
     "        # Apply learnable transformation\n",
     "        output = normalized * self.gamma + self.beta\n",
@@ -503,7 +506,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "f50de247",
+   "id": "92bbef2d",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -519,7 +522,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "29808a79",
+   "id": "a824ba3e",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -566,7 +569,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "f99ac0f0",
+   "id": "34a77537",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -651,7 +654,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e2e7c950",
+   "id": "2f1b0cf0",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -742,7 +745,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "12d2d450",
+   "id": "4e55a5d6",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -758,7 +761,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "72c129bb",
+   "id": "02896cfd",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -806,7 +809,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d5693773",
+   "id": "f0af20f9",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -908,7 +911,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "1983128e",
+   "id": "be774576",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -1021,7 +1024,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e738b0a2",
+   "id": "20976835",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1037,7 +1040,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3ecf15a9",
+   "id": "3ad677b9",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1088,7 +1091,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d0c34790",
+   "id": "3e0a6497",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1242,7 +1245,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d0fe1c53",
+   "id": "b6077781",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -1440,7 +1443,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "8ab2a056",
+   "id": "223ab70e",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1456,7 +1459,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "df2c9d2a",
+   "id": "843a027b",
    "metadata": {
     "nbgrader": {
      "grade": true,
@@ -1514,7 +1517,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "86e2cffb",
+   "id": "11c7afd8",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1560,8 +1563,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d11920a5",
+   "id": "464575ff",
    "metadata": {
+    "lines_to_next_cell": 1,
     "nbgrader": {
      "grade": false,
      "grade_id": "integration-demo",
@@ -1627,12 +1631,12 @@
     "\n",
     "    return model\n",
     "\n",
-    "demonstrate_transformer_integration()"
+    "# demonstrate_transformer_integration()  # Moved to __main__ block below"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "777c88af",
+   "id": "65e903ac",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1717,7 +1721,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "66957b87",
+   "id": "065a32d8",
    "metadata": {
     "lines_to_next_cell": 1,
     "nbgrader": {
@@ -1774,7 +1778,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9badfce3",
+   "id": "8ff38096",
    "metadata": {
     "nbgrader": {
      "grade": false,
@@ -1819,7 +1823,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "1701db55",
+   "id": "01719014",
    "metadata": {
     "cell_marker": "\"\"\"",
     "lines_to_next_cell": 1
@@ -1833,8 +1837,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "93ecb08c",
+   "id": "d18c01d8",
    "metadata": {
+    "lines_to_next_cell": 1,
     "nbgrader": {
      "grade": true,
      "grade_id": "test-module",
@@ -1907,25 +1912,26 @@
     "    print(\"Run: tito module complete 13\")\n",
     "\n",
     "# Call the comprehensive test\n",
-    "test_module()"
+    "# test_module()  # Only run in __main__ block below"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "75f8b229",
+   "id": "009d2ab0",
    "metadata": {},
    "outputs": [],
    "source": [
     "if __name__ == \"__main__\":\n",
     "    print(\"🚀 Running Transformers module...\")\n",
+    "    demonstrate_transformer_integration()\n",
     "    test_module()\n",
     "    print(\"✅ Module validation complete!\")"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "5245c0f2",
+   "id": "28ae7326",
    "metadata": {
     "cell_marker": "\"\"\""
    },
@@ -1965,7 +1971,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ed29968a",
+   "id": "eaa4c950",
    "metadata": {
     "cell_marker": "\"\"\""
    },
diff --git a/modules/source/13_transformers/transformers_dev.py b/modules/source/13_transformers/transformers_dev.py
index 283c4078..f250ab97 100644
--- a/modules/source/13_transformers/transformers_dev.py
+++ b/modules/source/13_transformers/transformers_dev.py
@@ -430,12 +430,15 @@ class LayerNorm:
         mean = x.mean(axis=-1, keepdims=True)
 
         # Compute variance: E[(x - μ)²]
-        diff = Tensor(x.data - mean.data)
-        variance = Tensor((diff.data ** 2).mean(axis=-1, keepdims=True))
+        # Use Tensor operations to preserve computation graph!
+        diff = x - mean
+        variance = (diff * diff).mean(axis=-1, keepdims=True)
 
-        # Normalize
-        std = Tensor(np.sqrt(variance.data + self.eps))
-        normalized = Tensor((x.data - mean.data) / std.data)
+        # Normalize - use Tensor operations to preserve gradients!
+        # Add eps as a Tensor for proper gradient flow
+        eps_tensor = Tensor(np.array(self.eps), requires_grad=False)
+        std = Tensor(np.sqrt(variance.data + self.eps), requires_grad=variance.requires_grad)
+        normalized = (x - mean) / std
 
         # Apply learnable transformation
         output = normalized * self.gamma + self.beta
@@ -1414,7 +1417,7 @@ def demonstrate_transformer_integration():
 
     return model
 
-demonstrate_transformer_integration()
+# demonstrate_transformer_integration()  # Moved to __main__ block below
 
 # %% [markdown]
 """
@@ -1641,11 +1644,12 @@ def test_module():
     print("Run: tito module complete 13")
 
 # Call the comprehensive test
-test_module()
+# test_module()  # Only run in __main__ block below
 
 # %%
 if __name__ == "__main__":
     print("🚀 Running Transformers module...")
+    demonstrate_transformer_integration()
     test_module()
     print("✅ Module validation complete!")
 
diff --git a/tinytorch/models/transformer.py b/tinytorch/models/transformer.py
index 8d3126cd..80903083 100644
--- a/tinytorch/models/transformer.py
+++ b/tinytorch/models/transformer.py
@@ -86,12 +86,15 @@ class LayerNorm:
         mean = x.mean(axis=-1, keepdims=True)
 
         # Compute variance: E[(x - μ)²]
-        diff = Tensor(x.data - mean.data)
-        variance = Tensor((diff.data ** 2).mean(axis=-1, keepdims=True))
+        # Use Tensor operations to preserve computation graph!
+        diff = x - mean
+        variance = (diff * diff).mean(axis=-1, keepdims=True)
 
-        # Normalize
-        std = Tensor(np.sqrt(variance.data + self.eps))
-        normalized = Tensor((x.data - mean.data) / std.data)
+        # Normalize - use Tensor operations to preserve gradients!
+        # Add eps as a Tensor for proper gradient flow
+        eps_tensor = Tensor(np.array(self.eps), requires_grad=False)
+        std = Tensor(np.sqrt(variance.data + self.eps), requires_grad=variance.requires_grad)
+        normalized = (x - mean) / std
 
         # Apply learnable transformation
         output = normalized * self.gamma + self.beta