diff --git a/modules/source/04_losses/losses_dev.ipynb b/modules/source/04_losses/losses_dev.ipynb index 39f53cf2..d5b84fd1 100644 --- a/modules/source/04_losses/losses_dev.ipynb +++ b/modules/source/04_losses/losses_dev.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "9d798b1c", + "id": "dc4a8074", "metadata": { "cell_marker": "\"\"\"" }, @@ -35,7 +35,7 @@ }, { "cell_type": "markdown", - "id": "91804987", + "id": "08ab6b0b", "metadata": { "cell_marker": "\"\"\"" }, @@ -59,7 +59,7 @@ }, { "cell_type": "markdown", - "id": "c09dc686", + "id": "848eaef7", "metadata": { "cell_marker": "\"\"\"" }, @@ -80,7 +80,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51189bc1", + "id": "90d6651a", "metadata": { "nbgrader": { "grade": false, @@ -94,8 +94,6 @@ "#| export\n", "\n", "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import time\n", "from typing import Optional\n", "\n", "def import_previous_module(module_name: str, component_name: str):\n", @@ -113,7 +111,7 @@ }, { "cell_type": "markdown", - "id": "cc227c2d", + "id": "529a8e8a", "metadata": { "cell_marker": "\"\"\"" }, @@ -189,7 +187,7 @@ }, { "cell_type": "markdown", - "id": "49e5039b", + "id": "4e69ba6d", "metadata": { "cell_marker": "\"\"\"" }, @@ -235,7 +233,7 @@ }, { "cell_type": "markdown", - "id": "b1e1cbd0", + "id": "b9a1fa2c", "metadata": { "cell_marker": "\"\"\"" }, @@ -247,7 +245,7 @@ }, { "cell_type": "markdown", - "id": "820e9937", + "id": "bf3b7915", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -297,7 +295,7 @@ { "cell_type": "code", "execution_count": null, - "id": "854758b3", + "id": "085562d6", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -348,7 +346,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6b57e650", + "id": "d274d1e1", "metadata": { "nbgrader": { "grade": true, @@ -389,7 +387,7 @@ }, { "cell_type": "markdown", - "id": "b8be9f2c", + "id": "d51980c3", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -459,7 +457,7 @@ { "cell_type": "code", "execution_count": null, - "id": "aca5154a", + "id": "1107bf9d", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -531,7 +529,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7391538b", + "id": "80f97626", "metadata": { "nbgrader": { "grade": true, @@ -577,7 +575,7 @@ }, { "cell_type": "markdown", - "id": "0b9b254c", + "id": "14b2d795", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -670,7 +668,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eb59fb50", + "id": "c0a10af0", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -746,7 +744,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c59fbbfd", + "id": "24685fb9", "metadata": { "nbgrader": { "grade": true, @@ -797,7 +795,7 @@ }, { "cell_type": "markdown", - "id": "599727d1", + "id": "68a261f3", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -906,7 +904,7 @@ { "cell_type": "code", "execution_count": null, - "id": "54a20f3f", + "id": "b02977aa", "metadata": { "lines_to_next_cell": 1, "nbgrader": { @@ -982,7 +980,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1bab9d23", + "id": "722d5c07", "metadata": { "nbgrader": { "grade": true, @@ -1033,7 +1031,7 @@ }, { "cell_type": "markdown", - "id": "ca40b581", + "id": "88bad600", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1090,7 +1088,7 @@ { "cell_type": "code", "execution_count": null, - "id": "76b4eb81", + "id": "b5a701fe", "metadata": { "nbgrader": { "grade": false, @@ -1146,7 +1144,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b90c91f0", + "id": "35891a55", "metadata": { "nbgrader": { "grade": false, @@ -1211,7 +1209,7 @@ }, { "cell_type": "markdown", - "id": "e2fc1aa7", + "id": "95e3f483", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1286,7 +1284,7 @@ { "cell_type": "code", "execution_count": null, - "id": "573fa75d", + "id": "c46f9468", "metadata": { "nbgrader": { "grade": false, @@ -1336,7 +1334,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7f12c78", + "id": "d95c49f1", "metadata": { "nbgrader": { "grade": false, @@ -1393,7 +1391,7 @@ }, { "cell_type": "markdown", - "id": "4c6ebac9", + "id": "88afe536", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 1 @@ -1457,7 +1455,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d0b635c1", + "id": "232f8764", "metadata": { "nbgrader": { "grade": false, @@ -1513,7 +1511,7 @@ }, { "cell_type": "markdown", - "id": "d770e887", + "id": "f00b5616", "metadata": { "cell_marker": "\"\"\"" }, @@ -1526,7 +1524,7 @@ { "cell_type": "code", "execution_count": null, - "id": "55fd411d", + "id": "76ec7947", "metadata": { "nbgrader": { "grade": true, @@ -1606,7 +1604,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b66f2370", + "id": "e635a52d", "metadata": { "lines_to_next_cell": 2 }, @@ -1619,7 +1617,7 @@ }, { "cell_type": "markdown", - "id": "ce0d9c33", + "id": "da28d331", "metadata": { "cell_marker": "\"\"\"" }, diff --git a/modules/source/04_losses/losses_dev.py b/modules/source/04_losses/losses_dev.py index 6e8eab42..4dd01033 100644 --- a/modules/source/04_losses/losses_dev.py +++ b/modules/source/04_losses/losses_dev.py @@ -79,8 +79,6 @@ The `import_previous_module()` function below helps us cleanly import components #| export import numpy as np -import matplotlib.pyplot as plt -import time from typing import Optional def import_previous_module(module_name: str, component_name: str): diff --git a/test_xor_original_1986.py b/test_xor_original_1986.py new file mode 100644 index 00000000..c8028998 --- /dev/null +++ b/test_xor_original_1986.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +""" +Original 1986 XOR Solution - Rumelhart, Hinton, Williams +Testing the MINIMAL architecture that solved the XOR crisis. +""" +import sys +sys.path.insert(0, '.') + +import numpy as np +from tinytorch import Tensor, Linear, Sigmoid, BinaryCrossEntropyLoss, SGD + +print("=" * 70) +print("šŸ›ļø ORIGINAL 1986 XOR SOLUTION") +print("Rumelhart, Hinton, Williams - 'Learning representations by back-propagating errors'") +print("=" * 70) + +# Pure XOR +X_data = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]], dtype=np.float32) +y_data = np.array([[0.0], [1.0], [1.0], [0.0]], dtype=np.float32) + +X = Tensor(X_data) +y = Tensor(y_data) + +print("\nšŸ—ļø Architecture (1986 style):") +print(" Input: 2 neurons") +print(" Hidden: 2 neurons (MINIMAL!)") +print(" Output: 1 neuron") +print(" Activation: Sigmoid (ReLU didn't exist yet!)") +print(" Total params: 9 (2Ɨ2 weights + 2 bias + 2Ɨ1 weights + 1 bias)") + +# Original architecture: 2-2-1 with Sigmoid +hidden = Linear(2, 2) # Only 2 hidden neurons! +sigmoid_hidden = Sigmoid() +output = Linear(2, 1) +sigmoid_output = Sigmoid() + +loss_fn = BinaryCrossEntropyLoss() +optimizer = SGD([p for p in hidden.parameters()] + [p for p in output.parameters()], lr=1.0) + +print("\nšŸ”„ Training with original 1986 architecture...") +epochs = 2000 # May need more epochs with only 2 hidden units + +for epoch in range(epochs): + # Forward (all sigmoid, like 1986!) + h = hidden(X) + h_act = sigmoid_hidden(h) # Sigmoid in hidden layer + out = output(h_act) + pred = sigmoid_output(out) # Sigmoid in output layer + loss = loss_fn(pred, y) + + # Backward + loss.backward() + + # Update + optimizer.step() + optimizer.zero_grad() + + if (epoch + 1) % 400 == 0: + accuracy = ((pred.data > 0.5).astype(float) == y.data).mean() + print(f"Epoch {epoch+1:4d}/{epochs} Loss: {loss.data:.4f} Accuracy: {accuracy:.1%}") + +# Final evaluation +print("\nāœ… Final Results:") +final_accuracy = ((pred.data > 0.5).astype(float) == y.data).mean() + +for i in range(4): + x_in = X_data[i] + y_true = int(y_data[i, 0]) + y_pred_prob = pred.data[i, 0] + y_pred = int(y_pred_prob > 0.5) + status = "āœ…" if y_pred == y_true else "āŒ" + print(f" Input: {x_in} → Pred: {y_pred} (prob: {y_pred_prob:.3f}) True: {y_true} {status}") + +print(f"\nšŸ“Š Final Accuracy: {final_accuracy:.1%}") +print(f"šŸ“Š Final Loss: {loss.data:.4f}") + +if final_accuracy == 1.0: + print("\nšŸŽ‰ SUCCESS! XOR solved with MINIMAL 1986 architecture!") + print(" This is exactly what ended the AI Winter!") +else: + print(f"\nāš ļø Accuracy: {final_accuracy:.1%} - may need more training") + +# Show what the hidden units learned +print("\n🧠 What the 2 hidden neurons learned:") +print(" (Examining activation patterns)") +h_activations = sigmoid_hidden(hidden(X)).data +print(f"\n Hidden unit activations for each input:") +for i, x_in in enumerate(X_data): + print(f" {x_in}: h1={h_activations[i,0]:.3f}, h2={h_activations[i,1]:.3f}") + +print("\n" + "=" * 70) +print("šŸ’” Historical Note:") +print(" This 2-2-1 architecture ended the 17-year AI Winter!") +print(" Proved that backprop + hidden layers solve 'impossible' problems") +print("=" * 70) diff --git a/tinytorch/core/losses.py b/tinytorch/core/losses.py index 348bed68..9bb9e9be 100644 --- a/tinytorch/core/losses.py +++ b/tinytorch/core/losses.py @@ -19,8 +19,6 @@ __all__ = ['import_previous_module', 'MSELoss', 'CrossEntropyLoss', 'BinaryCross # %% ../../modules/source/04_losses/losses_dev.ipynb 3 import numpy as np -import matplotlib.pyplot as plt -import time from typing import Optional def import_previous_module(module_name: str, component_name: str):