"""
|
|
Network Capability Tests for Module 05
|
|
Tests that networks can solve non-linear problems
|
|
"""
|
|
|
|
import numpy as np
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add project root to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent.parent))


class TestXORCapability:
    """Test that multi-layer networks can solve XOR."""

    def test_xor_network_structure(self):
        """Test building a network for the XOR problem."""
        from tinytorch.core.layers import Dense
        from tinytorch.core.activations import ReLU, Sigmoid
        from tinytorch.core.tensor import Tensor

        # Build XOR network: 2 -> 4 -> 1
        hidden = Dense(2, 4, use_bias=True)
        output = Dense(4, 1, use_bias=True)
        relu = ReLU()
        sigmoid = Sigmoid()

        # XOR inputs
        X = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32))

        # Forward pass
        h = hidden(X)
        h_activated = relu(h)
        out = output(h_activated)
        predictions = sigmoid(out)

        assert predictions.shape == (4, 1)
        assert np.all(predictions.data >= 0) and np.all(predictions.data <= 1)

    def test_xor_network_expressiveness(self):
        """Test that the network has enough capacity for XOR."""
        from tinytorch.core.layers import Dense

        # XOR needs at least 2 hidden units
        hidden = Dense(2, 4)  # 4 hidden units is sufficient
        output = Dense(4, 1)

        # Count parameters
        hidden_params = 2 * 4 + 4  # weights + bias
        output_params = 4 * 1 + 1  # weights + bias
        total_params = hidden_params + output_params

        # A minimal 2-2-1 XOR network needs 9 parameters (4 + 2 weights,
        # 2 + 1 biases), so this network has capacity to spare
        assert total_params >= 9
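
        # Illustrative helper (not part of the TinyTorch API): the general
        # parameter count for a Dense(n_in, n_out) layer with bias
        def dense_param_count(n_in, n_out):
            return n_in * n_out + n_out  # weight matrix + bias vector

        assert dense_param_count(2, 4) == hidden_params
        assert dense_param_count(4, 1) == output_params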

    def test_nonlinearity_required(self):
        """Test that non-linearity is essential for solving XOR."""
        from tinytorch.core.layers import Dense
        from tinytorch.core.activations import ReLU
        from tinytorch.core.tensor import Tensor

        # Without an activation, a stack of Dense layers collapses to a
        # single linear map, which cannot separate XOR
        layer1 = Dense(2, 4)
        layer2 = Dense(4, 1)
        relu = ReLU()

        X = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32))

        # Linear network (no activation)
        linear_out = layer2(layer1(X))

        # Non-linear network (with activation)
        nonlinear_out = layer2(relu(layer1(X)))

        # The two paths should produce different outputs
        assert not np.allclose(linear_out.data, nonlinear_out.data)
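
    def test_xor_hand_constructed_solution(self):
        """Sketch: a hand-picked 2-hidden-unit ReLU network computes XOR exactly.

        Illustrative addition in pure NumPy (no TinyTorch API assumed):
        h1 = relu(x1 + x2), h2 = relu(x1 + x2 - 1), y = h1 - 2 * h2.
        """
        X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)

        # Hidden layer: both units sum the inputs; the second is shifted by -1
        W1 = np.array([[1, 1], [1, 1]], dtype=np.float32)
        b1 = np.array([0, -1], dtype=np.float32)
        h = np.maximum(X @ W1 + b1, 0)  # ReLU

        # Output layer: subtracting the shifted unit twice cancels the (1, 1) case
        W2 = np.array([[1], [-2]], dtype=np.float32)
        y = h @ W2

        assert np.allclose(y, np.array([[0], [1], [1], [0]]))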


class TestMLPCapabilities:
    """Test Multi-Layer Perceptron capabilities."""

    def test_universal_approximation(self):
        """Test that MLPs can approximate continuous functions."""
        from tinytorch.core.layers import Dense
        from tinytorch.core.activations import ReLU
        from tinytorch.core.tensor import Tensor

        # A sufficiently wide hidden layer can approximate any continuous
        # function on a bounded interval (universal approximation theorem)
        layer1 = Dense(1, 100)  # wide hidden layer
        relu = ReLU()
        layer2 = Dense(100, 1)

        # Test on a simple function: sin(x)
        x = np.linspace(-np.pi, np.pi, 50).reshape(-1, 1).astype(np.float32)
        X = Tensor(x)

        # The network should be able to produce varied outputs
        h = relu(layer1(X))
        output = layer2(h)

        # Check that the network produces non-constant output
        assert output.data.std() > 0  # not all the same value
        assert output.shape == (50, 1)
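
    def test_relu_features_fit_sine(self):
        """Sketch: fixed ReLU features plus a least-squares readout fit sin(x).

        Illustrative addition in pure NumPy (no TinyTorch API assumed): a
        one-hidden-layer ReLU network is piecewise linear, and with enough
        units it can track a continuous target closely.
        """
        x = np.linspace(-np.pi, np.pi, 200).reshape(-1, 1)
        knots = np.linspace(-np.pi, np.pi, 25)

        # Hidden activations: one shifted ReLU per knot, plus a bias column
        H = np.maximum(x - knots, 0.0)
        H = np.hstack([H, np.ones_like(x)])

        # Fit the output layer in closed form (linear least squares)
        w, *_ = np.linalg.lstsq(H, np.sin(x), rcond=None)
        max_err = np.abs(H @ w - np.sin(x)).max()

        assert max_err < 0.05  # the piecewise-linear fit tracks sin(x) closely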

    def test_deep_network(self):
        """Test building deep networks."""
        from tinytorch.core.layers import Dense
        from tinytorch.core.tensor import Tensor

        # Build a 5-layer network that narrows 100 -> 50 -> 25 -> 12 -> 6 -> 1
        layers = [
            Dense(100, 50),
            Dense(50, 25),
            Dense(25, 12),
            Dense(12, 6),
            Dense(6, 1),
        ]

        x = Tensor(np.random.randn(16, 100).astype(np.float32))

        # Forward through all layers
        for layer in layers:
            x = layer(x)

        assert x.shape == (16, 1)
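
    def test_layer_shape_chaining(self):
        """Sketch: adjacent layer dims must chain (each out feeds the next in).

        Illustrative addition in pure NumPy (no TinyTorch API assumed),
        tracing the same 100 -> 50 -> 25 -> 12 -> 6 -> 1 pyramid.
        """
        dims = [100, 50, 25, 12, 6, 1]
        x = np.random.randn(16, dims[0]).astype(np.float32)

        for n_in, n_out in zip(dims[:-1], dims[1:]):
            W = np.random.randn(n_in, n_out).astype(np.float32)
            x = x @ W  # (16, n_in) @ (n_in, n_out) -> (16, n_out)
            assert x.shape == (16, n_out)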