"""
|
|
Test suite for the layers module.
|
|
This tests the student implementations to ensure they work correctly.
|
|
"""
|
|
|
|
import pytest
|
|
import numpy as np
|
|
import sys
|
|
import os
|
|
|
|
# Import from the main package (rock solid foundation)
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.layers import Dense
|
|
from tinytorch.core.activations import ReLU, Sigmoid, Tanh


def safe_numpy(tensor):
    """Get numpy array from tensor, using .numpy() if available, otherwise .data."""
    if hasattr(tensor, 'numpy'):
        return tensor.numpy()
    return tensor.data


class TestDenseLayer:
    """Test Dense (Linear) layer functionality."""

    def test_dense_creation(self):
        """Test creating Dense layers with different configurations."""
        # Basic dense layer
        layer = Dense(input_size=3, output_size=2)
        assert layer.input_size == 3
        assert layer.output_size == 2
        assert layer.use_bias
        assert layer.weights.shape == (3, 2)
        assert layer.bias.shape == (2,)
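
        # Note: weights are stored as (input_size, output_size), so the forward
        # pass is x @ W; some frameworks (e.g. PyTorch's nn.Linear) store the
        # transposed (out, in) layout instead.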

        # Dense layer without bias
        layer_no_bias = Dense(input_size=4, output_size=3, use_bias=False)
        assert not layer_no_bias.use_bias
        assert layer_no_bias.bias is None

    def test_dense_forward_single(self):
        """Test Dense layer forward pass with single input."""
        layer = Dense(input_size=3, output_size=2)

        # Single input
        x = Tensor([[1.0, 2.0, 3.0]])
        y = layer(x)

        assert y.shape == (1, 2)
        assert isinstance(y, Tensor)
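
        # A minimal numeric check, assuming the standard affine form
        # y = x @ W + b with the bias broadcast across the batch
        # (mirrors the explicit check in test_dense_no_bias below).
        expected = safe_numpy(x) @ safe_numpy(layer.weights) + safe_numpy(layer.bias)
        np.testing.assert_array_almost_equal(safe_numpy(y), expected)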

    def test_dense_forward_batch(self):
        """Test Dense layer forward pass with batch input."""
        layer = Dense(input_size=3, output_size=2)

        # Batch input
        x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        y = layer(x)
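
        # Shape arithmetic: (batch, in) @ (in, out) -> (batch, out),
        # here (2, 3) @ (3, 2) -> (2, 2); the same weights apply to every row.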
        assert y.shape == (2, 2)
        assert isinstance(y, Tensor)

    def test_dense_no_bias(self):
        """Test Dense layer without bias."""
        layer = Dense(input_size=2, output_size=1, use_bias=False)

        x = Tensor([[1.0, 2.0]])
        y = layer(x)

        assert y.shape == (1, 1)
        # Should be just matrix multiplication without bias
        expected = safe_numpy(x) @ safe_numpy(layer.weights)
        np.testing.assert_array_almost_equal(safe_numpy(y), expected)

    def test_dense_callable(self):
        """Test that Dense layer is callable."""
        layer = Dense(input_size=2, output_size=1)
        x = Tensor([[1.0, 2.0]])

        # forward() and __call__ should produce identical results
        y1 = layer.forward(x)
        y2 = layer(x)

        np.testing.assert_array_equal(safe_numpy(y1), safe_numpy(y2))


class TestActivationFunctions:
    """Test activation function implementations."""

    def test_relu_basic(self):
        """Test ReLU activation function."""
        relu = ReLU()
        x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])
        y = relu(x)

        expected = [[0.0, 0.0, 0.0, 1.0, 2.0]]
        np.testing.assert_array_equal(safe_numpy(y), expected)

    def test_relu_callable(self):
        """Test that ReLU is callable."""
        relu = ReLU()
        x = Tensor([[1.0, -1.0]])

        y1 = relu.forward(x)
        y2 = relu(x)

        np.testing.assert_array_equal(safe_numpy(y1), safe_numpy(y2))

    def test_sigmoid_basic(self):
        """Test Sigmoid activation function."""
        sigmoid = Sigmoid()
        x = Tensor([[0.0]])  # sigmoid(0) = 0.5
        y = sigmoid(x)

        np.testing.assert_array_almost_equal(safe_numpy(y), [[0.5]])

    def test_sigmoid_range(self):
        """Test Sigmoid output range."""
        sigmoid = Sigmoid()
        x = Tensor([[-10.0, 0.0, 10.0]])
        y = sigmoid(x)
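
        # Cross-check against the closed form sigmoid(x) = 1 / (1 + exp(-x)),
        # assuming Sigmoid implements the standard logistic function.
        np.testing.assert_array_almost_equal(
            safe_numpy(y), 1.0 / (1.0 + np.exp(-safe_numpy(x))))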

        # Outputs must lie within [0, 1]
        assert np.all(safe_numpy(y) >= 0)
        assert np.all(safe_numpy(y) <= 1)
        # Extreme inputs should saturate close to the bounds
        assert safe_numpy(y)[0][0] < 0.01  # sigmoid(-10) is near 0
        assert safe_numpy(y)[0][2] > 0.99  # sigmoid(10) is near 1

    def test_tanh_basic(self):
        """Test Tanh activation function."""
        tanh = Tanh()
        x = Tensor([[0.0]])  # tanh(0) = 0
        y = tanh(x)

        np.testing.assert_array_almost_equal(safe_numpy(y), [[0.0]])

    def test_tanh_range(self):
        """Test Tanh output range."""
        tanh = Tanh()
        x = Tensor([[-10.0, 0.0, 10.0]])
        y = tanh(x)
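
        # Cross-check against NumPy's reference implementation, assuming
        # Tanh matches np.tanh elementwise.
        np.testing.assert_array_almost_equal(safe_numpy(y), np.tanh(safe_numpy(x)))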

        # Outputs must lie within [-1, 1]
        assert np.all(safe_numpy(y) >= -1)
        assert np.all(safe_numpy(y) <= 1)
        # Extreme inputs should saturate close to the bounds
        assert safe_numpy(y)[0][0] < -0.99  # tanh(-10) is near -1
        assert safe_numpy(y)[0][2] > 0.99   # tanh(10) is near 1


class TestLayerComposition:
    """Test composing layers into neural networks."""

    def test_simple_network(self):
        """Test a simple 2-layer network."""
        # 3 → 4 → 2 network
        layer1 = Dense(input_size=3, output_size=4)
        relu = ReLU()
        layer2 = Dense(input_size=4, output_size=2)
        sigmoid = Sigmoid()

        # Forward pass
        x = Tensor([[1.0, 2.0, 3.0]])
        h1 = layer1(x)
        h1_activated = relu(h1)
        h2 = layer2(h1_activated)
        output = sigmoid(h2)
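
        # Shape flow: (1, 3) -> Dense -> (1, 4) -> ReLU -> (1, 4)
        #             -> Dense -> (1, 2) -> Sigmoid -> (1, 2)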
        assert h1.shape == (1, 4)
        assert h1_activated.shape == (1, 4)
        assert h2.shape == (1, 2)
        assert output.shape == (1, 2)

        # Output should be in sigmoid range
        assert np.all(safe_numpy(output) >= 0)
        assert np.all(safe_numpy(output) <= 1)

    def test_batch_network(self):
        """Test network with batch processing."""
        layer1 = Dense(input_size=2, output_size=3)
        relu = ReLU()
        layer2 = Dense(input_size=3, output_size=1)

        # Batch of 4 examples
        x = Tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])

        h1 = layer1(x)
        h1_activated = relu(h1)
        output = layer2(h1_activated)

        assert output.shape == (4, 1)

    def test_deep_network(self):
        """Test deeper network composition."""
        # Deeper network: four Dense layers interleaved with activations
        layers = [
            Dense(input_size=10, output_size=8),
            ReLU(),
            Dense(input_size=8, output_size=6),
            ReLU(),
            Dense(input_size=6, output_size=4),
            ReLU(),
            Dense(input_size=4, output_size=2),
            Sigmoid()
        ]

        x = Tensor([[1.0] * 10])  # 10 features

        # Forward pass through all layers
        current = x
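        # This loop is effectively a hand-rolled Sequential container:
        # each layer consumes the previous layer's output.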
        for layer in layers:
            current = layer(current)

        assert current.shape == (1, 2)
        # Final output should be in sigmoid range
        assert np.all(safe_numpy(current) >= 0)
        assert np.all(safe_numpy(current) <= 1)


class TestEdgeCases:
    """Test edge cases and error conditions."""

    def test_zero_input(self):
        """Test layers with zero input."""
        layer = Dense(input_size=3, output_size=2)
        relu = ReLU()

        x = Tensor([[0.0, 0.0, 0.0]])
        y = layer(x)
        y_relu = relu(y)

        assert y.shape == (1, 2)
        assert y_relu.shape == (1, 2)

    def test_large_input(self):
        """Test layers with large input values."""
        layer = Dense(input_size=2, output_size=1)
        sigmoid = Sigmoid()

        x = Tensor([[1000.0, -1000.0]])
        y = layer(x)
        y_sigmoid = sigmoid(y)
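
        # A numerically stable sigmoid typically branches on sign, e.g.
        # 1/(1+exp(-x)) for x >= 0 and exp(x)/(1+exp(x)) for x < 0,
        # so that exp never receives a large positive argument.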

        # Should not overflow
        assert not np.any(np.isnan(safe_numpy(y_sigmoid)))
        assert not np.any(np.isinf(safe_numpy(y_sigmoid)))

    def test_single_neuron(self):
        """Test single neuron layers."""
        layer = Dense(input_size=1, output_size=1)
        x = Tensor([[5.0]])
        y = layer(x)

        assert y.shape == (1, 1)


# Stretch goal tests (skipped unconditionally via pytest.mark.skip; remove the
# markers once the corresponding features exist and import the new names, e.g.
# LeakyReLU, Softmax, Dropout, BatchNorm1d)
class TestStretchGoals:
    """Stretch goal tests for advanced features."""

    @pytest.mark.skip(reason="Stretch goal: Weight initialization methods")
    def test_weight_initialization_methods(self):
        """Test different weight initialization strategies."""
        # Xavier initialization
        layer_xavier = Dense(input_size=100, output_size=50, init_method='xavier')
        weights_xavier = safe_numpy(layer_xavier.weights)

        # He initialization
        layer_he = Dense(input_size=100, output_size=50, init_method='he')
        weights_he = safe_numpy(layer_he.weights)

        # Check initialization ranges
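        # Xavier/Glorot uniform draws from U(-a, a) with a = sqrt(6 / (fan_in + fan_out)),
        # chosen to keep activation variance roughly constant across layers.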
        xavier_limit = np.sqrt(6.0 / (100 + 50))
        assert np.all(np.abs(weights_xavier) <= xavier_limit)
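
        # He initialization targets std = sqrt(2 / fan_in), compensating for
        # ReLU zeroing out roughly half of the activations.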
        he_limit = np.sqrt(2.0 / 100)
        assert np.std(weights_he) <= he_limit * 1.5  # Some tolerance

    @pytest.mark.skip(reason="Stretch goal: Layer parameter access")
    def test_layer_parameters(self):
        """Test accessing and modifying layer parameters."""
        layer = Dense(input_size=3, output_size=2)

        # Should be able to access parameters
        assert hasattr(layer, 'parameters')
        params = layer.parameters()
        assert len(params) == 2  # weights and bias

        # Should be able to set parameters
        new_weights = Tensor(np.ones((3, 2)))
        layer.set_weights(new_weights)
        np.testing.assert_array_equal(safe_numpy(layer.weights), safe_numpy(new_weights))

    @pytest.mark.skip(reason="Stretch goal: Additional activation functions")
    def test_additional_activations(self):
        """Test additional activation functions."""
        # Leaky ReLU
        leaky_relu = LeakyReLU(alpha=0.1)
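        # LeakyReLU: f(x) = x for x > 0, alpha * x otherwise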
        x = Tensor([[-1.0, 0.0, 1.0]])
        y = leaky_relu(x)
        expected = [[-0.1, 0.0, 1.0]]
        np.testing.assert_array_almost_equal(safe_numpy(y), expected)

        # Softmax
        softmax = Softmax()
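        # Softmax: y_i = exp(x_i - max(x)) / sum_j exp(x_j - max(x));
        # subtracting the max is the standard numerical-stability trick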
        x = Tensor([[1.0, 2.0, 3.0]])
        y = softmax(x)
        # Should sum to 1
        assert np.allclose(np.sum(safe_numpy(y)), 1.0)

    @pytest.mark.skip(reason="Stretch goal: Dropout layer")
    def test_dropout_layer(self):
        """Test dropout layer implementation."""
        dropout = Dropout(p=0.5)
        x = Tensor([[1.0, 2.0, 3.0, 4.0]])

        # Training mode
        dropout.train()
        y_train = dropout(x)
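
        # With inverted dropout (a common convention, assumed here), surviving
        # activations are scaled by 1 / (1 - p) at train time so the expected
        # value matches the input; at eval time dropout is the identity.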

        # Inference mode
        dropout.eval()
        y_eval = dropout(x)

        # In eval mode, the output should equal the input exactly
        np.testing.assert_array_equal(safe_numpy(y_eval), safe_numpy(x))

    @pytest.mark.skip(reason="Stretch goal: Batch normalization")
    def test_batch_normalization(self):
        """Test batch normalization layer."""
        bn = BatchNorm1d(num_features=3)
        x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        y = bn(x)
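
        # Standard formulation: y = (x - batch_mean) / sqrt(batch_var + eps),
        # optionally followed by a learnable scale and shift (gamma, beta).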

        # Should normalize across the batch dimension
        assert y.shape == x.shape
        # Mean should be close to 0 and std close to 1; eps in the denominator
        # keeps the std slightly below 1, so the std check needs a looser tolerance
        assert np.allclose(np.mean(safe_numpy(y), axis=0), 0.0, atol=1e-6)
        assert np.allclose(np.std(safe_numpy(y), axis=0), 1.0, atol=1e-3)
assert np.allclose(np.std(safe_numpy(y), axis=0), 1.0, atol=1e-6) |