Mirror of https://github.com/MLSysBook/TinyTorch.git, synced 2026-04-28 20:22:34 -05:00
Restructure TinyTorch: Move TinyGPT to examples, improve testing framework
Major changes:
- Moved TinyGPT from Module 16 to examples/tinygpt (capstone demo)
- Fixed Module 10 (optimizers) and Module 11 (training) bugs
- All 16 modules now passing tests (100% health)
- Added comprehensive testing with 'tito test --comprehensive'
- Renamed example files for clarity (train_xor_network.py, etc.)
- Created working TinyGPT example structure
- Updated documentation to reflect 15 core modules + examples
- Added KISS principle and testing framework documentation
@@ -492,7 +492,20 @@ class SGD:
             # Update parameter
+            # CRITICAL: Preserve original parameter shape - modify numpy array in-place
             update = self.learning_rate * self.momentum_buffers[param_id]
-            param.data._data[:] = param.data.data - update
+            new_data = param.data.data - update
+            # Handle different tensor shapes (scalar vs array)
+            if hasattr(param.data, '_data'):
+                # Real Tensor class with _data attribute
+                if param.data.data.ndim == 0:
+                    # 0D array (scalar)
+                    param.data._data = new_data
+                else:
+                    # Multi-dimensional array
+                    param.data._data[:] = new_data
+            else:
+                # Fallback Tensor class - replace data directly
+                param.data.data = new_data

         self.step_count += 1

         ### END SOLUTION
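For context on why the fix branches on ndim: NumPy only supports in-place slice assignment (arr[:] = ...) on arrays with at least one dimension; on a 0-d array it raises IndexError, which is what broke scalar parameters before this commit. A minimal standalone sketch of the behavior, in plain NumPy with no TinyTorch types:

import numpy as np

w = np.array([1.0, 2.0, 3.0])
w[:] = w - 0.1                      # fine: in-place update, shape preserved
print(w)                            # [0.9 1.9 2.9]

s = np.array(5.0)                   # 0-d "scalar" array
try:
    s[:] = s - 0.1                  # slicing a 0-d array raises IndexError
except IndexError as err:
    print("0-d slice assignment failed:", err)

s = s - 0.1                         # rebind instead, as the ndim == 0 branch does
print(s)                            # 4.9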
@@ -797,7 +810,20 @@ class Adam:
             # Update parameter with adaptive learning rate
+            # CRITICAL: Preserve original parameter shape - modify numpy array in-place
             update = self.learning_rate * first_moment_corrected / (np.sqrt(second_moment_corrected) + self.epsilon)
-            param.data._data[:] = param.data.data - update
+            new_data = param.data.data - update
+            # Handle different tensor shapes (scalar vs array)
+            if hasattr(param.data, '_data'):
+                # Real Tensor class with _data attribute
+                if param.data.data.ndim == 0:
+                    # 0D array (scalar)
+                    param.data._data = new_data
+                else:
+                    # Multi-dimensional array
+                    param.data._data[:] = new_data
+            else:
+                # Fallback Tensor class - replace data directly
+                param.data.data = new_data
         ### END SOLUTION

     def zero_grad(self) -> None:

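The Adam hunk applies the standard bias-corrected update, update = lr * m_hat / (sqrt(v_hat) + eps). A self-contained sketch of a single step with the usual defaults; the variable names here are illustrative, not TinyTorch's own attributes:

import numpy as np

lr, beta1, beta2, eps = 0.001, 0.9, 0.999, 1e-8
param = np.array([1.0, -2.0])
grad  = np.array([0.5, 0.3])
m = np.zeros_like(param)            # first moment (running mean of gradients)
v = np.zeros_like(param)            # second moment (running mean of squared gradients)
t = 1                               # step count

m = beta1 * m + (1 - beta1) * grad
v = beta2 * v + (1 - beta2) * grad ** 2
m_hat = m / (1 - beta1 ** t)        # bias correction for zero initialization
v_hat = v / (1 - beta2 ** t)
update = lr * m_hat / (np.sqrt(v_hat) + eps)
param[:] = param - update           # in-place, shape preserved (cf. the diff above)
print(param)                        # ~[0.999, -2.001]: first step moves ~lr per coordinate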
@@ -79,6 +79,35 @@ from tinytorch.core.optimizers import SGD, Adam, StepLR
 # 🔥 AUTOGRAD INTEGRATION: Loss functions now return Variables that support .backward()
 # This enables automatic gradient computation for neural network training!

+# Utility function for tensor data access
+def get_tensor_value(tensor_obj):
+    """Extract numeric value from tensor/variable objects for testing."""
+    # Handle Variable wrapper
+    if hasattr(tensor_obj, 'data'):
+        data = tensor_obj.data
+    else:
+        data = tensor_obj
+
+    # Handle nested Tensor data access
+    if hasattr(data, 'data'):
+        value = data.data
+    else:
+        value = data
+
+    # Extract scalar value
+    if hasattr(value, 'item'):
+        return value.item()
+    elif hasattr(value, '__len__') and len(value) == 1:
+        return value[0]
+    elif hasattr(value, '__iter__'):
+        # For numpy arrays or lists
+        try:
+            return float(value)
+        except:
+            return value
+    else:
+        return value
+
 # %% [markdown]
 """
 ## 🔧 DEVELOPMENT
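The unwrapping order in get_tensor_value is Variable -> Tensor -> raw value. A small demo with stand-in classes; the real Variable/Tensor live in tinytorch.core, and this assumes get_tensor_value from the hunk above is in scope:

import numpy as np

class FakeTensor:                   # stands in for tinytorch's Tensor
    def __init__(self, data):
        self.data = np.asarray(data)

class FakeVariable:                 # stands in for the autograd Variable wrapper
    def __init__(self, tensor):
        self.data = tensor

loss = FakeVariable(FakeTensor(0.25))
print(get_tensor_value(loss))       # 0.25 -> unwraps Variable, then Tensor, then .item()
print(get_tensor_value(2.5))        # 2.5  -> plain numbers fall through unchanged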
@@ -259,7 +288,8 @@ def test_unit_mse_loss():
     y_pred = Tensor([[1.0, 2.0], [3.0, 4.0]])
     y_true = Tensor([[1.0, 2.0], [3.0, 4.0]])
     loss = mse(y_pred, y_true)
-    assert abs(loss.data) < 1e-6, f"Perfect predictions should have loss ≈ 0, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value) < 1e-6, f"Perfect predictions should have loss ≈ 0, got {loss_value}"
     print("✅ Perfect predictions test passed")

     # Test 2: Known loss computation
@@ -267,7 +297,8 @@ def test_unit_mse_loss():
     y_true = Tensor([[0.0, 1.0]])
     loss = mse(y_pred, y_true)
     expected = 1.0  # [(1-0)² + (2-1)²] / 2 = [1 + 1] / 2 = 1.0
-    assert abs(loss.data - expected) < 1e-6, f"Expected loss {expected}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected) < 1e-6, f"Expected loss {expected}, got {loss_value}"
     print("✅ Known loss computation test passed")

     # Test 3: Batch processing
@@ -275,7 +306,8 @@ def test_unit_mse_loss():
     y_true = Tensor([[1.5, 2.5], [2.5, 3.5]])
     loss = mse(y_pred, y_true)
     expected = 0.25  # All squared differences are 0.25
-    assert abs(loss.data - expected) < 1e-6, f"Expected batch loss {expected}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected) < 1e-6, f"Expected batch loss {expected}, got {loss_value}"
     print("✅ Batch processing test passed")

     # Test 4: Single value
@@ -283,7 +315,8 @@ def test_unit_mse_loss():
     y_true = Tensor([3.0])
     loss = mse(y_pred, y_true)
     expected = 4.0  # (5-3)² = 4
-    assert abs(loss.data - expected) < 1e-6, f"Expected single value loss {expected}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected) < 1e-6, f"Expected single value loss {expected}, got {loss_value}"
     print("✅ Single value test passed")

     print("🎯 MSE Loss: All tests passed!")
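The hard-coded expectations follow directly from the definition MSE = mean((y_pred - y_true)²). A quick NumPy check of Tests 2 and 4; the y_pred values are inferred from the expected-value comments, since the assignments themselves sit just outside the hunks:

import numpy as np

# Test 2: [(1-0)^2 + (2-1)^2] / 2 = 1.0
print(np.mean((np.array([[1.0, 2.0]]) - np.array([[0.0, 1.0]])) ** 2))   # 1.0

# Test 4: (5-3)^2 = 4.0
print(np.mean((np.array([5.0]) - np.array([3.0])) ** 2))                 # 4.0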
@@ -437,7 +470,8 @@ def test_unit_crossentropy_loss():
     y_pred = Tensor([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0]])  # Very confident correct predictions
     y_true = Tensor([0, 1])  # Class indices
     loss = ce(y_pred, y_true)
-    assert loss.data < 0.1, f"Perfect predictions should have low loss, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 0.1, f"Perfect predictions should have low loss, got {loss_value}"
     print("✅ Perfect predictions test passed")

     # Test 2: Random predictions (should have higher loss)
@@ -445,21 +479,24 @@ def test_unit_crossentropy_loss():
     y_true = Tensor([0, 1])
     loss = ce(y_pred, y_true)
     expected_random = -np.log(1.0/3.0)  # log(1/num_classes) for uniform distribution
-    assert abs(loss.data - expected_random) < 0.1, f"Random predictions should have loss ≈ {expected_random}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected_random) < 0.1, f"Random predictions should have loss ≈ {expected_random}, got {loss_value}"
     print("✅ Random predictions test passed")

     # Test 3: Binary classification
     y_pred = Tensor([[2.0, 1.0], [1.0, 2.0]])
     y_true = Tensor([0, 1])
     loss = ce(y_pred, y_true)
-    assert 0.0 < loss.data < 2.0, f"Binary classification loss should be reasonable, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert 0.0 < loss_value < 2.0, f"Binary classification loss should be reasonable, got {loss_value}"
     print("✅ Binary classification test passed")

     # Test 4: One-hot encoded labels
     y_pred = Tensor([[2.0, 1.0, 0.0], [0.0, 2.0, 1.0]])
     y_true = Tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])  # One-hot encoded
     loss = ce(y_pred, y_true)
-    assert 0.0 < loss.data < 2.0, f"One-hot encoded loss should be reasonable, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert 0.0 < loss_value < 2.0, f"One-hot encoded loss should be reasonable, got {loss_value}"
     print("✅ One-hot encoded labels test passed")

     print("🎯 CrossEntropy Loss: All tests passed!")
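The expected_random baseline holds because softmax over equal logits is uniform, making the loss -log(1/num_classes) no matter what the (hidden) y_pred assignment is, as long as all logits match. A minimal NumPy cross-entropy reproducing that arithmetic; this is a reference implementation under those assumptions, not TinyTorch's actual CrossEntropyLoss:

import numpy as np

def cross_entropy(logits, labels):
    shifted = logits - logits.max(axis=1, keepdims=True)                 # stability shift
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()             # mean NLL of true classes

logits = np.zeros((2, 3))            # equal logits -> uniform probabilities
labels = np.array([0, 1])
print(cross_entropy(logits, labels)) # 1.0986... == -log(1/3)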
@@ -607,7 +644,8 @@ def test_unit_binary_crossentropy_loss():
     y_pred = Tensor([[10.0], [-10.0]])  # Very confident correct predictions
     y_true = Tensor([[1.0], [0.0]])
     loss = bce(y_pred, y_true)
-    assert loss.data < 0.1, f"Perfect predictions should have low loss, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 0.1, f"Perfect predictions should have low loss, got {loss_value}"
     print("✅ Perfect predictions test passed")

     # Test 2: Random predictions (should have higher loss)
@@ -615,21 +653,24 @@ def test_unit_binary_crossentropy_loss():
     y_true = Tensor([[1.0], [0.0]])
     loss = bce(y_pred, y_true)
     expected_random = -np.log(0.5)  # log(0.5) for random guessing
-    assert abs(loss.data - expected_random) < 0.1, f"Random predictions should have loss ≈ {expected_random}, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value - expected_random) < 0.1, f"Random predictions should have loss ≈ {expected_random}, got {loss_value}"
     print("✅ Random predictions test passed")

     # Test 3: Batch processing
     y_pred = Tensor([[1.0], [2.0], [-1.0]])
     y_true = Tensor([[1.0], [1.0], [0.0]])
     loss = bce(y_pred, y_true)
-    assert 0.0 < loss.data < 2.0, f"Batch processing loss should be reasonable, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert 0.0 < loss_value < 2.0, f"Batch processing loss should be reasonable, got {loss_value}"
     print("✅ Batch processing test passed")

     # Test 4: Edge cases
     y_pred = Tensor([[100.0], [-100.0]])  # Extreme values
     y_true = Tensor([[1.0], [0.0]])
     loss = bce(y_pred, y_true)
-    assert loss.data < 0.1, f"Extreme correct predictions should have low loss, got {loss.data}"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 0.1, f"Extreme correct predictions should have low loss, got {loss_value}"
     print("✅ Edge cases test passed")

     print("🎯 Binary CrossEntropy Loss: All tests passed!")
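Test 4 feeds logits of ±100, which would overflow a naive -[y·log σ(x) + (1-y)·log(1-σ(x))] through exp(). The standard numerically stable formulation sidesteps this; sketched below, with no claim that TinyTorch's BCE uses exactly this form:

import numpy as np

def bce_with_logits(x, y):
    # max(x, 0) - x*y + log(1 + exp(-|x|)) is algebraically equal to the
    # naive form but never exponentiates a large positive number
    return np.mean(np.maximum(x, 0) - x * y + np.log1p(np.exp(-np.abs(x))))

x = np.array([[100.0], [-100.0]])   # extreme, confident logits
y = np.array([[1.0], [0.0]])
print(bce_with_logits(x, y))        # ~3.7e-44, comfortably below the 0.1 threshold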
@@ -1277,19 +1318,22 @@ def test_module_training():
     y_pred = Tensor([[1.0, 2.0]])
     y_true = Tensor([[1.0, 2.0]])
     loss = mse(y_pred, y_true)
-    assert abs(loss.data) < 1e-6, "MSE should work for perfect predictions"
+    loss_value = get_tensor_value(loss)
+    assert abs(loss_value) < 1e-6, "MSE should work for perfect predictions"

     # CrossEntropy test
     y_pred = Tensor([[10.0, 0.0], [0.0, 10.0]])
     y_true = Tensor([0, 1])
     loss = ce(y_pred, y_true)
-    assert loss.data < 1.0, "CrossEntropy should work for good predictions"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 1.0, "CrossEntropy should work for good predictions"

     # Binary CrossEntropy test
     y_pred = Tensor([[10.0], [-10.0]])
     y_true = Tensor([[1.0], [0.0]])
     loss = bce(y_pred, y_true)
-    assert loss.data < 1.0, "Binary CrossEntropy should work for good predictions"
+    loss_value = get_tensor_value(loss)
+    assert loss_value < 1.0, "Binary CrossEntropy should work for good predictions"

     print("✅ Loss functions work correctly")