Mirror of https://github.com/MLSysBook/TinyTorch.git, synced 2026-04-28 03:07:37 -05:00
CRITICAL FIX: Gradients now flow through the entire training stack!

Changes:
1. Enable autograd in __init__.py - patches Tensor operations on import
2. Extend enable_autograd() to patch the Sigmoid and BCE forward methods (patching pattern sketched below)
3. Fix gradient accumulation to handle broadcasting (bias gradients; reduction sketched below)
4. Fix optimizer.step() - param.grad is a numpy array, not Tensor.data (corrected step sketched below)
5. Add debug_gradients.py for systematic gradient-flow testing

Architecture:
- Clean patching pattern: all gradient tracking lives in enable_autograd()
- Activations and losses remain simple (Modules 02/04); autograd (Module 05) upgrades them with gradient tracking
- Pedagogically sound: separation of concerns

Results:
✅ All 6 debug tests pass
✅ Perceptron learns: 50% → 93% accuracy
✅ Loss decreases: 0.79 → 0.36
✅ Weights update correctly through SGD
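Change 2 describes the patching pattern. Below is a minimal self-contained sketch of what that upgrade might look like, assuming a Tensor that carries .data (a numpy array), .grad, and a _backward hook; it is illustrative only, not the actual TinyTorch source.

import numpy as np

class Tensor:
    def __init__(self, data):
        self.data = np.asarray(data, dtype=np.float64)
        self.grad = None               # accumulated gradient (numpy array)
        self._backward = lambda: None  # filled in by the autograd patch

class Sigmoid:
    def forward(self, x):
        # Module 02 version: plain math, no gradient bookkeeping.
        return Tensor(1.0 / (1.0 + np.exp(-x.data)))

def enable_autograd():
    # Wrap the plain forward so the output records how to push
    # gradients back to its input.
    plain_forward = Sigmoid.forward

    def tracked_forward(self, x):
        out = plain_forward(self, x)

        def _backward():
            # d(sigmoid)/dx = sigmoid(x) * (1 - sigmoid(x))
            local = out.data * (1.0 - out.data)
            grad = out.grad * local
            x.grad = grad if x.grad is None else x.grad + grad

        out._backward = _backward
        return out

    Sigmoid.forward = tracked_forward

# Quick check of the wrapper:
enable_autograd()
x = Tensor([0.0])
y = Sigmoid().forward(x)
y.grad = np.ones_like(y.data)  # seed, as a loss's backward pass would
y._backward()
print(x.grad)                  # [0.25] = sigmoid'(0)

The same wrapper idea extends to the BCE loss: the plain forward stays untouched in its own module, and the autograd module swaps in a tracked version at import time.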
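Change 3 concerns bias gradients: in y = x @ W + b the bias is broadcast from shape (out,) to (batch, out), so its gradient must be summed back down to the parameter's shape before accumulating. A sketch of that reduction, with unbroadcast() as a hypothetical helper name rather than TinyTorch's actual API:

import numpy as np

def unbroadcast(grad, shape):
    # Sum away the leading axes that broadcasting added...
    while grad.ndim > len(shape):
        grad = grad.sum(axis=0)
    # ...and the axes that were size 1 in the original shape.
    for axis, size in enumerate(shape):
        if size == 1 and grad.shape[axis] != 1:
            grad = grad.sum(axis=axis, keepdims=True)
    return grad

upstream = np.ones((32, 4))             # gradient w.r.t. a (batch, out) output
bias_grad = unbroadcast(upstream, (4,))
assert bias_grad.shape == (4,)          # now matches the bias parameter

The same reduction applies to any parameter that broadcasting expanded, not just biases.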
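Change 4 is a type mismatch in the update step: after the backward pass, param.grad is already a raw numpy array, so the optimizer must use it directly rather than reaching for a Tensor's .data. A sketch of the corrected SGD step, with the constructor signature assumed:

class SGD:
    def __init__(self, params, lr=0.1):
        self.params = list(params)
        self.lr = lr

    def step(self):
        for p in self.params:
            if p.grad is None:
                continue
            p.data -= self.lr * p.grad  # p.grad is a numpy array, not a Tensor

    def zero_grad(self):
        for p in self.params:
            p.grad = None

Calling zero_grad() between steps keeps stale gradients from accumulating across batches.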
27 lines · 765 B · Python · Generated
__version__ = "0.1.0"

# Import core functionality
from . import core

# Make common components easily accessible at top level
from .core.tensor import Tensor
from .core.layers import Linear, Dropout
from .core.activations import Sigmoid, ReLU, Tanh, GELU, Softmax
from .core.losses import MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss
from .core.optimizers import SGD, AdamW

# 🔥 CRITICAL: Enable automatic differentiation
# This patches Tensor operations to track gradients
from .core.autograd import enable_autograd
enable_autograd()

# Export main public API
__all__ = [
    'core',
    'Tensor',
    'Linear', 'Dropout',
    'Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax',
    'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss',
    'SGD', 'AdamW'
]
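Because __init__.py calls enable_autograd() at import time, gradient tracking is live the moment the package is imported, with no extra setup. A hypothetical smoke test in the spirit of the debug_gradients.py mentioned in the commit message; the constructor signatures and the backward() call are PyTorch-style assumptions, not verified against the TinyTorch source:

import numpy as np
from tinytorch import Tensor, Linear, Sigmoid, BinaryCrossEntropyLoss

x = Tensor(np.random.randn(4, 2))   # assumed: Tensor wraps a numpy array
target = Tensor(np.ones((4, 1)))

layer = Linear(2, 1)                # assumed signature: Linear(in, out)
pred = Sigmoid().forward(layer.forward(x))
loss = BinaryCrossEntropyLoss().forward(pred, target)
loss.backward()                     # works only because the import above
                                    # already ran enable_autograd()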