mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-10 07:43:25 -05:00
Re-exported all modules after restructuring: - Updated _modidx.py with new module locations - Removed outdated autogeneration headers - Updated all core modules (tensor, autograd, layers, etc.) - Updated optimization modules (quantization, compression, etc.) - Updated TITO commands for new structure Changes include: - 24 tinytorch/ module files - 24 tito/ command and core files - Updated references from modules/source/ to modules/ All modules re-exported via nbdev from their new locations.
466 lines
17 KiB
Python
Generated
466 lines
17 KiB
Python
Generated
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_tensor/tensor_dev.ipynb.

# %% auto 0
__all__ = ['Tensor']

# %% ../../modules/source/01_tensor/tensor_dev.ipynb 1
import numpy as np

# %% ../../modules/source/01_tensor/tensor_dev.ipynb 6
class Tensor:
    """Educational tensor that grows with student knowledge.

    This class starts simple but includes dormant features for future modules:
    - requires_grad: Will be used for automatic differentiation (Module 05)
    - grad: Will store computed gradients (Module 05)
    - backward(): Will compute gradients (Module 05)

    For now, focus on: data, shape, and basic operations.
    """

    def __init__(self, data, requires_grad=False):
        """Create a new tensor from data.

        Args:
            data: Array-like input (scalar, list, nested list, ndarray).
                Converted to a float32 NumPy array for consistent ML math.
            requires_grad: Enable gradient tracking (dormant until Module 05).

        Example:
            >>> tensor = Tensor([1, 2, 3])
            >>> tensor.shape
            (3,)
        """
        ### BEGIN SOLUTION
        # Core tensor data - always present
        self.data = np.array(data, dtype=np.float32)  # Consistent float32 for ML
        self.shape = self.data.shape
        self.size = self.data.size
        self.dtype = self.data.dtype

        # Gradient features (dormant until Module 05)
        self.requires_grad = requires_grad
        self.grad = None
        ### END SOLUTION

    def __repr__(self):
        """String representation of tensor for debugging."""
        grad_info = f", requires_grad={self.requires_grad}" if self.requires_grad else ""
        return f"Tensor(data={self.data}, shape={self.shape}{grad_info})"

    def __str__(self):
        """Human-readable string representation."""
        return f"Tensor({self.data})"

    def numpy(self):
        """Return the underlying NumPy array."""
        return self.data

    def _result_requires_grad(self, other):
        """Return True when an operation's result should track gradients.

        A result tracks gradients when any participating Tensor does. This
        mirrors what reshape/transpose already do, so every op is consistent
        once Module 05 (autograd) activates the dormant machinery.
        """
        if isinstance(other, Tensor):
            return self.requires_grad or other.requires_grad
        return self.requires_grad

    # nbgrader={"grade": false, "grade_id": "addition-impl", "solution": true}
    def __add__(self, other):
        """Add two tensors element-wise with broadcasting support.

        Accepts another Tensor or a plain scalar; NumPy broadcasting aligns
        shapes automatically (e.g. (2,2) + (2,) -> (2,2)). Returns a NEW
        Tensor; self is never modified.

        Example:
            >>> (Tensor([1, 2, 3]) + Tensor([4, 5, 6])).data
            array([5., 7., 9.], dtype=float32)
        """
        ### BEGIN SOLUTION
        operand = other.data if isinstance(other, Tensor) else other
        # FIX: preserve gradient tracking on the result. The original dropped
        # requires_grad here, contradicting its own hint and the behavior of
        # reshape/transpose.
        return Tensor(self.data + operand,
                      requires_grad=self._result_requires_grad(other))
        ### END SOLUTION

    # nbgrader={"grade": false, "grade_id": "more-arithmetic", "solution": true}
    def __sub__(self, other):
        """Subtract two tensors element-wise (Tensor or scalar operand).

        Common use: centering data (x - mean), computing differences for
        loss functions.
        """
        ### BEGIN SOLUTION
        operand = other.data if isinstance(other, Tensor) else other
        # FIX: propagate requires_grad (consistency with reshape/transpose).
        return Tensor(self.data - operand,
                      requires_grad=self._result_requires_grad(other))
        ### END SOLUTION

    def __mul__(self, other):
        """Multiply two tensors element-wise (NOT matrix multiplication).

        Common use: scaling features, applying masks, gating mechanisms.
        Note: this is the * operator; use matmul()/@ for matrix products.
        """
        ### BEGIN SOLUTION
        operand = other.data if isinstance(other, Tensor) else other
        # FIX: propagate requires_grad (consistency with reshape/transpose).
        return Tensor(self.data * operand,
                      requires_grad=self._result_requires_grad(other))
        ### END SOLUTION

    def __truediv__(self, other):
        """Divide two tensors element-wise (Tensor or scalar operand).

        Common use: normalization (x / std), converting counts to
        probabilities.
        """
        ### BEGIN SOLUTION
        operand = other.data if isinstance(other, Tensor) else other
        # FIX: propagate requires_grad (consistency with reshape/transpose).
        return Tensor(self.data / operand,
                      requires_grad=self._result_requires_grad(other))
        ### END SOLUTION

    # nbgrader={"grade": false, "grade_id": "matmul-impl", "solution": true}
    def matmul(self, other):
        """Matrix multiplication of two tensors.

        Shape rules: (M, K) @ (K, N) -> (M, N); inner dimensions must match.
        Scalar (0-d) operands fall back to element-wise multiplication.

        Args:
            other: The right-hand Tensor.

        Returns:
            A new Tensor holding the matrix product.

        Raises:
            TypeError: If other is not a Tensor.
            ValueError: If the inner dimensions are incompatible.

        Example:
            >>> Tensor([[1, 2], [3, 4]]).matmul(Tensor([[5, 6], [7, 8]])).data
            array([[19., 22.],
                   [43., 50.]], dtype=float32)
        """
        ### BEGIN SOLUTION
        if not isinstance(other, Tensor):
            raise TypeError(f"Expected Tensor for matrix multiplication, got {type(other)}")

        # 0-d operands: matrix product degenerates to scalar scaling.
        if self.shape == () or other.shape == ():
            return Tensor(self.data * other.data,
                          requires_grad=self._result_requires_grad(other))

        # Validate inner-dimension compatibility up front so students get a
        # clear message instead of NumPy's generic broadcast error.
        if len(self.shape) >= 2 and len(other.shape) >= 2:
            if self.shape[-1] != other.shape[-2]:
                raise ValueError(
                    f"Cannot perform matrix multiplication: {self.shape} @ {other.shape}. "
                    f"Inner dimensions must match: {self.shape[-1]} ≠ {other.shape[-2]}. "
                    f"💡 HINT: For (M,K) @ (K,N) → (M,N), the K dimensions must be equal."
                )
        elif len(self.shape) == 1 and len(other.shape) == 2:
            # Vector @ Matrix
            if self.shape[0] != other.shape[0]:
                raise ValueError(
                    f"Cannot multiply vector {self.shape} with matrix {other.shape}. "
                    f"Vector length {self.shape[0]} must match matrix rows {other.shape[0]}."
                )
        elif len(self.shape) == 2 and len(other.shape) == 1:
            # Matrix @ Vector
            if self.shape[1] != other.shape[0]:
                raise ValueError(
                    f"Cannot multiply matrix {self.shape} with vector {other.shape}. "
                    f"Matrix columns {self.shape[1]} must match vector length {other.shape[0]}."
                )

        # Use np.matmul (not np.dot) for proper batched multiplication with
        # 3D+ tensors. FIX: propagate requires_grad on the result.
        result_data = np.matmul(self.data, other.data)
        return Tensor(result_data,
                      requires_grad=self._result_requires_grad(other))
        ### END SOLUTION

    # nbgrader={"grade": false, "grade_id": "shape-ops", "solution": true}
    def reshape(self, *shape):
        """Reshape tensor to new dimensions.

        Supports both calling conventions — reshape(2, 3) and
        reshape((2, 3)) — and NumPy-style -1 for one inferred dimension.

        Args:
            *shape: Target dimensions (ints), optionally as a single
                tuple/list. At most one dimension may be -1.

        Returns:
            A new Tensor viewing the same values in the new shape;
            requires_grad is preserved.

        Raises:
            ValueError: If more than one -1 is given, the -1 dimension
                cannot be inferred, or the element counts do not match.

        Example:
            >>> Tensor([1, 2, 3, 4, 5, 6]).reshape(2, 3).shape
            (2, 3)
        """
        ### BEGIN SOLUTION
        # Handle both reshape(2, 3) and reshape((2, 3)) calling conventions
        if len(shape) == 1 and isinstance(shape[0], (tuple, list)):
            new_shape = tuple(shape[0])
        else:
            new_shape = shape

        # Handle -1 for automatic dimension inference (like NumPy)
        if -1 in new_shape:
            if new_shape.count(-1) > 1:
                raise ValueError("Can only specify one unknown dimension with -1")

            unknown_idx = new_shape.index(-1)
            known_size = 1
            for i, dim in enumerate(new_shape):
                if i != unknown_idx:
                    known_size *= dim

            # FIX: the original floor-divided blindly — a zero known_size
            # crashed with ZeroDivisionError and a non-divisible size produced
            # a misleading inferred shape in the later error message.
            if known_size == 0 or self.size % known_size != 0:
                raise ValueError(
                    f"Cannot reshape tensor of size {self.size} to shape {new_shape}. "
                    f"💡 HINT: The -1 dimension must divide evenly into {self.size}."
                )

            inferred = list(new_shape)
            inferred[unknown_idx] = self.size // known_size
            new_shape = tuple(inferred)

        # Validate total elements remain the same
        if np.prod(new_shape) != self.size:
            raise ValueError(
                f"Cannot reshape tensor of size {self.size} to shape {new_shape}. "
                f"Total elements must match: {self.size} ≠ {np.prod(new_shape)}. "
                f"💡 HINT: Make sure new_shape dimensions multiply to {self.size}"
            )

        # Reshape the data (NumPy handles the memory layout efficiently) and
        # preserve gradient tracking (important for autograd in Module 05).
        reshaped_data = np.reshape(self.data, new_shape)
        return Tensor(reshaped_data, requires_grad=self.requires_grad)
        ### END SOLUTION

    def transpose(self, dim0=None, dim1=None):
        """Transpose tensor dimensions.

        With no arguments, swaps the LAST two dimensions (the common ML
        case, e.g. Q @ K.transpose() in attention); 0-D/1-D tensors are
        returned unchanged. With both dim0 and dim1, swaps exactly those
        two dimensions.

        Args:
            dim0: First dimension to swap (optional; requires dim1 too).
            dim1: Second dimension to swap (optional; requires dim0 too).

        Returns:
            A new Tensor with the dimensions swapped; requires_grad is
            preserved.

        Raises:
            ValueError: If only one dim is given, or a dim is out of range
                (negative indices are rejected).
        """
        ### BEGIN SOLUTION
        if dim0 is None and dim1 is None:
            if len(self.shape) < 2:
                # For 0-D/1-D tensors transpose is the identity operation.
                transposed_data = self.data.copy()
            else:
                # Default: swap the last two dimensions (most common in ML).
                axes = list(range(len(self.shape)))
                axes[-2], axes[-1] = axes[-1], axes[-2]
                transposed_data = np.transpose(self.data, axes)
        else:
            if dim0 is None or dim1 is None:
                raise ValueError("Both dim0 and dim1 must be specified for specific dimension transpose")

            # Validate dimensions exist (negative indices intentionally rejected)
            if dim0 >= len(self.shape) or dim1 >= len(self.shape) or dim0 < 0 or dim1 < 0:
                raise ValueError(
                    f"Dimension out of range for tensor with shape {self.shape}. "
                    f"Got dim0={dim0}, dim1={dim1}, but tensor has {len(self.shape)} dimensions."
                )

            axes = list(range(len(self.shape)))
            axes[dim0], axes[dim1] = axes[dim1], axes[dim0]
            transposed_data = np.transpose(self.data, axes)

        # FIX: the original 1-D early-return dropped requires_grad; every
        # path now preserves it (Module 05 will add _grad_fn here).
        return Tensor(transposed_data, requires_grad=self.requires_grad)
        ### END SOLUTION

    # nbgrader={"grade": false, "grade_id": "reduction-ops", "solution": true}
    def sum(self, axis=None, keepdims=False):
        """Sum tensor along the specified axis.

        Args:
            axis: None sums all elements (scalar result); an int or tuple
                of ints sums along those dimensions.
            keepdims: Keep reduced dimensions as size 1 (for broadcasting).

        Returns:
            A new Tensor with the sums; requires_grad is preserved.

        Example:
            >>> Tensor([[1, 2], [3, 4]]).sum(axis=0).data
            array([4., 6.], dtype=float32)
        """
        ### BEGIN SOLUTION
        result = np.sum(self.data, axis=axis, keepdims=keepdims)
        # FIX: propagate requires_grad (consistency with reshape/transpose).
        return Tensor(result, requires_grad=self.requires_grad)
        ### END SOLUTION

    def mean(self, axis=None, keepdims=False):
        """Compute mean of tensor along the specified axis.

        Common usage: batch normalization, loss averaging, global pooling.
        requires_grad is preserved on the result.
        """
        ### BEGIN SOLUTION
        result = np.mean(self.data, axis=axis, keepdims=keepdims)
        # FIX: propagate requires_grad (consistency with reshape/transpose).
        return Tensor(result, requires_grad=self.requires_grad)
        ### END SOLUTION

    def max(self, axis=None, keepdims=False):
        """Find maximum values along the specified axis.

        Common usage: max pooling, finding best predictions, activation
        clipping. requires_grad is preserved on the result.
        """
        ### BEGIN SOLUTION
        result = np.max(self.data, axis=axis, keepdims=keepdims)
        # FIX: propagate requires_grad (consistency with reshape/transpose).
        return Tensor(result, requires_grad=self.requires_grad)
        ### END SOLUTION

    # nbgrader={"grade": false, "grade_id": "gradient-placeholder", "solution": true}
    def backward(self):
        """Compute gradients (implemented in Module 05: Autograd).

        STUDENT NOTE: this method intentionally does nothing for now, so
        gradient-style code runs without confusing AttributeErrors. In
        Module 05 it will set this tensor's gradient, walk the computation
        graph, apply the chain rule, and accumulate gradients into .grad.

        Current behavior:
            >>> x = Tensor([1, 2, 3], requires_grad=True)
            >>> (x * 2).sum().backward()  # no-op
            >>> x.grad is None
            True
        """
        ### BEGIN SOLUTION
        # Placeholder - will be implemented in Module 05. For now, just
        # ensure it doesn't crash when called, so students can experiment
        # with gradient syntax before autograd exists.
        pass
        ### END SOLUTION