mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-09 10:12:08 -05:00
Package exports:
- Fix tinytorch/__init__.py to export all required components for milestones
- Add Dense as alias for Linear for compatibility
- Add loss functions (MSELoss, CrossEntropyLoss, BinaryCrossEntropyLoss)
- Export spatial operations, data loaders, and transformer components

Test infrastructure:
- Create tests/conftest.py to handle path setup
- Create tests/test_utils.py with shared test utilities
- Rename test_progressive_integration.py files to include module number
- Fix syntax errors in test files (spaces in class names)
- Remove stale test file referencing non-existent modules

Documentation:
- Update README.md with correct milestone file names
- Fix milestone requirements to match actual module dependencies

Export system:
- Run `tito export --all` to regenerate package from source modules
- Ensure all 20 modules are properly exported
472 lines
16 KiB
Python
Generated
472 lines
16 KiB
Python
Generated
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
|
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
|
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
|
# ║ ║
|
|
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
|
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
|
# ║ ║
|
|
# ║ ✅ TO EDIT: src/XX_loader/XX_loader.py ║
|
|
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
|
# ║ ║
|
|
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
|
# ║ Editing it directly may break module functionality and training. ║
|
|
# ║ ║
|
|
# ║ 🎓 LEARNING TIP: Work in src/ (developers) or modules/ (learners) ║
|
|
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
|
|
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
|
# %% auto 0
|
|
__all__ = ['Dataset', 'TensorDataset', 'DataLoader', 'RandomHorizontalFlip', 'RandomCrop', 'Compose']
|
|
|
|
# %% ../../modules/08_dataloader/08_dataloader.ipynb 0
|
|
#| default_exp data.loader
|
|
#| export
|
|
|
|
# %% ../../modules/08_dataloader/08_dataloader.ipynb 2
|
|
# Essential imports for data loading
|
|
import numpy as np
|
|
import random
|
|
import time
|
|
import sys
|
|
from typing import Iterator, Tuple, List, Optional, Union
|
|
from abc import ABC, abstractmethod
|
|
|
|
# Import real Tensor class from tinytorch package
|
|
from ..core.tensor import Tensor
|
|
|
|
# %% ../../modules/08_dataloader/08_dataloader.ipynb 4
|
|
class Dataset(ABC):
    """
    Abstract interface that every dataset must satisfy.

    Concrete datasets provide exactly two operations:
    - __len__(): total number of samples available
    - __getitem__(idx): the sample stored at position idx

    Example:
        >>> class Squares(Dataset):
        ...     def __len__(self): return 100
        ...     def __getitem__(self, idx): return idx
        >>> ds = Squares()
        >>> len(ds)     # 100
        >>> ds[42]      # 42

    Using @abstractmethod means subclasses that forget either method
    cannot even be instantiated — the interface is enforced up front.
    """

    ### BEGIN SOLUTION
    @abstractmethod
    def __len__(self) -> int:
        """Return the number of samples (enables len(dataset) and batch math)."""
        pass

    @abstractmethod
    def __getitem__(self, idx: int):
        """
        Return the sample stored at position ``idx`` (0 <= idx < len(self)).

        The concrete return format — a (data, label) tuple, a single
        tensor, etc. — is left to the subclass.
        """
        pass
    ### END SOLUTION
|
|
|
|
# %% ../../modules/08_dataloader/08_dataloader.ipynb 7
|
|
class TensorDataset(Dataset):
    """
    Dataset backed by one or more tensors for supervised learning.

    Sample ``i`` is the tuple ``(t0[i], t1[i], ...)`` drawn from the same
    row of every stored tensor, so all tensors must agree on the size of
    their first dimension.

    Example:
        >>> features = Tensor([[1, 2], [3, 4], [5, 6]])  # 3 samples
        >>> labels = Tensor([0, 1, 0])                   # 3 labels
        >>> ds = TensorDataset(features, labels)
        >>> len(ds)   # 3
        >>> ds[1]     # (Tensor([3, 4]), Tensor(1))
    """

    def __init__(self, *tensors):
        """
        Store the tensors after checking they are index-compatible.

        Args:
            *tensors: One or more Tensor objects sharing the same size in
                their first dimension.
        """
        ### BEGIN SOLUTION
        assert len(tensors) > 0, "Must provide at least one tensor"

        self.tensors = tensors

        # Every tensor must expose the same number of samples along dim 0.
        first_size = len(tensors[0].data)
        for i, tensor in enumerate(tensors):
            if len(tensor.data) != first_size:
                raise ValueError(
                    f"All tensors must have same size in first dimension. "
                    f"Tensor 0: {first_size}, Tensor {i}: {len(tensor.data)}"
                )
        ### END SOLUTION

    def __len__(self) -> int:
        """Return the number of samples (size of the shared first dimension)."""
        ### BEGIN SOLUTION
        return len(self.tensors[0].data)
        ### END SOLUTION

    def __getitem__(self, idx: int) -> Tuple[Tensor, ...]:
        """
        Return the tuple of per-tensor slices at ``idx``.

        Args:
            idx: Sample index in [0, len(self)).

        Returns:
            Tuple containing ``tensor[idx]`` for each stored tensor.

        Raises:
            IndexError: If ``idx`` is negative or past the end.
        """
        ### BEGIN SOLUTION
        if not 0 <= idx < len(self):
            raise IndexError(f"Index {idx} out of range for dataset of size {len(self)}")

        # Slice every stored tensor at the same position.
        return tuple(Tensor(t.data[idx]) for t in self.tensors)
        ### END SOLUTION
|
|
|
|
# %% ../../modules/08_dataloader/08_dataloader.ipynb 10
|
|
class DataLoader:
    """
    Iterate over a Dataset in (optionally shuffled) mini-batches.

    Each epoch yields ceil(len(dataset) / batch_size) batches; the final
    batch may hold fewer than ``batch_size`` samples. Per-sample tuples
    are collated so each yielded batch is a tuple of stacked tensors —
    essential for mini-batch gradient descent.

    Example:
        >>> ds = TensorDataset(Tensor([[1, 2], [3, 4], [5, 6]]), Tensor([0, 1, 0]))
        >>> loader = DataLoader(ds, batch_size=2, shuffle=True)
        >>> for features_batch, labels_batch in loader:
        ...     print(features_batch.shape, labels_batch.shape)

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples per batch (must be >= 1).
        shuffle: Reshuffle sample order at the start of every epoch.
    """

    def __init__(self, dataset: "Dataset", batch_size: int, shuffle: bool = False):
        """
        Create a DataLoader for batched iteration.

        Args:
            dataset: Dataset to load from.
            batch_size: Number of samples per batch (must be >= 1).
            shuffle: Whether to shuffle data each epoch.

        Raises:
            ValueError: If ``batch_size`` is less than 1.
        """
        ### BEGIN SOLUTION
        # Validate eagerly: batch_size=0 would otherwise surface later as a
        # confusing `range() arg 3 must not be zero` mid-iteration, and a
        # negative value would silently yield no batches at all.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        ### END SOLUTION

    def __len__(self) -> int:
        """Return the number of batches per epoch, counting a final partial batch."""
        ### BEGIN SOLUTION
        # Ceiling division: a trailing partial batch still counts as one batch.
        return (len(self.dataset) + self.batch_size - 1) // self.batch_size
        ### END SOLUTION

    def __iter__(self) -> Iterator:
        """Yield collated batches, reshuffling sample order if requested."""
        ### BEGIN SOLUTION
        indices = list(range(len(self.dataset)))

        # Fresh shuffle each epoch so batch composition varies.
        if self.shuffle:
            random.shuffle(indices)

        for start in range(0, len(indices), self.batch_size):
            batch_indices = indices[start:start + self.batch_size]
            samples = [self.dataset[idx] for idx in batch_indices]

            # Collate: list of per-sample tuples -> tuple of batch tensors.
            yield self._collate_batch(samples)
        ### END SOLUTION

    def _collate_batch(self, batch: "List[Tuple[Tensor, ...]]") -> "Tuple[Tensor, ...]":
        """
        Stack a list of sample tuples into a tuple of batch tensors.

        Args:
            batch: Samples from the dataset, each a tuple of Tensors with
                matching per-position shapes.

        Returns:
            One Tensor per tuple position, each with a new leading batch axis.
            An empty batch collates to an empty tuple.
        """
        ### BEGIN SOLUTION
        if not batch:
            return ()

        # Every sample tuple has the same arity; use the first as reference.
        num_tensors = len(batch[0])

        batched_tensors = []
        for tensor_idx in range(num_tensors):
            # Gather raw arrays at this tuple position across the batch and
            # stack them along a new leading (batch) axis.
            arrays = [sample[tensor_idx].data for sample in batch]
            batched_tensors.append(Tensor(np.stack(arrays, axis=0)))

        return tuple(batched_tensors)
        ### END SOLUTION
|
|
|
|
# %% ../../modules/08_dataloader/08_dataloader.ipynb 12
|
|
class RandomHorizontalFlip:
    """
    Mirror an image left-right with probability ``p``.

    A cheap, effective augmentation whenever horizontal orientation does
    not change the class (cats, dogs, cars — not digits or text!).

    Args:
        p: Probability of flipping (default: 0.5).
    """

    def __init__(self, p=0.5):
        """
        Validate and store the flip probability.

        Example:
            >>> flip = RandomHorizontalFlip(p=0.5)  # 50% chance to flip

        Raises:
            ValueError: If ``p`` lies outside [0, 1].
        """
        ### BEGIN SOLUTION
        if not 0.0 <= p <= 1.0:
            raise ValueError(f"Probability must be between 0 and 1, got {p}")
        self.p = p
        ### END SOLUTION

    def __call__(self, x):
        """
        Return ``x`` flipped along its last axis with probability ``p``.

        Args:
            x: Tensor or array shaped (..., H, W) or (..., H, W, C); the
                final axis is treated as width.

        Returns:
            Same type and shape as the input — flipped or untouched.

        Example:
            >>> flip = RandomHorizontalFlip(0.5)
            >>> img = np.array([[1, 2, 3], [4, 5, 6]])
            >>> # 50% chance the output is [[3, 2, 1], [6, 5, 4]]
        """
        ### BEGIN SOLUTION
        if np.random.random() >= self.p:
            return x
        # np.flip returns a negatively-strided view; copy so downstream
        # consumers get an ordinary contiguous array.
        if isinstance(x, Tensor):
            return Tensor(np.flip(x.data, axis=-1).copy())
        return np.flip(x, axis=-1).copy()
        ### END SOLUTION
|
|
|
|
#| export
|
|
|
|
class RandomCrop:
    """
    Randomly crop an image after zero-padding its borders.

    The standard CIFAR-10 augmentation:
    1. Pad the image by ``padding`` pixels on each side.
    2. Crop a random window of the target size.

    This simulates small translations, forcing the model to recognize
    objects regardless of their exact position.

    Args:
        size: Output crop size (int for square, or tuple (H, W)).
        padding: Pixels of zero-padding per side before cropping (default: 4).
    """

    def __init__(self, size, padding=4):
        """
        Store crop parameters, normalizing ``size`` to an (H, W) tuple.

        Example:
            >>> crop = RandomCrop(32, padding=4)  # CIFAR-10 standard
            >>> # pads to 40x40, then crops back to 32x32
        """
        ### BEGIN SOLUTION
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size
        self.padding = padding
        ### END SOLUTION

    def __call__(self, x):
        """
        Pad ``x`` with zeros, then return a random crop of the target size.

        Args:
            x: Image as Tensor or ndarray with shape (H, W), (C, H, W),
                or (H, W, C). Spatial dimensions are assumed to be H, W.

        Returns:
            Cropped image of the target size, same type and layout as input.

        Raises:
            ValueError: If the input is not 2D or 3D.

        Example:
            >>> crop = RandomCrop(32, padding=4)
            >>> out = crop(np.random.randn(3, 32, 32))  # (C, H, W)
            >>> out.shape  # (3, 32, 32)
        """
        ### BEGIN SOLUTION
        is_tensor = isinstance(x, Tensor)
        data = x.data if is_tensor else x

        target_h, target_w = self.size
        pad = self.padding

        if len(data.shape) == 2:
            # (H, W) grayscale image: pad both axes uniformly.
            h, w = data.shape
            padded = np.pad(data, pad, mode='constant', constant_values=0)

            # Valid top-left corners span [0, padded_dim - target_dim].
            top = np.random.randint(0, h + 2 * pad - target_h + 1)
            left = np.random.randint(0, w + 2 * pad - target_w + 1)

            cropped = padded[top:top + target_h, left:left + target_w]

        elif len(data.shape) == 3:
            if data.shape[0] <= 4:  # Heuristic: small leading dim => (C, H, W)
                c, h, w = data.shape
                # Pad only the spatial dimensions.
                padded = np.pad(data,
                                ((0, 0), (pad, pad), (pad, pad)),
                                mode='constant', constant_values=0)

                # BUG FIX: the offset range must be padded_dim - target_dim + 1
                # positions; the previous `2 * padding + 1` was only correct
                # when the target size equaled the input size, biasing or
                # truncating crops of any other size.
                top = np.random.randint(0, h + 2 * pad - target_h + 1)
                left = np.random.randint(0, w + 2 * pad - target_w + 1)

                cropped = padded[:, top:top + target_h, left:left + target_w]
            else:  # (H, W, C) channels-last layout
                h, w, c = data.shape
                padded = np.pad(data,
                                ((pad, pad), (pad, pad), (0, 0)),
                                mode='constant', constant_values=0)

                # Same fix as the (C, H, W) branch above.
                top = np.random.randint(0, h + 2 * pad - target_h + 1)
                left = np.random.randint(0, w + 2 * pad - target_w + 1)

                cropped = padded[top:top + target_h, left:left + target_w, :]
        else:
            raise ValueError(f"Expected 2D or 3D input, got shape {data.shape}")

        return Tensor(cropped) if is_tensor else cropped
        ### END SOLUTION
|
|
|
|
#| export
|
|
|
|
class Compose:
    """
    Chain several transforms into one callable pipeline.

    The output of each transform is fed as input to the next, in list
    order.

    Args:
        transforms: List of callables, each taking and returning a sample.
    """

    def __init__(self, transforms):
        """
        Store the transform pipeline.

        Example:
            >>> pipeline = Compose([
            ...     RandomHorizontalFlip(0.5),
            ...     RandomCrop(32, padding=4),
            ... ])
        """
        self.transforms = transforms

    def __call__(self, x):
        """Run ``x`` through every transform in order and return the result."""
        result = x
        for step in self.transforms:
            result = step(result)
        return result
|