# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║                            🚨 CRITICAL WARNING 🚨                             ║
# ║                          AUTOGENERATED! DO NOT EDIT!                          ║
# ║                                                                               ║
# ║  This file is AUTOMATICALLY GENERATED from source modules.                    ║
# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!             ║
# ║                                                                               ║
# ║  ✅ TO EDIT: src/XX_loader/XX_loader.py                                       ║
# ║  ✅ TO EXPORT: Run 'tito module complete '                                    ║
# ║                                                                               ║
# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.         ║
# ║  Editing it directly may break module functionality and training.             ║
# ║                                                                               ║
# ║  🎓 LEARNING TIP: Work in src/ (developers) or modules/ (learners)            ║
# ║  The tinytorch/ directory is generated code - edit source files instead!      ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['Dataset', 'TensorDataset', 'DataLoader', 'RandomHorizontalFlip', 'RandomCrop', 'Compose']

# %% ../../modules/08_dataloader/08_dataloader.ipynb 0
#| default_exp data.loader
#| export

# %% ../../modules/08_dataloader/08_dataloader.ipynb 2
# Essential imports for data loading
import numpy as np
import random
import time
import sys
from typing import Iterator, Tuple, List, Optional, Union
from abc import ABC, abstractmethod

# Import real Tensor class from tinytorch package
from ..core.tensor import Tensor

# %% ../../modules/08_dataloader/08_dataloader.ipynb 4
class Dataset(ABC):
    """
    Abstract base class for all datasets.

    Provides the fundamental interface that all datasets must implement:
    - __len__(): Returns the total number of samples
    - __getitem__(idx): Returns the sample at given index

    TODO: Implement the abstract Dataset base class

    APPROACH:
    1. Use ABC (Abstract Base Class) to define interface
    2. Mark methods as @abstractmethod to force implementation
    3. Provide clear docstrings for subclasses

    EXAMPLE:
    >>> class MyDataset(Dataset):
    ...     def __len__(self): return 100
    ...     def __getitem__(self, idx): return idx
    >>> dataset = MyDataset()
    >>> print(len(dataset))  # 100
    >>> print(dataset[42])   # 42

    HINT: Abstract methods force subclasses to implement core functionality
    """

    ### BEGIN SOLUTION
    @abstractmethod
    def __len__(self) -> int:
        """
        Return the total number of samples in the dataset.

        This method must be implemented by all subclasses to enable
        len(dataset) calls and batch size calculations.
        """
        pass

    @abstractmethod
    def __getitem__(self, idx: int):
        """
        Return the sample at the given index.

        Args:
            idx: Index of the sample to retrieve (0 <= idx < len(dataset))

        Returns:
            The sample at index idx. Format depends on the dataset implementation.
            Could be (data, label) tuple, single tensor, etc.
        """
        pass
    ### END SOLUTION

# %% ../../modules/08_dataloader/08_dataloader.ipynb 7
class TensorDataset(Dataset):
    """
    Dataset wrapping tensors for supervised learning.

    Each sample is a tuple of tensors from the same index across all input tensors.
    All tensors must have the same size in their first dimension.

    TODO: Implement TensorDataset for tensor-based data

    APPROACH:
    1. Store all input tensors
    2. Validate they have same first dimension (number of samples)
    3. Return tuple of tensor slices for each index

    EXAMPLE:
    >>> features = Tensor([[1, 2], [3, 4], [5, 6]])  # 3 samples, 2 features each
    >>> labels = Tensor([0, 1, 0])                    # 3 labels
    >>> dataset = TensorDataset(features, labels)
    >>> print(len(dataset))  # 3
    >>> print(dataset[1])    # (Tensor([3, 4]), Tensor(1))

    HINTS:
    - Use *tensors to accept variable number of tensor arguments
    - Check all tensors have same length in dimension 0
    - Return tuple of tensor[idx] for all tensors
    """

    def __init__(self, *tensors):
        """
        Create dataset from multiple tensors.

        Args:
            *tensors: Variable number of Tensor objects. All tensors must
                have the same size in their first dimension.

        Raises:
            AssertionError: If no tensors are given
            ValueError: If the tensors disagree on their first-dimension size
        """
        ### BEGIN SOLUTION
        assert len(tensors) > 0, "Must provide at least one tensor"

        # Store all tensors
        self.tensors = tensors

        # Validate all tensors have same first dimension
        first_size = len(tensors[0].data)  # Size of first dimension
        for i, tensor in enumerate(tensors):
            if len(tensor.data) != first_size:
                raise ValueError(
                    f"All tensors must have same size in first dimension. "
                    f"Tensor 0: {first_size}, Tensor {i}: {len(tensor.data)}"
                )
        ### END SOLUTION

    def __len__(self) -> int:
        """Return number of samples (size of first dimension)."""
        ### BEGIN SOLUTION
        return len(self.tensors[0].data)
        ### END SOLUTION

    def __getitem__(self, idx: int) -> Tuple[Tensor, ...]:
        """
        Return tuple of tensor slices at given index.

        Args:
            idx: Sample index (0 <= idx < len(dataset); negative indices
                are rejected rather than wrapped)

        Returns:
            Tuple containing tensor[idx] for each input tensor

        Raises:
            IndexError: If idx is negative or >= len(dataset)
        """
        ### BEGIN SOLUTION
        if idx >= len(self) or idx < 0:
            raise IndexError(f"Index {idx} out of range for dataset of size {len(self)}")

        # Return tuple of slices from all tensors
        return tuple(Tensor(tensor.data[idx]) for tensor in self.tensors)
        ### END SOLUTION

# %% ../../modules/08_dataloader/08_dataloader.ipynb 10
class DataLoader:
    """
    Data loader with batching and shuffling support.

    Wraps a dataset to provide batched iteration with optional shuffling.
    Essential for efficient training with mini-batch gradient descent.

    TODO: Implement DataLoader with batching and shuffling

    APPROACH:
    1. Store dataset, batch_size, and shuffle settings
    2. Create iterator that groups samples into batches
    3. Handle shuffling by randomizing indices
    4. Collate individual samples into batch tensors

    EXAMPLE:
    >>> dataset = TensorDataset(Tensor([[1,2], [3,4], [5,6]]), Tensor([0,1,0]))
    >>> loader = DataLoader(dataset, batch_size=2, shuffle=True)
    >>> for batch in loader:
    ...     features_batch, labels_batch = batch
    ...     print(f"Features: {features_batch.shape}, Labels: {labels_batch.shape}")

    HINTS:
    - Use random.shuffle() for index shuffling
    - Group consecutive samples into batches
    - Stack individual tensors using np.stack()
    """

    def __init__(self, dataset: Dataset, batch_size: int, shuffle: bool = False):
        """
        Create DataLoader for batched iteration.

        Args:
            dataset: Dataset to load from
            batch_size: Number of samples per batch (must be positive)
            shuffle: Whether to shuffle data each epoch

        Raises:
            ValueError: If batch_size is zero or negative
        """
        ### BEGIN SOLUTION
        # Reject unusable batch sizes up front. Without this check, 0 only
        # fails later (ZeroDivisionError in __len__, range() step error in
        # __iter__) and a negative value makes __iter__ silently yield nothing.
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        ### END SOLUTION

    def __len__(self) -> int:
        """Return number of batches per epoch (a trailing partial batch counts)."""
        ### BEGIN SOLUTION
        # Ceiling division: the last batch may be smaller than batch_size
        # but is still yielded by __iter__, so it must be counted here.
        return (len(self.dataset) + self.batch_size - 1) // self.batch_size
        ### END SOLUTION

    def __iter__(self) -> Iterator:
        """
        Return iterator over batches.

        Each call builds a fresh index order, so with shuffle=True every
        epoch (every new iteration) sees a different permutation.
        """
        ### BEGIN SOLUTION
        # Create list of indices
        indices = list(range(len(self.dataset)))

        # Shuffle if requested
        if self.shuffle:
            random.shuffle(indices)

        # Yield batches
        for i in range(0, len(indices), self.batch_size):
            batch_indices = indices[i:i + self.batch_size]
            batch = [self.dataset[idx] for idx in batch_indices]

            # Collate batch - convert list of tuples to tuple of tensors
            yield self._collate_batch(batch)
        ### END SOLUTION

    def _collate_batch(self, batch: List[Tuple[Tensor, ...]]) -> Tuple[Tensor, ...]:
        """
        Collate individual samples into batch tensors.

        Args:
            batch: List of sample tuples from dataset

        Returns:
            Tuple of batched tensors, one per position in the sample tuple,
            each stacked along a new leading axis. Empty tuple for an
            empty batch.
        """
        ### BEGIN SOLUTION
        if len(batch) == 0:
            return ()

        # Determine number of tensors per sample
        num_tensors = len(batch[0])

        # Group tensors by position
        batched_tensors = []
        for tensor_idx in range(num_tensors):
            # Extract all tensors at this position
            tensor_list = [sample[tensor_idx].data for sample in batch]

            # Stack into batch tensor
            batched_data = np.stack(tensor_list, axis=0)
            batched_tensors.append(Tensor(batched_data))

        return tuple(batched_tensors)
        ### END SOLUTION

# %% ../../modules/08_dataloader/08_dataloader.ipynb 12
class RandomHorizontalFlip:
    """
    Randomly flip images horizontally with given probability.

    A simple but effective augmentation for most image datasets.
    Flipping is appropriate when horizontal orientation doesn't change class
    (cats, dogs, cars - not digits or text!).

    Args:
        p: Probability of flipping (default: 0.5)
    """

    def __init__(self, p=0.5):
        """
        Initialize RandomHorizontalFlip.

        TODO: Store flip probability

        EXAMPLE:
        >>> flip = RandomHorizontalFlip(p=0.5)  # 50% chance to flip

        Raises:
            ValueError: If p is outside [0, 1]
        """
        ### BEGIN SOLUTION
        if not 0.0 <= p <= 1.0:
            raise ValueError(f"Probability must be between 0 and 1, got {p}")
        self.p = p
        ### END SOLUTION

    def __call__(self, x):
        """
        Apply random horizontal flip to input.

        TODO: Implement random horizontal flip

        APPROACH:
        1. Generate random number in [0, 1)
        2. If random < p, flip horizontally
        3. Otherwise, return unchanged

        Args:
            x: Input array or Tensor whose LAST axis is the width axis,
               i.e. shape (..., H, W) such as (H, W) or (C, H, W).
               Channel-last (H, W, C) input is NOT handled: its last axis
               is the channel axis, so the channels would be reversed
               instead of the image being mirrored.

        Returns:
            A flipped copy (same shape and container type as the input) when
            the flip fires, otherwise the input object itself, unchanged.

        EXAMPLE:
        >>> flip = RandomHorizontalFlip(0.5)
        >>> img = np.array([[1, 2, 3], [4, 5, 6]])  # 2x3 image
        >>> # 50% chance output is [[3, 2, 1], [6, 5, 4]]

        HINT: Use np.flip(x, axis=-1) to flip along width axis
        """
        ### BEGIN SOLUTION
        if np.random.random() < self.p:
            # Flip along the last axis, which is the width axis for
            # (..., H, W) layouts. np.flip returns a view, so .copy() is
            # used to hand back an independent, contiguous array.
            if isinstance(x, Tensor):
                return Tensor(np.flip(x.data, axis=-1).copy())
            else:
                return np.flip(x, axis=-1).copy()
        return x
        ### END SOLUTION

#| export

class RandomCrop:
    """
    Randomly crop image after padding.

    This is the standard augmentation for CIFAR-10:
    1. Pad image by `padding` pixels on each side
    2. Randomly crop back to original size

    This simulates small translations in the image, forcing the model
    to recognize objects regardless of their exact position.

    Args:
        size: Output crop size (int for square, or tuple (H, W))
        padding: Pixels to pad on each side before cropping (default: 4)
    """

    def __init__(self, size, padding=4):
        """
        Initialize RandomCrop.

        TODO: Store crop parameters

        EXAMPLE:
        >>> crop = RandomCrop(32, padding=4)  # CIFAR-10 standard
        >>> # Pads to 40x40, then crops back to 32x32
        """
        ### BEGIN SOLUTION
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size
        self.padding = padding
        ### END SOLUTION

    def __call__(self, x):
        """
        Apply random crop after padding.

        TODO: Implement random crop with padding

        APPROACH:
        1. Add zero-padding to all sides
        2. Choose random top-left corner for crop
        3. Extract crop of target size

        Args:
            x: Input image (array or Tensor) with shape (H, W), (C, H, W)
               or (H, W, C). A 3D input is treated as (C, H, W) when its
               first dimension is <= 4, otherwise as (H, W, C).

        Returns:
            Cropped image with spatial size `self.size`; a Tensor if the
            input was a Tensor, otherwise an array.

        Raises:
            ValueError: If the input is not 2D or 3D, or if the crop size is
                larger than the padded image.

        EXAMPLE:
        >>> crop = RandomCrop(32, padding=4)
        >>> img = np.random.randn(3, 32, 32)  # CIFAR-10 format (C, H, W)
        >>> out = crop(img)
        >>> print(out.shape)  # (3, 32, 32)

        HINTS:
        - Use np.pad for adding zeros
        - Handle both (C, H, W) and (H, W) formats
        - Random offsets should be in [0, 2*padding] when the crop size
          equals the input size (in general: [0, 2*padding + H - crop_H])
        """
        ### BEGIN SOLUTION
        is_tensor = isinstance(x, Tensor)
        data = x.data if is_tensor else x

        target_h, target_w = self.size
        ndim = len(data.shape)

        # Locate the spatial (height, width) axes for the detected layout
        if ndim == 2:
            # (H, W) format
            h_axis, w_axis = 0, 1
        elif ndim == 3:
            if data.shape[0] <= 4:
                # Likely (C, H, W) format
                h_axis, w_axis = 1, 2
            else:
                # Likely (H, W, C) format
                h_axis, w_axis = 0, 1
        else:
            raise ValueError(f"Expected 2D or 3D input, got shape {data.shape}")

        h, w = data.shape[h_axis], data.shape[w_axis]

        # Largest valid top-left corner inside the padded image. The same
        # formula is used for every layout so the crop never runs past the
        # padded edge when the crop size differs from the input size.
        max_top = 2 * self.padding + h - target_h
        max_left = 2 * self.padding + w - target_w
        if max_top < 0 or max_left < 0:
            raise ValueError(
                f"Crop size {(target_h, target_w)} is larger than padded image "
                f"size {(h + 2 * self.padding, w + 2 * self.padding)}"
            )

        # Pad only spatial dimensions (the channel axis, if any, is untouched)
        pad_width = [(0, 0)] * ndim
        pad_width[h_axis] = (self.padding, self.padding)
        pad_width[w_axis] = (self.padding, self.padding)
        padded = np.pad(data, pad_width, mode='constant', constant_values=0)

        # Random crop position (randint's upper bound is exclusive)
        top = np.random.randint(0, max_top + 1)
        left = np.random.randint(0, max_left + 1)

        # Slice the spatial axes, keep every other axis whole
        index = [slice(None)] * ndim
        index[h_axis] = slice(top, top + target_h)
        index[w_axis] = slice(left, left + target_w)
        cropped = padded[tuple(index)]

        return Tensor(cropped) if is_tensor else cropped
        ### END SOLUTION

#| export

class Compose:
    """
    Compose multiple transforms into a pipeline.

    Applies transforms in sequence, passing output of each
    as input to the next.

    Args:
        transforms: List of transform callables
    """

    def __init__(self, transforms):
        """
        Initialize Compose with list of transforms.

        EXAMPLE:
        >>> transforms = Compose([
        ...     RandomHorizontalFlip(0.5),
        ...     RandomCrop(32, padding=4)
        ... ])
        """
        self.transforms = transforms

    def __call__(self, x):
        """Apply all transforms in sequence and return the final result."""
        for transform in self.transforms:
            x = transform(x)
        return x