# ╔═══════════════════════════════════════════════════════════════════════════════╗ # ║ 🚨 CRITICAL WARNING 🚨 ║ # ║ AUTOGENERATED! DO NOT EDIT! ║ # ║ ║ # ║ This file is AUTOMATICALLY GENERATED from source modules. ║ # ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║ # ║ ║ # ║ ✅ TO EDIT: modules/source/02_tensor/tensor_dev.py ║ # ║ ✅ TO EXPORT: Run 'tito module complete ' ║ # ║ ║ # ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║ # ║ Editing it directly may break module functionality and training. ║ # ║ ║ # ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║ # ║ happens! The tinytorch/ directory is just the compiled output. ║ # ╚═══════════════════════════════════════════════════════════════════════════════╝ # %% auto 0 __all__ = ['Tensor'] # %% ../../modules/source/01_tensor/tensor_dev.ipynb 1 import numpy as np # %% ../../modules/source/01_tensor/tensor_dev.ipynb 6 class Tensor: """Educational tensor that grows with student knowledge. This class starts simple but includes dormant features for future modules: - requires_grad: Will be used for automatic differentiation (Module 05) - grad: Will store computed gradients (Module 05) - backward(): Will compute gradients (Module 05) For now, focus on: data, shape, and basic operations. """ def __init__(self, data, requires_grad=False): """ Create a new tensor from data. TODO: Initialize tensor attributes APPROACH: 1. Convert data to NumPy array - handles lists, scalars, etc. 2. Store shape and size for quick access 3. Set up gradient tracking (dormant until Module 05) EXAMPLE: >>> tensor = Tensor([1, 2, 3]) >>> print(tensor.data) [1 2 3] >>> print(tensor.shape) (3,) HINT: np.array() handles type conversion automatically """ ### BEGIN SOLUTION # Core tensor data - always present self.data = np.array(data, dtype=np.float32) # Consistent float32 for ML self.shape = self.data.shape self.size = self.data.size self.dtype = self.data.dtype # Gradient features (dormant until Module 05) self.requires_grad = requires_grad self.grad = None ### END SOLUTION def __repr__(self): """String representation of tensor for debugging.""" grad_info = f", requires_grad={self.requires_grad}" if self.requires_grad else "" return f"Tensor(data={self.data}, shape={self.shape}{grad_info})" def __str__(self): """Human-readable string representation.""" return f"Tensor({self.data})" def numpy(self): """Return the underlying NumPy array.""" return self.data # nbgrader={\"grade\": false, \"grade_id\": \"addition-impl\", \"solution\": true} def __add__(self, other): """ Add two tensors element-wise with broadcasting support. TODO: Implement tensor addition with automatic broadcasting APPROACH: 1. Handle both Tensor and scalar inputs 2. Use NumPy's broadcasting for automatic shape alignment 3. Return new Tensor with result (don't modify self) EXAMPLE: >>> a = Tensor([1, 2, 3]) >>> b = Tensor([4, 5, 6]) >>> result = a + b >>> print(result.data) [5. 7. 9.] BROADCASTING EXAMPLE: >>> matrix = Tensor([[1, 2], [3, 4]]) # Shape: (2, 2) >>> vector = Tensor([10, 20]) # Shape: (2,) >>> result = matrix + vector # Broadcasting: (2,2) + (2,) → (2,2) >>> print(result.data) [[11. 22.] [13. 24.]] HINTS: - Use isinstance() to check if other is a Tensor - NumPy handles broadcasting automatically with + - Always return a new Tensor, don't modify self - Preserve gradient tracking for future modules """ ### BEGIN SOLUTION if isinstance(other, Tensor): # Tensor + Tensor: let NumPy handle broadcasting result_data = self.data + other.data else: # Tensor + scalar: NumPy broadcasts automatically result_data = self.data + other # Create new tensor with result result = Tensor(result_data) # Preserve gradient tracking if either operand requires gradients if hasattr(self, 'requires_grad') and hasattr(other, 'requires_grad'): result.requires_grad = self.requires_grad or (isinstance(other, Tensor) and other.requires_grad) elif hasattr(self, 'requires_grad'): result.requires_grad = self.requires_grad return result ### END SOLUTION # nbgrader={"grade": false, "grade_id": "more-arithmetic", "solution": true} def __sub__(self, other): """ Subtract two tensors element-wise. Common use: Centering data (x - mean), computing differences for loss functions. """ if isinstance(other, Tensor): return Tensor(self.data - other.data) else: return Tensor(self.data - other) def __mul__(self, other): """ Multiply two tensors element-wise (NOT matrix multiplication). Common use: Scaling features, applying masks, gating mechanisms in neural networks. Note: This is * operator, not @ (which will be matrix multiplication). """ if isinstance(other, Tensor): return Tensor(self.data * other.data) else: return Tensor(self.data * other) def __truediv__(self, other): """ Divide two tensors element-wise. Common use: Normalization (x / std), converting counts to probabilities. """ if isinstance(other, Tensor): return Tensor(self.data / other.data) else: return Tensor(self.data / other) # nbgrader={"grade": false, "grade_id": "matmul-impl", "solution": true} def matmul(self, other): """ Matrix multiplication of two tensors. TODO: Implement matrix multiplication using np.dot with proper validation APPROACH: 1. Validate inputs are Tensors 2. Check dimension compatibility (inner dimensions must match) 3. Use np.dot for optimized computation 4. Return new Tensor with result EXAMPLE: >>> a = Tensor([[1, 2], [3, 4]]) # 2×2 >>> b = Tensor([[5, 6], [7, 8]]) # 2×2 >>> result = a.matmul(b) # 2×2 result >>> # Result: [[1×5+2×7, 1×6+2×8], [3×5+4×7, 3×6+4×8]] = [[19, 22], [43, 50]] SHAPE RULES: - (M, K) @ (K, N) → (M, N) ✓ Valid - (M, K) @ (J, N) → Error ✗ K ≠ J COMPLEXITY: O(M×N×K) for (M×K) @ (K×N) matrices HINTS: - np.dot handles the optimization for us - Check self.shape[-1] == other.shape[-2] for compatibility - Provide clear error messages for debugging """ ### BEGIN SOLUTION if not isinstance(other, Tensor): raise TypeError(f"Expected Tensor for matrix multiplication, got {type(other)}") # Handle edge cases if self.shape == () or other.shape == (): # Scalar multiplication return Tensor(self.data * other.data) # For matrix multiplication, we need at least 1D tensors if len(self.shape) == 0 or len(other.shape) == 0: return Tensor(self.data * other.data) # Check dimension compatibility for matrix multiplication if len(self.shape) >= 2 and len(other.shape) >= 2: if self.shape[-1] != other.shape[-2]: raise ValueError( f"Cannot perform matrix multiplication: {self.shape} @ {other.shape}. " f"Inner dimensions must match: {self.shape[-1]} ≠ {other.shape[-2]}. " f"💡 HINT: For (M,K) @ (K,N) → (M,N), the K dimensions must be equal." ) elif len(self.shape) == 1 and len(other.shape) == 2: # Vector @ Matrix if self.shape[0] != other.shape[0]: raise ValueError( f"Cannot multiply vector {self.shape} with matrix {other.shape}. " f"Vector length {self.shape[0]} must match matrix rows {other.shape[0]}." ) elif len(self.shape) == 2 and len(other.shape) == 1: # Matrix @ Vector if self.shape[1] != other.shape[0]: raise ValueError( f"Cannot multiply matrix {self.shape} with vector {other.shape}. " f"Matrix columns {self.shape[1]} must match vector length {other.shape[0]}." ) # Perform optimized matrix multiplication result_data = np.dot(self.data, other.data) return Tensor(result_data) ### END SOLUTION # nbgrader={"grade": false, "grade_id": "shape-ops", "solution": true} def reshape(self, *shape): """ Reshape tensor to new dimensions. TODO: Implement tensor reshaping with validation APPROACH: 1. Handle different calling conventions: reshape(2, 3) vs reshape((2, 3)) 2. Validate total elements remain the same 3. Use NumPy's reshape for the actual operation 4. Return new Tensor (keep immutability) EXAMPLE: >>> tensor = Tensor([1, 2, 3, 4, 5, 6]) # Shape: (6,) >>> reshaped = tensor.reshape(2, 3) # Shape: (2, 3) >>> print(reshaped.data) [[1. 2. 3.] [4. 5. 6.]] COMMON USAGE: >>> # Flatten for MLP input >>> image = Tensor(np.random.rand(3, 32, 32)) # (channels, height, width) >>> flattened = image.reshape(-1) # (3072,) - all pixels in vector >>> >>> # Prepare batch for convolution >>> batch = Tensor(np.random.rand(32, 784)) # (batch, features) >>> images = batch.reshape(32, 1, 28, 28) # (batch, channels, height, width) HINTS: - Handle both reshape(2, 3) and reshape((2, 3)) calling styles - Check np.prod(new_shape) == self.size for validation - Use descriptive error messages for debugging """ ### BEGIN SOLUTION # Handle both reshape(2, 3) and reshape((2, 3)) calling conventions if len(shape) == 1 and isinstance(shape[0], (tuple, list)): new_shape = tuple(shape[0]) else: new_shape = shape # Handle -1 for automatic dimension inference (like NumPy) if -1 in new_shape: if new_shape.count(-1) > 1: raise ValueError("Can only specify one unknown dimension with -1") # Calculate the unknown dimension known_size = 1 unknown_idx = new_shape.index(-1) for i, dim in enumerate(new_shape): if i != unknown_idx: known_size *= dim unknown_dim = self.size // known_size new_shape = list(new_shape) new_shape[unknown_idx] = unknown_dim new_shape = tuple(new_shape) # Validate total elements remain the same if np.prod(new_shape) != self.size: raise ValueError( f"Cannot reshape tensor of size {self.size} to shape {new_shape}. " f"Total elements must match: {self.size} ≠ {np.prod(new_shape)}. " f"💡 HINT: Make sure new_shape dimensions multiply to {self.size}" ) # Reshape the data (NumPy handles the memory layout efficiently) reshaped_data = np.reshape(self.data, new_shape) # Create output tensor preserving gradient tracking result = Tensor(reshaped_data, requires_grad=self.requires_grad) # Set up backward function for autograd if self.requires_grad: from tinytorch.core.autograd import ReshapeBackward result._grad_fn = ReshapeBackward() result._grad_fn.saved_tensors = (self,) return result ### END SOLUTION def transpose(self, dim0=None, dim1=None): """ Transpose tensor dimensions. TODO: Implement tensor transposition APPROACH: 1. Handle default case (transpose last two dimensions) 2. Handle specific dimension swapping 3. Use NumPy's transpose with proper axis specification 4. Return new Tensor EXAMPLE: >>> matrix = Tensor([[1, 2, 3], [4, 5, 6]]) # (2, 3) >>> transposed = matrix.transpose() # (3, 2) >>> print(transposed.data) [[1. 4.] [2. 5.] [3. 6.]] NEURAL NETWORK USAGE: >>> # Weight matrix transpose for backward pass >>> W = Tensor([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (3, 2) >>> W_T = W.transpose() # (2, 3) - for gradient computation >>> >>> # Attention mechanism >>> Q = Tensor([[1, 2], [3, 4]]) # queries (2, 2) >>> K = Tensor([[5, 6], [7, 8]]) # keys (2, 2) >>> attention_scores = Q.matmul(K.transpose()) # Q @ K^T HINTS: - Default: transpose last two dimensions (most common case) - Use np.transpose() with axes parameter - Handle 1D tensors gracefully (transpose is identity) """ ### BEGIN SOLUTION if dim0 is None and dim1 is None: # Default: transpose last two dimensions if len(self.shape) < 2: # For 1D tensors, transpose is identity operation return Tensor(self.data.copy()) else: # Transpose last two dimensions (most common in ML) axes = list(range(len(self.shape))) axes[-2], axes[-1] = axes[-1], axes[-2] transposed_data = np.transpose(self.data, axes) else: # Specific dimensions to transpose if dim0 is None or dim1 is None: raise ValueError("Both dim0 and dim1 must be specified for specific dimension transpose") # Validate dimensions exist if dim0 >= len(self.shape) or dim1 >= len(self.shape) or dim0 < 0 or dim1 < 0: raise ValueError( f"Dimension out of range for tensor with shape {self.shape}. " f"Got dim0={dim0}, dim1={dim1}, but tensor has {len(self.shape)} dimensions." ) # Create axes list and swap the specified dimensions axes = list(range(len(self.shape))) axes[dim0], axes[dim1] = axes[dim1], axes[dim0] transposed_data = np.transpose(self.data, axes) return Tensor(transposed_data) ### END SOLUTION # nbgrader={"grade": false, "grade_id": "reduction-ops", "solution": true} def sum(self, axis=None, keepdims=False): """ Sum tensor along specified axis. TODO: Implement tensor sum with axis control APPROACH: 1. Use NumPy's sum with axis parameter 2. Handle axis=None (sum all elements) vs specific axis 3. Support keepdims to maintain shape for broadcasting 4. Return new Tensor with result EXAMPLE: >>> tensor = Tensor([[1, 2], [3, 4]]) >>> total = tensor.sum() # Sum all elements: 10 >>> col_sum = tensor.sum(axis=0) # Sum columns: [4, 6] >>> row_sum = tensor.sum(axis=1) # Sum rows: [3, 7] NEURAL NETWORK USAGE: >>> # Batch loss computation >>> batch_losses = Tensor([0.1, 0.3, 0.2, 0.4]) # Individual losses >>> total_loss = batch_losses.sum() # Total: 1.0 >>> avg_loss = batch_losses.mean() # Average: 0.25 >>> >>> # Global average pooling >>> feature_maps = Tensor(np.random.rand(32, 256, 7, 7)) # (batch, channels, h, w) >>> global_features = feature_maps.sum(axis=(2, 3)) # (batch, channels) HINTS: - np.sum handles all the complexity for us - axis=None sums all elements (returns scalar) - axis=0 sums along first dimension, axis=1 along second, etc. - keepdims=True preserves dimensions for broadcasting """ ### BEGIN SOLUTION result = np.sum(self.data, axis=axis, keepdims=keepdims) return Tensor(result) ### END SOLUTION def mean(self, axis=None, keepdims=False): """ Compute mean of tensor along specified axis. Common usage: Batch normalization, loss averaging, global pooling. """ ### BEGIN SOLUTION result = np.mean(self.data, axis=axis, keepdims=keepdims) return Tensor(result) ### END SOLUTION def max(self, axis=None, keepdims=False): """ Find maximum values along specified axis. Common usage: Max pooling, finding best predictions, activation clipping. """ ### BEGIN SOLUTION result = np.max(self.data, axis=axis, keepdims=keepdims) return Tensor(result) ### END SOLUTION # nbgrader={"grade": false, "grade_id": "gradient-placeholder", "solution": true} def backward(self): """ Compute gradients (implemented in Module 05: Autograd). TODO: Placeholder implementation for gradient computation STUDENT NOTE: This method exists but does nothing until Module 05: Autograd. Don't worry about it for now - focus on the basic tensor operations. In Module 05, we'll implement: - Gradient computation via chain rule - Automatic differentiation - Backpropagation through operations - Computation graph construction FUTURE IMPLEMENTATION PREVIEW: ```python def backward(self, gradient=None): # Module 05 will implement: # 1. Set gradient for this tensor # 2. Propagate to parent operations # 3. Apply chain rule recursively # 4. Accumulate gradients properly pass ``` CURRENT BEHAVIOR: >>> x = Tensor([1, 2, 3], requires_grad=True) >>> y = x * 2 >>> y.sum().backward() # Calls this method - does nothing >>> print(x.grad) # Still None None """ ### BEGIN SOLUTION # Placeholder - will be implemented in Module 05 # For now, just ensure it doesn't crash when called # This allows students to experiment with gradient syntax # without getting confusing errors about missing methods pass ### END SOLUTION