# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/02_tensor/tensor_dev.ipynb.

# %% auto 0
__all__ = ['Tensor', 'Parameter']

# %% ../../modules/02_tensor/tensor_dev.ipynb 1
import numpy as np
import sys
from typing import Union, Tuple, Optional, Any

# %% ../../modules/02_tensor/tensor_dev.ipynb 3
class Tensor:
    """
    TinyTorch Tensor: N-dimensional array with ML operations.

    The fundamental data structure for all TinyTorch operations.
    Wraps a NumPy array (stored in ``self._data``) with ML-specific
    functionality: arithmetic operators, matrix multiplication, a gradient
    placeholder, and NumPy interoperability hooks.

    Default dtype policy when ``dtype`` is not given:
        - Python/NumPy float scalars and float64 arrays/sequences -> float32
        - Python/NumPy integer scalars -> int32 (int64 if the value does not fit)
        - everything else keeps the dtype NumPy infers
    """

    def __init__(self, data: Any, dtype: Optional[str] = None, requires_grad: bool = False):
        """
        Create a new tensor from data.

        Args:
            data: Input data (scalar, list/tuple, numpy array, or another Tensor).
                Arrays and Tensors are always copied, never aliased.
            dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect
                (see the class docstring for the policy).
            requires_grad: Whether this tensor needs gradients for training.
                Defaults to False.

        Examples:
            Tensor(5)                    -> int32 scalar tensor
            Tensor([1.0, 2.0])           -> float32 tensor
            Tensor(np.array([1, 2, 3]))  -> copy of the array, dtype preserved

        Note:
            Integer sequences keep NumPy's default integer type, which is
            platform dependent (commonly int64).
        """
        if isinstance(data, Tensor):
            # Unwrap so the ndarray branch below handles dtype selection and copying.
            data = data._data

        if isinstance(data, np.ndarray):
            if dtype is None:
                # Keep the existing dtype, but prefer float32 over float64.
                dtype = np.float32 if data.dtype == np.float64 else data.dtype
            if np.dtype(dtype) != data.dtype:
                self._data = data.astype(dtype)
            else:
                self._data = data.copy()
        elif isinstance(data, (int, float, np.number)) and not isinstance(data, bool):
            # bool is excluded on purpose: it is an int subclass, but should stay
            # boolean like np.bool_ scalars and lists of bools do.
            if dtype is None:
                dtype = self._default_scalar_dtype(data)
            self._data = np.array(data, dtype=dtype)
        elif dtype is not None:
            # Explicit dtype: let NumPy convert (and range-check) in a single pass.
            self._data = np.array(data, dtype=dtype)
        else:
            # Lists, tuples and anything else NumPy can interpret.
            inferred = np.array(data)
            if inferred.dtype == np.float64:
                inferred = inferred.astype(np.float32)
            self._data = inferred

        # Gradient tracking attributes (populated by backward()).
        self.requires_grad = requires_grad
        self.grad = None
        self._grad_fn = None

    @staticmethod
    def _default_scalar_dtype(value):
        """Pick the default dtype for a numeric scalar; None lets NumPy infer."""
        if isinstance(value, (int, np.integer)):
            bounds = np.iinfo(np.int32)
            # Fall back to int64 rather than wrapping or failing on large values.
            return np.int32 if bounds.min <= int(value) <= bounds.max else np.int64
        if isinstance(value, (float, np.floating)):
            return np.float32
        # e.g. complex NumPy scalars: forcing float32 would drop the imaginary part.
        return None

    @property
    def data(self) -> np.ndarray:
        """
        Access the underlying numpy array.

        Returns the stored array itself (no copy), so in-place edits to the
        returned array are visible through the tensor.
        """
        return self._data

    @data.setter
    def data(self, value: Union[np.ndarray, 'Tensor']) -> None:
        """
        Set the underlying data of the tensor.

        Args:
            value: New data (numpy array or Tensor). The value is copied;
                no dtype defaulting is applied here.
        """
        if isinstance(value, Tensor):
            self._data = value._data.copy()
        else:
            self._data = np.array(value)

    @property
    def shape(self) -> Tuple[int, ...]:
        """
        Get tensor shape.

        Example:
            Tensor([1, 2, 3]).shape returns (3,)
        """
        return self._data.shape

    @property
    def size(self) -> int:
        """
        Get total number of elements.

        Example:
            Tensor([1, 2, 3]).size returns 3
        """
        return self._data.size

    @property
    def dtype(self) -> np.dtype:
        """
        Get data type as numpy dtype.

        Example:
            Tensor([1.0, 2.0]).dtype returns dtype('float32')
        """
        return self._data.dtype

    def __repr__(self) -> str:
        """
        String representation: "Tensor(<nested list>, shape=<shape>, dtype=<dtype>)".

        Example:
            Tensor([1.5, 2.5]) -> "Tensor([1.5, 2.5], shape=(2,), dtype=float32)"
        """
        return f"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})"

    def add(self, other: 'Tensor') -> 'Tensor':
        """
        Add two tensors element-wise (NumPy broadcasting applies).

        Args:
            other: Tensor to add.

        Returns:
            New Tensor holding the sum.

        Example:
            Tensor([1, 2]).add(Tensor([3, 4])) -> Tensor([4, 6])
        """
        result = self._data + other._data
        return Tensor(result)

    def multiply(self, other: 'Tensor') -> 'Tensor':
        """
        Multiply two tensors element-wise (not matrix multiplication).

        Args:
            other: Tensor to multiply by.

        Returns:
            New Tensor holding the product.

        Example:
            Tensor([1, 2]).multiply(Tensor([3, 4])) -> Tensor([3, 8])
        """
        result = self._data * other._data
        return Tensor(result)

    def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """
        Addition operator: tensor + other

        Non-Tensor operands are converted to a Tensor first.

        Example:
            Tensor([1, 2]) + Tensor([3, 4]) -> Tensor([4, 6])
            Tensor([1, 2]) + 5 -> Tensor([6, 7])
        """
        if isinstance(other, Tensor):
            return self.add(other)
        return self.add(Tensor(other))

    def __radd__(self, other: Union[int, float]) -> 'Tensor':
        """Reflected addition: other + tensor (addition is commutative)."""
        return self.__add__(other)

    def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """
        Multiplication operator: tensor * other

        Non-Tensor operands are converted to a Tensor first.

        Example:
            Tensor([1, 2]) * Tensor([3, 4]) -> Tensor([3, 8])
            Tensor([1, 2]) * 3 -> Tensor([3, 6])
        """
        if isinstance(other, Tensor):
            return self.multiply(other)
        return self.multiply(Tensor(other))

    def __rmul__(self, other: Union[int, float]) -> 'Tensor':
        """Reflected multiplication: other * tensor (element-wise, commutative)."""
        return self.__mul__(other)

    def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """
        Subtraction operator: tensor - other

        Example:
            Tensor([5, 6]) - Tensor([1, 2]) -> Tensor([4, 4])
            Tensor([5, 6]) - 1 -> Tensor([4, 5])
        """
        if isinstance(other, Tensor):
            result = self._data - other._data
        else:
            result = self._data - other
        return Tensor(result)

    def __rsub__(self, other: Union[int, float]) -> 'Tensor':
        """Reflected subtraction: other - tensor."""
        return Tensor(other - self._data)

    def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """
        Division operator: tensor / other

        Example:
            Tensor([6, 8]) / Tensor([2, 4]) -> Tensor([3, 2])
            Tensor([6, 8]) / 2 -> Tensor([3, 4])
        """
        if isinstance(other, Tensor):
            result = self._data / other._data
        else:
            result = self._data / other
        return Tensor(result)

    def __rtruediv__(self, other: Union[int, float]) -> 'Tensor':
        """Reflected division: other / tensor."""
        return Tensor(other / self._data)

    def mean(self, axis=None, dtype=None, out=None, keepdims=False) -> 'Tensor':
        """
        Computes the mean of the tensor's elements.

        Args:
            axis: Axis or axes along which the means are computed.
            dtype: Type to use in computing the mean. When given, the returned
                tensor keeps this dtype instead of the float32 default.
            out: Alternative output array (not supported in TinyTorch).
            keepdims: If True, the axes which are reduced are left as dimensions with size one.

        Returns:
            New tensor with computed means.

        Raises:
            NotImplementedError: If ``out`` is provided.
        """
        if out is not None:
            raise NotImplementedError("out parameter not supported in TinyTorch")

        result = np.mean(self.data, axis=axis, dtype=dtype, keepdims=keepdims)
        return Tensor(result, dtype=dtype)

    def matmul(self, other: 'Tensor') -> 'Tensor':
        """
        Perform matrix multiplication between two tensors using explicit loops.

        This implementation uses triple-nested loops for educational understanding
        of the fundamental operations. Module 15 will show the optimization
        progression from loops -> blocking -> vectorized operations.

        Args:
            other: Right-hand operand; must be 2D with a leading dimension equal
                to this tensor's trailing dimension. Non-Tensor values are
                converted to a Tensor first.

        Returns:
            New Tensor of shape (m, n) for inputs of shape (m, k) and (k, n).

        Raises:
            ValueError: If either operand is not 2D or the inner dimensions differ.

        Example:
            Tensor([[1, 2], [3, 4]]) @ Tensor([[5, 6], [7, 8]]) -> Tensor([[19, 22], [43, 50]])
        """
        if not isinstance(other, Tensor):
            other = Tensor(other)
        a_data = self._data
        b_data = other._data

        # Get dimensions and validate compatibility
        if len(a_data.shape) != 2 or len(b_data.shape) != 2:
            raise ValueError("matmul requires 2D tensors")

        m, k = a_data.shape
        k2, n = b_data.shape

        if k != k2:
            raise ValueError(f"Inner dimensions must match: {k} != {k2}")

        # Accumulate in the common dtype of both operands; using only the left
        # operand's dtype would truncate float products into an integer result.
        result = np.zeros((m, n), dtype=np.result_type(a_data, b_data))

        # Triple nested loops - educational, shows every operation
        # This is intentionally simple to understand the fundamental computation
        # Module 15 will show the optimization journey:
        #   Step 1 (here): Educational loops - slow but clear
        #   Step 2: Loop blocking for cache efficiency
        #   Step 3: Vectorized operations with NumPy
        #   Step 4: GPU acceleration and BLAS libraries
        for i in range(m):                      # For each row in result
            for j in range(n):                  # For each column in result
                for k_idx in range(k):          # Dot product: sum over inner dimension
                    result[i, j] += a_data[i, k_idx] * b_data[k_idx, j]

        return Tensor(result)

    def __matmul__(self, other: 'Tensor') -> 'Tensor':
        """
        Matrix multiplication operator: tensor @ other

        Enables the @ operator for matrix multiplication, providing
        clean syntax for neural network operations.
        """
        return self.matmul(other)

    def backward(self, gradient=None):
        """
        Compute gradients for this tensor and propagate backward.

        This is a stub for now - full implementation in Module 09 (Autograd).
        For now, just accumulates gradients if requires_grad=True; it is a
        no-op when requires_grad is False.

        Args:
            gradient: Gradient from upstream. If None, a tensor of ones with
                this tensor's shape is used. Non-Tensor values are wrapped in
                a Tensor so that ``grad`` is always a Tensor or None.
        """
        if not self.requires_grad:
            return

        if gradient is None:
            # Scalar case - gradient is 1
            gradient = Tensor(np.ones_like(self._data))
        elif not isinstance(gradient, Tensor):
            gradient = Tensor(gradient)

        # Accumulate gradients
        if self.grad is None:
            self.grad = gradient
        else:
            self.grad = self.grad + gradient

    def zero_grad(self):
        """
        Reset gradients to None. Used by optimizers before backward pass.

        This method is called by optimizers to clear gradients before
        computing new ones, preventing gradient accumulation across batches.
        """
        self.grad = None

    def reshape(self, *shape: int) -> 'Tensor':
        """
        Return a new tensor with the same data but different shape.

        Args:
            *shape: New shape dimensions. Use -1 for automatic sizing.

        Returns:
            New Tensor with reshaped data (the data is copied).

        Example:
            tensor.reshape(2, -1)  # Reshape to 2 rows, auto columns
            tensor.reshape(4, 3)   # Reshape to 4x3 matrix
        """
        reshaped_data = self._data.reshape(*shape)
        return Tensor(reshaped_data)

    def numpy(self) -> np.ndarray:
        """
        Convert tensor to NumPy array.

        This is the PyTorch-inspired method for tensor-to-numpy conversion.
        Returns the underlying array itself, not a copy.

        Example:
            tensor = Tensor([1, 2, 3])
            array = tensor.numpy()  # Get NumPy array for scientific computing
        """
        return self._data

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """
        NumPy array protocol implementation.

        This enables NumPy functions (np.sum, np.allclose, ...) to work directly
        with Tensor objects by converting them to arrays when needed.

        Args:
            dtype: Optional dtype to cast to (NumPy may request this).
            copy: NumPy 2 copy semantics. True always copies, False never
                copies, None copies only when a dtype conversion requires it.

        Returns:
            The underlying NumPy array, optionally cast to the requested dtype.

        Raises:
            ValueError: If copy=False but a dtype conversion would need a copy.

        Examples:
            tensor = Tensor([1, 2, 3])
            np.sum(tensor)                  # Works automatically
            np.allclose(tensor, [1, 2, 3])  # Works as well
        """
        if dtype is not None and np.dtype(dtype) != self._data.dtype:
            if copy is False:
                raise ValueError(
                    "Unable to avoid copy while converting Tensor to the requested dtype"
                )
            return self._data.astype(dtype)
        return self._data.copy() if copy else self._data

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """
        NumPy universal function protocol implementation.

        This enables NumPy ufuncs (np.maximum, np.minimum, ...) to work with
        Tensor objects by converting them to arrays first, then wrapping
        array results of plain calls back in Tensor objects.

        Tensors passed through ``out=`` are unwrapped as well, so the ufunc
        writes into their underlying arrays; the same Tensor objects are then
        returned. Results of other ufunc methods (reduce, accumulate, ...) are
        returned as NumPy produced them.
        """
        # Convert Tensor inputs to NumPy arrays
        arrays = [item._data if isinstance(item, Tensor) else item for item in inputs]

        # Unwrap Tensor outputs too; leaving them in place would make NumPy
        # dispatch back to this method and recurse without end.
        out = kwargs.get("out") or ()
        if out:
            if not isinstance(out, tuple):
                out = (out,)
            kwargs["out"] = tuple(
                target._data if isinstance(target, Tensor) else target for target in out
            )

        # Call the ufunc on NumPy arrays
        outputs = getattr(ufunc, method)(*arrays, **kwargs)

        # If method returns NotImplemented, let NumPy handle it
        if outputs is NotImplemented:
            return NotImplemented

        if method != '__call__':
            return outputs

        def wrap(value, target=None):
            # Hand back the caller's own output Tensor when one was written to.
            if isinstance(target, Tensor):
                return target
            return Tensor(value) if isinstance(value, np.ndarray) else value

        if isinstance(outputs, tuple):
            targets = out if out else (None,) * len(outputs)
            return tuple(wrap(value, target) for value, target in zip(outputs, targets))
        return wrap(outputs, out[0] if out else None)


# # Testing Your Implementation
#
# Now let's test our tensor implementation with comprehensive tests that validate all functionality.

# ### 🧪 Unit Test: Tensor Creation
#
# Let's test your tensor creation implementation right away! This gives you immediate feedback on whether your `__init__` method works correctly.
#
# **This is a unit test** - it tests one specific function (tensor creation) in isolation.

# %% ../../modules/02_tensor/tensor_dev.ipynb 14
def Parameter(data, dtype=None):
    """
    Convenience function for creating trainable tensors.

    This is equivalent to Tensor(data, dtype=dtype, requires_grad=True) but
    provides cleaner syntax for neural network parameters.

    Args:
        data: Input data (scalar, list, or numpy array)
        dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.

    Returns:
        Tensor with requires_grad=True

    Examples:
        weight = Parameter(np.random.randn(784, 128))  # Neural network weight
        bias = Parameter(np.zeros(128))                # Neural network bias
    """
    return Tensor(data, dtype=dtype, requires_grad=True)


# # MODULE SUMMARY: Tensor Foundation
#
# Congratulations! You've successfully implemented the fundamental data structure that powers all machine learning:
#
# ## What You've Built
# - **Tensor Class**: N-dimensional array wrapper with professional interfaces
# - **Core Operations**: Creation, property access, and arithmetic operations
# - **Shape Management**: Automatic shape tracking and validation
# - **Data Types**: Proper NumPy integration and type handling
# - **Foundation**: The building block for all subsequent TinyTorch modules
#
# ## Key Learning Outcomes
# - **Understanding**: How tensors work as the foundation of machine learning
# - **Implementation**: Built tensor operations from scratch
# - **Professional patterns**: Clean APIs, proper error handling, comprehensive testing
# - **Real-world connection**: Understanding PyTorch/TensorFlow tensor foundations
# - **Systems thinking**: Building reliable, reusable components
#
# ## Mathematical Foundations Mastered
# - **N-dimensional arrays**: Shape, size, and dimensionality concepts
# - **Element-wise operations**: Addition, subtraction, multiplication, division
# - **Broadcasting**: Understanding how operations work with different shapes
# - **Memory management**: Efficient data storage and access patterns
#
# ## Professional Skills Developed
# - **API design**: Clean, intuitive interfaces for tensor operations
# - **Error handling**: Graceful handling of invalid operations and edge cases
# - **Testing methodology**: Comprehensive validation of tensor functionality
# - **Documentation**: Clear, educational documentation with examples
#
# ## Ready for Advanced Applications
# Your tensor implementation now enables:
# - **Neural Networks**: Foundation for all layer implementations
# - **Automatic Differentiation**: Gradient computation through computational graphs
# - **Complex Models**: CNNs, RNNs, Transformers - all built on tensors
# - **Real Applications**: Training models on real datasets
#
# ## Connection to Real ML Systems
# Your implementation mirrors production systems:
# - **PyTorch**: `torch.Tensor` provides identical functionality
# - **TensorFlow**: `tf.Tensor` implements similar concepts
# - **NumPy**: `numpy.ndarray` serves as the foundation
# - **Industry Standard**: Every major ML framework uses these exact principles
#
# ## The Power of Tensors
# You've built the fundamental data structure of modern AI:
# - **Universality**: Tensors represent all data: images, text, audio, video
# - **Efficiency**: Vectorized operations enable fast computation
# - **Scalability**: Handles everything from single numbers to massive matrices
# - **Flexibility**: Foundation for any mathematical operation
#
# ## What's Next
# Your tensor implementation is the foundation for:
# - **Activations**: Nonlinear functions that enable complex learning
# - **Layers**: Linear transformations and neural network building blocks
# - **Networks**: Composing layers into powerful architectures
# - **Training**: Optimizing networks to solve real problems
#
# **Next Module**: Activation functions - adding the nonlinearity that makes neural networks powerful!
#
# You've built the foundation of modern AI. Now let's add the mathematical functions that enable machines to learn complex patterns!