TinyTorch/tinytorch/core/tensor.py

# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/02_tensor/tensor_dev.ipynb.

# %% auto 0
__all__ = ['Tensor', 'Parameter']

# %% ../../modules/02_tensor/tensor_dev.ipynb 1
import numpy as np
import sys
from typing import Union, Tuple, Optional, Any

# %% ../../modules/02_tensor/tensor_dev.ipynb 3
class Tensor:
    """
    TinyTorch Tensor: N-dimensional array with ML operations.

    The fundamental data structure for all TinyTorch operations.
    Wraps NumPy arrays with ML-specific functionality.
    """

    def __init__(self, data: Any, dtype: Optional[str] = None, requires_grad: bool = False):
        """
        Create a new tensor from data.

        Args:
            data: Input data (scalar, list, or numpy array)
            dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.
            requires_grad: Whether this tensor needs gradients for training. Defaults to False.

        TODO: Implement tensor creation with proper type handling.

        STEP-BY-STEP:
        1. Check if data is a scalar (int/float) - convert to numpy array
        2. Check if data is a list - convert to numpy array
        3. Check if data is already a numpy array - use as-is
        4. Apply dtype conversion if specified
        5. Store the result in self._data

        EXAMPLE:
        Tensor(5) → stores np.array(5)
        Tensor([1, 2, 3]) → stores np.array([1, 2, 3])
        Tensor(np.array([1, 2, 3])) → stores the array directly

        HINTS:
        - Use isinstance() to check data types
        - Use np.array() for conversion
        - Handle dtype parameter for type conversion
        - Store the array in self._data
        """
        ### BEGIN SOLUTION
        # Convert input to numpy array
        if isinstance(data, (int, float, np.number)):
            # Handle Python and NumPy scalars
            if dtype is None:
                # Auto-detect type: int for integers, float32 for floats
                if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):
                    dtype = 'int32'
                else:
                    dtype = 'float32'
            self._data = np.array(data, dtype=dtype)
        elif isinstance(data, list):
            # Let NumPy auto-detect type, then convert if needed
            temp_array = np.array(data)
            if dtype is None:
                # Use NumPy's auto-detected type, but prefer float32 for floats
                if temp_array.dtype == np.float64:
                    dtype = 'float32'
                else:
                    dtype = str(temp_array.dtype)
            self._data = np.array(data, dtype=dtype)
        elif isinstance(data, np.ndarray):
            # Already a numpy array
            if dtype is None:
                # Keep existing dtype, but prefer float32 for float64
                if data.dtype == np.float64:
                    dtype = 'float32'
                else:
                    dtype = str(data.dtype)
            self._data = data.astype(dtype) if dtype != data.dtype else data.copy()
        elif isinstance(data, Tensor):
            # Input is another Tensor - extract its data
            if dtype is None:
                # Keep existing dtype, but prefer float32 for float64
                if data.data.dtype == np.float64:
                    dtype = 'float32'
                else:
                    dtype = str(data.data.dtype)
            self._data = data.data.astype(dtype) if dtype != str(data.data.dtype) else data.data.copy()
        else:
            # Try to convert unknown types
            self._data = np.array(data, dtype=dtype)

        # Initialize gradient tracking attributes
        self.requires_grad = requires_grad
        self.grad = None if requires_grad else None
        self._grad_fn = None
        ### END SOLUTION

    @property
    def data(self) -> np.ndarray:
        """
        Access underlying numpy array.

        TODO: Return the stored numpy array.

        STEP-BY-STEP IMPLEMENTATION:
        1. Access the internal _data attribute
        2. Return the numpy array directly
        3. This provides access to underlying data for NumPy operations

        LEARNING CONNECTIONS:
        Real-world relevance:
        - PyTorch: tensor.numpy() converts to NumPy for visualization/analysis
        - TensorFlow: tensor.numpy() enables integration with scientific Python
        - Production: Data scientists need to access raw arrays for debugging
        - Performance: Direct access avoids copying for read-only operations

        HINT: Return self._data (the array you stored in __init__)
        """
        ### BEGIN SOLUTION
        return self._data
        ### END SOLUTION

    @data.setter
    def data(self, value: Union[np.ndarray, 'Tensor']) -> None:
        """
        Set the underlying data of the tensor.

        Args:
            value: New data (numpy array or Tensor)
        """
        if isinstance(value, Tensor):
            self._data = value._data.copy()
        else:
            self._data = np.array(value)

    @property
    def shape(self) -> Tuple[int, ...]:
        """
        Get tensor shape.

        TODO: Return the shape of the stored numpy array.

        STEP-BY-STEP IMPLEMENTATION:
        1. Access the _data attribute (the NumPy array)
        2. Get the shape property from the NumPy array
        3. Return the shape tuple directly

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Neural networks: Layer compatibility requires matching shapes
        - Computer vision: Image shape (height, width, channels) determines architecture
        - NLP: Sequence length and vocabulary size affect model design
        - Debugging: Shape mismatches are the #1 cause of ML errors

        HINT: Use .shape attribute of the numpy array
        EXAMPLE: Tensor([1, 2, 3]).shape should return (3,)
        """
        ### BEGIN SOLUTION
        return self._data.shape
        ### END SOLUTION

    @property
    def size(self) -> int:
        """
        Get total number of elements.

        TODO: Return the total number of elements in the tensor.

        STEP-BY-STEP IMPLEMENTATION:
        1. Access the _data attribute (the NumPy array)
        2. Get the size property from the NumPy array
        3. Return the total element count as an integer

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Memory planning: Calculate RAM requirements for large tensors
        - Model architecture: Determine parameter counts for layers
        - Performance optimization: Size affects computation time
        - Batch processing: Total elements determines vectorization efficiency

        HINT: Use .size attribute of the numpy array
        EXAMPLE: Tensor([1, 2, 3]).size should return 3
        """
        ### BEGIN SOLUTION
        return self._data.size
        ### END SOLUTION

    @property
    def dtype(self) -> np.dtype:
        """
        Get data type as numpy dtype.

        TODO: Return the data type of the stored numpy array.

        STEP-BY-STEP IMPLEMENTATION:
        1. Access the _data attribute (the NumPy array)
        2. Get the dtype property from the NumPy array
        3. Return the NumPy dtype object directly

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Precision vs speed: float32 is faster, float64 more accurate
        - Memory optimization: int8 uses 1/4 memory of int32
        - GPU compatibility: Some operations only work with specific types
        - Model deployment: Mobile/edge devices prefer smaller data types

        HINT: Use .dtype attribute of the numpy array
        EXAMPLE: Tensor([1, 2, 3]).dtype should return dtype('int32')
        """
        ### BEGIN SOLUTION
        return self._data.dtype
        ### END SOLUTION

    def __repr__(self) -> str:
        """
        String representation.

        TODO: Create a clear string representation of the tensor.

        STEP-BY-STEP IMPLEMENTATION:
        1. Convert the numpy array to a list using .tolist()
        2. Get shape and dtype information from properties
        3. Format as "Tensor([data], shape=shape, dtype=dtype)"
        4. Return the formatted string

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Debugging: Clear tensor representation speeds debugging
        - Jupyter notebooks: Good __repr__ improves data exploration
        - Logging: Production systems log tensor info for monitoring
        - Education: Students understand tensors better with clear output

        APPROACH:
        1. Convert the numpy array to a list for readable output
        2. Include the shape and dtype information
        3. Format: "Tensor([data], shape=shape, dtype=dtype)"

        EXAMPLE:
        Tensor([1, 2, 3]) → "Tensor([1, 2, 3], shape=(3,), dtype=int32)"

        HINTS:
        - Use .tolist() to convert numpy array to list
        - Include shape and dtype information
        - Keep format consistent and readable
        """
        ### BEGIN SOLUTION
        return f"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})"
        ### END SOLUTION

    def add(self, other: 'Tensor') -> 'Tensor':
        """
        Add two tensors element-wise.

        TODO: Implement tensor addition.

        STEP-BY-STEP IMPLEMENTATION:
        1. Extract numpy arrays from both tensors
        2. Use NumPy's + operator for element-wise addition
        3. Create a new Tensor object with the result
        4. Return the new tensor

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Neural networks: Adding bias terms to linear layer outputs
        - Residual connections: skip connections in ResNet architectures
        - Gradient updates: Adding computed gradients to parameters
        - Ensemble methods: Combining predictions from multiple models

        APPROACH:
        1. Add the numpy arrays using +
        2. Return a new Tensor with the result
        3. Handle broadcasting automatically

        EXAMPLE:
        Tensor([1, 2]) + Tensor([3, 4]) → Tensor([4, 6])

        HINTS:
        - Use self._data + other._data
        - Return Tensor(result)
        - NumPy handles broadcasting automatically
        """
        ### BEGIN SOLUTION
        result = self._data + other._data
        return Tensor(result)
        ### END SOLUTION

    def multiply(self, other: 'Tensor') -> 'Tensor':
        """
        Multiply two tensors element-wise.

        TODO: Implement tensor multiplication.

        STEP-BY-STEP IMPLEMENTATION:
        1. Extract numpy arrays from both tensors
        2. Use NumPy's * operator for element-wise multiplication
        3. Create a new Tensor object with the result
        4. Return the new tensor

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Activation functions: Element-wise operations like ReLU masking
        - Attention mechanisms: Element-wise scaling in transformer models
        - Feature scaling: Multiplying features by learned scaling factors
        - Gating: Element-wise gating in LSTM and GRU cells

        APPROACH:
        1. Multiply the numpy arrays using *
        2. Return a new Tensor with the result
        3. Handle broadcasting automatically

        EXAMPLE:
        Tensor([1, 2]) * Tensor([3, 4]) → Tensor([3, 8])

        HINTS:
        - Use self._data * other._data
        - Return Tensor(result)
        - This is element-wise, not matrix multiplication
        """
        ### BEGIN SOLUTION
        result = self._data * other._data
        return Tensor(result)
        ### END SOLUTION

    def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """
        Addition operator: tensor + other

        TODO: Implement + operator for tensors.

        STEP-BY-STEP IMPLEMENTATION:
        1. Check if other is a Tensor object
        2. If Tensor, call the add() method directly
        3. If scalar, convert to Tensor then call add()
        4. Return the result from add() method

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Natural syntax: tensor + scalar enables intuitive code
        - Broadcasting: Adding scalars to tensors is common in ML
        - Operator overloading: Python's magic methods enable math-like syntax
        - API design: Clean interfaces reduce cognitive load for researchers

        APPROACH:
        1. If other is a Tensor, use tensor addition
        2. If other is a scalar, convert to Tensor first
        3. Return the result

        EXAMPLE:
        Tensor([1, 2]) + Tensor([3, 4]) → Tensor([4, 6])
        Tensor([1, 2]) + 5 → Tensor([6, 7])
        """
        ### BEGIN SOLUTION
        if isinstance(other, Tensor):
            return self.add(other)
        else:
            return self.add(Tensor(other))
        ### END SOLUTION

    def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """
        Multiplication operator: tensor * other

        TODO: Implement * operator for tensors.

        STEP-BY-STEP IMPLEMENTATION:
        1. Check if other is a Tensor object
        2. If Tensor, call the multiply() method directly
        3. If scalar, convert to Tensor then call multiply()
        4. Return the result from multiply() method

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Scaling features: tensor * learning_rate for gradient updates
        - Masking: tensor * mask for attention mechanisms
        - Regularization: tensor * dropout_mask during training
        - Normalization: tensor * scale_factor in batch normalization

        APPROACH:
        1. If other is a Tensor, use tensor multiplication
        2. If other is a scalar, convert to Tensor first
        3. Return the result

        EXAMPLE:
        Tensor([1, 2]) * Tensor([3, 4]) → Tensor([3, 8])
        Tensor([1, 2]) * 3 → Tensor([3, 6])
        """
        ### BEGIN SOLUTION
        if isinstance(other, Tensor):
            return self.multiply(other)
        else:
            return self.multiply(Tensor(other))
        ### END SOLUTION

    def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """
        Subtraction operator: tensor - other

        TODO: Implement - operator for tensors.

        STEP-BY-STEP IMPLEMENTATION:
        1. Check if other is a Tensor object
        2. If Tensor, subtract other._data from self._data
        3. If scalar, subtract scalar directly from self._data
        4. Create new Tensor with result and return

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Gradient computation: parameter - learning_rate * gradient
        - Residual connections: output - skip_connection in some architectures
        - Error calculation: predicted - actual for loss computation
        - Centering data: tensor - mean for zero-centered inputs

        APPROACH:
        1. Convert other to Tensor if needed
        2. Subtract using numpy arrays
        3. Return new Tensor with result

        EXAMPLE:
        Tensor([5, 6]) - Tensor([1, 2]) → Tensor([4, 4])
        Tensor([5, 6]) - 1 → Tensor([4, 5])
        """
        ### BEGIN SOLUTION
        if isinstance(other, Tensor):
            result = self._data - other._data
        else:
            result = self._data - other
        return Tensor(result)
        ### END SOLUTION

    def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """
        Division operator: tensor / other

        TODO: Implement / operator for tensors.

        STEP-BY-STEP IMPLEMENTATION:
        1. Check if other is a Tensor object
        2. If Tensor, divide self._data by other._data
        3. If scalar, divide self._data by scalar directly
        4. Create new Tensor with result and return

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Normalization: tensor / std_deviation for standard scaling
        - Learning rate decay: parameter / decay_factor over time
        - Probability computation: counts / total_counts for frequencies
        - Temperature scaling: logits / temperature in softmax functions

        APPROACH:
        1. Convert other to Tensor if needed
        2. Divide using numpy arrays
        3. Return new Tensor with result

        EXAMPLE:
        Tensor([6, 8]) / Tensor([2, 4]) → Tensor([3, 2])
        Tensor([6, 8]) / 2 → Tensor([3, 4])
        """
        ### BEGIN SOLUTION
        if isinstance(other, Tensor):
            result = self._data / other._data
        else:
            result = self._data / other
        return Tensor(result)
        ### END SOLUTION

    def mean(self) -> 'Tensor':
        """Computes the mean of the tensor's elements."""
        return Tensor(np.mean(self.data))

    def matmul(self, other: 'Tensor') -> 'Tensor':
        """
        Perform matrix multiplication between two tensors using explicit loops.

        This implementation uses triple-nested loops for educational understanding
        of the fundamental operations. Module 15 will show the optimization progression
        from loops → blocking → vectorized operations.

        TODO: Implement matrix multiplication.

        STEP-BY-STEP IMPLEMENTATION:
        1. Extract numpy arrays from both tensors
        2. Check tensor shapes for compatibility
        3. Use triple-nested loops for educational understanding
        4. Create new Tensor object with the result
        5. Return the new tensor

        LEARNING CONNECTIONS:
        Real-world relevance:
        - Linear layers: input @ weight matrices in neural networks
        - Transformer attention: Q @ K^T for attention scores
        - CNN convolutions: Implemented as matrix multiplications
        - Batch processing: Matrix ops enable parallel computation

        EDUCATIONAL APPROACH:
        1. Show every operation explicitly with loops
        2. Build understanding before optimizing in Module 15
        3. Connect mathematical operations to computational patterns

        EXAMPLE:
        Tensor([[1, 2], [3, 4]]) @ Tensor([[5, 6], [7, 8]]) → Tensor([[19, 22], [43, 50]])

        HINTS:
        - This is intentionally simple for education, not optimized
        - Module 15 will show the progression to high-performance implementations
        - Understanding loops helps appreciate vectorization benefits
        """
        ### BEGIN SOLUTION
        # Matrix multiplication using explicit loops for educational understanding
        a_data = self._data
        b_data = other._data

        # Get dimensions and validate compatibility
        if len(a_data.shape) != 2 or len(b_data.shape) != 2:
            raise ValueError("matmul requires 2D tensors")

        m, k = a_data.shape
        k2, n = b_data.shape

        if k != k2:
            raise ValueError(f"Inner dimensions must match: {k} != {k2}")

        # Initialize result matrix
        result = np.zeros((m, n), dtype=a_data.dtype)

        # Triple nested loops - educational, shows every operation
        # This is intentionally simple to understand the fundamental computation
        # Module 15 will show the optimization journey:
        #   Step 1 (here): Educational loops - slow but clear
        #   Step 2: Loop blocking for cache efficiency
        #   Step 3: Vectorized operations with NumPy
        #   Step 4: GPU acceleration and BLAS libraries
        for i in range(m):                      # For each row in result
            for j in range(n):                  # For each column in result
                for k_idx in range(k):          # Dot product: sum over inner dimension
                    result[i, j] += a_data[i, k_idx] * b_data[k_idx, j]

        return Tensor(result)
        ### END SOLUTION

    def __matmul__(self, other: 'Tensor') -> 'Tensor':
        """
        Matrix multiplication operator: tensor @ other

        Enables the @ operator for matrix multiplication, providing
        clean syntax for neural network operations.
        """
        return self.matmul(other)

    def backward(self, gradient=None):
        """
        Compute gradients for this tensor and propagate backward.

        This is a stub for now - full implementation in Module 09 (Autograd).
        For now, just accumulates gradients if requires_grad=True.

        Args:
            gradient: Gradient from upstream. If None, assumes scalar with grad=1
        """
        if not self.requires_grad:
            return

        if gradient is None:
            # Scalar case - gradient is 1
            gradient = Tensor(np.ones_like(self._data))

        # Accumulate gradients
        if self.grad is None:
            self.grad = gradient
        else:
            self.grad = self.grad + gradient

    def zero_grad(self):
        """
        Reset gradients to None. Used by optimizers before backward pass.

        This method is called by optimizers to clear gradients before
        computing new ones, preventing gradient accumulation across batches.
        """
        self.grad = None

    def reshape(self, *shape: int) -> 'Tensor':
        """
        Return a new tensor with the same data but different shape.

        Args:
            *shape: New shape dimensions. Use -1 for automatic sizing.

        Returns:
            New Tensor with reshaped data

        Example:
            tensor.reshape(2, -1)  # Reshape to 2 rows, auto columns
            tensor.reshape(4, 3)   # Reshape to 4x3 matrix
        """
        reshaped_data = self._data.reshape(*shape)
        return Tensor(reshaped_data)


# # Testing Your Implementation
#
# Now let's test our tensor implementation with comprehensive tests that validate all functionality.

# ### 🧪 Unit Test: Tensor Creation
#
# Let's test your tensor creation implementation right away! This gives you immediate feedback on whether your `__init__` method works correctly.
#
# **This is a unit test** - it tests one specific function (tensor creation) in isolation.

# %% ../../modules/02_tensor/tensor_dev.ipynb 14
def Parameter(data, dtype=None):
    """
    Convenience function for creating trainable tensors.

    This is equivalent to Tensor(data, requires_grad=True) but provides
    cleaner syntax for neural network parameters.

    Args:
        data: Input data (scalar, list, or numpy array)
        dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.

    Returns:
        Tensor with requires_grad=True

    Examples:
        weight = Parameter(np.random.randn(784, 128))  # Neural network weight
        bias = Parameter(np.zeros(128))                # Neural network bias
    """
    return Tensor(data, dtype=dtype, requires_grad=True)


# # MODULE SUMMARY: Tensor Foundation
#
# Congratulations! You've successfully implemented the fundamental data structure that powers all machine learning:
#
# ## What You've Built
# - **Tensor Class**: N-dimensional array wrapper with professional interfaces
# - **Core Operations**: Creation, property access, and arithmetic operations
# - **Shape Management**: Automatic shape tracking and validation
# - **Data Types**: Proper NumPy integration and type handling
# - **Foundation**: The building block for all subsequent TinyTorch modules
#
# ## Key Learning Outcomes
# - **Understanding**: How tensors work as the foundation of machine learning
# - **Implementation**: Built tensor operations from scratch
# - **Professional patterns**: Clean APIs, proper error handling, comprehensive testing
# - **Real-world connection**: Understanding PyTorch/TensorFlow tensor foundations
# - **Systems thinking**: Building reliable, reusable components
#
# ## Mathematical Foundations Mastered
# - **N-dimensional arrays**: Shape, size, and dimensionality concepts
# - **Element-wise operations**: Addition, subtraction, multiplication, division
# - **Broadcasting**: Understanding how operations work with different shapes
# - **Memory management**: Efficient data storage and access patterns
#
# ## Professional Skills Developed
# - **API design**: Clean, intuitive interfaces for tensor operations
# - **Error handling**: Graceful handling of invalid operations and edge cases
# - **Testing methodology**: Comprehensive validation of tensor functionality
# - **Documentation**: Clear, educational documentation with examples
#
# ## Ready for Advanced Applications
# Your tensor implementation now enables:
# - **Neural Networks**: Foundation for all layer implementations
# - **Automatic Differentiation**: Gradient computation through computational graphs
# - **Complex Models**: CNNs, RNNs, Transformers - all built on tensors
# - **Real Applications**: Training models on real datasets
#
# ## Connection to Real ML Systems
# Your implementation mirrors production systems:
# - **PyTorch**: `torch.Tensor` provides identical functionality
# - **TensorFlow**: `tf.Tensor` implements similar concepts
# - **NumPy**: `numpy.ndarray` serves as the foundation
# - **Industry Standard**: Every major ML framework uses these exact principles
#
# ## The Power of Tensors
# You've built the fundamental data structure of modern AI:
# - **Universality**: Tensors represent all data: images, text, audio, video
# - **Efficiency**: Vectorized operations enable fast computation
# - **Scalability**: Handles everything from single numbers to massive matrices
# - **Flexibility**: Foundation for any mathematical operation
#
# ## What's Next
# Your tensor implementation is the foundation for:
# - **Activations**: Nonlinear functions that enable complex learning
# - **Layers**: Linear transformations and neural network building blocks
# - **Networks**: Composing layers into powerful architectures
# - **Training**: Optimizing networks to solve real problems
#
# **Next Module**: Activation functions - adding the nonlinearity that makes neural networks powerful!
#
# You've built the foundation of modern AI. Now let's add the mathematical functions that enable machines to learn complex patterns!