# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb.

# %% auto 0
__all__ = ['visualize_activation_function', 'visualize_activation_on_data', 'ReLU', 'Sigmoid', 'Tanh', 'Softmax']

# %% ../../modules/source/02_activations/activations_dev.ipynb 1
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from typing import Union, List

# Import our Tensor class - try from package first, then from local module
try:
    from tinytorch.core.tensor import Tensor
except ImportError:
    # For development, import from local tensor module
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    from tensor_dev import Tensor

# %% ../../modules/source/02_activations/activations_dev.ipynb 2
def _should_show_plots():
    """Return True when plots may be shown, False when a test run is detected.

    A test run is assumed as soon as one of these holds:
    - the ``pytest`` module has already been imported,
    - the ``PYTEST_CURRENT_TEST`` environment variable is set,
    - any command-line argument contains the substring ``test``.
    """
    if 'pytest' in sys.modules:
        return False

    if os.environ.get('PYTEST_CURRENT_TEST') is not None:
        return False

    # Substring match on every argument: this single check also covers an
    # argument that is exactly "test" and any argument mentioning "pytest".
    return not any('test' in arg for arg in sys.argv)

# %% ../../modules/source/02_activations/activations_dev.ipynb 3
def visualize_activation_function(activation_fn, name: str, x_range: tuple = (-5, 5), num_points: int = 100):
    """Plot an activation function over an evenly spaced range of inputs.

    Returns immediately, without plotting, when ``_should_show_plots()``
    reports that plots are disabled.

    Args:
        activation_fn: Callable that receives a ``Tensor`` wrapping one value
            (``[[x]]``) and returns an object whose ``.data.item()`` yields
            the activated value.
        name: Label used in the legend, the y-axis label and the title.
        x_range: Sequence whose first two entries are the start and stop of
            the sampled interval.
        num_points: Number of samples generated by ``np.linspace``.

    Returns:
        None. Errors are reported with a printed message rather than raised.
    """
    if not _should_show_plots():
        return

    try:
        # Sample the input axis
        start, stop = x_range[0], x_range[1]
        inputs = np.linspace(start, stop, num_points)

        # Evaluate the activation one sample at a time, each wrapped as a 1x1 tensor
        outputs = [activation_fn(Tensor([[sample]])).data.item() for sample in inputs]

        # Draw the curve
        plt.figure(figsize=(10, 6))
        plt.plot(inputs, outputs, 'b-', linewidth=2, label=f'{name} Activation')
        plt.grid(True, alpha=0.3)
        plt.xlabel('Input (x)')
        plt.ylabel(f'{name}(x)')
        plt.title(f'{name} Activation Function')
        plt.legend()
        plt.show()

    except ImportError:
        print("   📊 Matplotlib not available - skipping visualization")
    except Exception as err:
        # Best-effort helper: report the problem instead of interrupting the caller
        print(f"   ⚠️  Visualization error: {err}")

def visualize_activation_on_data(activation_fn, name: str, data: Tensor):
    """Print an activation function's effect on a sample tensor.

    Prints the flattened input, the flattened output and the output's
    min/max range. Returns immediately, printing nothing, when
    ``_should_show_plots()`` reports that plots are disabled.

    Args:
        activation_fn: Callable applied to ``data``; its result must expose a
            ``.data`` attribute supporting ``flatten()``, ``min()`` and ``max()``.
        name: Label printed in the heading line.
        data: Input tensor; its ``.data`` attribute must support ``flatten()``.

    Returns:
        None. Errors are reported with a printed message rather than raised.
    """
    if not _should_show_plots():
        return

    try:
        activated = activation_fn(data)
        print(f"   📊 {name} Example:")
        print(f"      Input:  {data.data.flatten()}")

        out_values = activated.data
        print(f"      Output: {out_values.flatten()}")
        print(f"      Range:  [{out_values.min():.3f}, {out_values.max():.3f}]")

    except Exception as err:
        # Best-effort helper: report the problem instead of interrupting the caller
        print(f"   ⚠️  Data visualization error: {err}")

# %% ../../modules/source/02_activations/activations_dev.ipynb 8
class ReLU:
    """
    Rectified Linear Unit activation: f(x) = max(0, x)

    Each element of the input is replaced by the larger of zero and itself.
    The layer holds no state.
    """

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``relu(x)`` as shorthand for ``relu.forward(x)``."""
        return self.forward(x)

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply ReLU element-wise: f(x) = max(0, x)

        Args:
            x: Tensor-like object exposing its values through ``x.data``.

        Returns:
            A new object of the same class as ``x``, built from the
            element-wise maximum of 0 and ``x.data``. The shape is unchanged.

        EXAMPLE:
        Input: Tensor([[-1, 0, 1, 2, -3]])
        Output: Tensor([[0, 0, 1, 2, 0]])
        """
        ### BEGIN SOLUTION
        tensor_cls = type(x)  # rebuild the result with the caller's own tensor class
        return tensor_cls(np.maximum(0, x.data))
        ### END SOLUTION

# %% ../../modules/source/02_activations/activations_dev.ipynb 12
class Sigmoid:
    """
    Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x))

    Smooth S-shaped function that squashes inputs to (0, 1).
    Useful for binary classification and probabilistic outputs.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x))

        The computation is overflow-free for every floating-point dtype.
        Clipping to [-500, 500] alone only protects float64: with float32
        data, exp() already overflows once its argument exceeds roughly 88.7.
        The exponential is therefore always taken of a non-positive number.

        APPROACH:
        1. Clip input values to [-500, 500] (keeps results for extreme
           inputs the same as the plain clipped formula)
        2. Compute e^(-|x|), which always lies in (0, 1]
        3. Use 1 / (1 + e^(-|x|)) where x >= 0 and
           e^(-|x|) / (1 + e^(-|x|)) where x < 0; both equal 1 / (1 + e^(-x))
        4. Return a new Tensor with the results

        Args:
            x: Tensor-like object exposing its values through ``x.data``.

        Returns:
            A new object of the same class as ``x`` holding the element-wise
            sigmoid of ``x.data``. The shape is unchanged.

        EXAMPLE:
        Input: Tensor([[-2, 0, 2]])
        Expected: Tensor([[0.119, 0.5, 0.881]]) (approximately)
        """
        ### BEGIN SOLUTION
        # Clip so extreme inputs saturate exactly as they did before
        clipped = np.clip(x.data, -500, 500)

        # exp of a non-positive number cannot overflow, whatever the dtype
        decay = np.exp(-np.abs(clipped))

        # Both branches are finite everywhere, so evaluating both in np.where is safe
        result = np.where(clipped >= 0, 1 / (1 + decay), decay / (1 + decay))
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: sigmoid(x) instead of sigmoid.forward(x)"""
        return self.forward(x)

# %% ../../modules/source/02_activations/activations_dev.ipynb 16
class Tanh:
    """
    Hyperbolic tangent activation: f(x) = tanh(x)

    S-shaped, zero-centered function whose outputs lie in [-1, 1].
    The layer holds no state.
    """

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``tanh(x)`` as shorthand for ``tanh.forward(x)``."""
        return self.forward(x)

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply tanh element-wise: f(x) = tanh(x)

        The work is delegated to ``np.tanh`` rather than spelling out the
        exponential formula by hand.

        Args:
            x: Tensor-like object exposing its values through ``x.data``.

        Returns:
            A new object of the same class as ``x`` holding the element-wise
            tanh of ``x.data``. The shape is unchanged.

        EXAMPLE:
        Input: Tensor([[-2, 0, 2]])
        Output: Tensor([[-0.964, 0.0, 0.964]]) (approximately)
        """
        ### BEGIN SOLUTION
        tensor_cls = type(x)  # rebuild the result with the caller's own tensor class
        return tensor_cls(np.tanh(x.data))
        ### END SOLUTION

# %% ../../modules/source/02_activations/activations_dev.ipynb 20
class Softmax:
    """
    Softmax activation: f(x_i) = e^(x_i) / Σ(e^(x_j))

    Turns the values along the last axis into non-negative weights that
    sum to 1. The layer holds no state.
    """

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``softmax(x)`` as shorthand for ``softmax.forward(x)``."""
        return self.forward(x)

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply softmax along the last axis: f(x_i) = e^(x_i) / Σ(e^(x_j))

        The maximum along the last axis is subtracted before exponentiating.
        This leaves the mathematical result unchanged while keeping every
        exponent at or below zero.

        Args:
            x: Tensor-like object exposing its values through ``x.data``.

        Returns:
            A new object of the same class as ``x`` whose values sum to 1
            along the last axis. The shape is unchanged.

        EXAMPLE:
        Input: Tensor([[1, 2, 3]])
        Output: Tensor([[0.09, 0.24, 0.67]]) (approximately, sums to 1)
        """
        ### BEGIN SOLUTION
        values = x.data

        # Shift by the per-row maximum so the largest exponent is exactly 0
        shifted = values - np.max(values, axis=-1, keepdims=True)

        # Exponentiate, then normalise by the total along the last axis
        weights = np.exp(shifted)
        probabilities = weights / np.sum(weights, axis=-1, keepdims=True)

        return type(x)(probabilities)
        ### END SOLUTION