mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-04-29 16:22:36 -05:00
- Added 'Understanding This Demo' panels explaining what students will see
- Added inline interpretation guides with 💡 markers throughout demos
- Enhanced explanations of outputs, tables, and visualizations
- Added context about why concepts matter in ML/AI
- Improved pedagogical clarity for all 8 demo files:
  - demo_tensor_math.py: Matrix operations context
  - demo_activations.py: Nonlinearity importance
  - demo_single_neuron.py: Learning process clarity
  - demo_xor_network.py: Multi-layer necessity
  - demo_vision.py: CNN feature hierarchy
  - demo_attention.py: Attention mechanics
  - demo_training.py: Pipeline understanding
  - demo_language.py: Language generation insights
These additions help students not just see the demos run, but understand
what the outputs mean and why these concepts are fundamental to ML.
294 lines
12 KiB
Python
#!/usr/bin/env python3
"""
TinyTorch Demo 03: Activation Functions - The Key to Intelligence

Shows how nonlinear functions enable neural networks to learn complex patterns.
"""

import sys
import numpy as np
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from rich.columns import Columns


def demo_activations():
    """Demonstrate activation functions: shapes, the XOR problem, softmax, and a full forward pass."""

    console = Console()

    try:
        # Import TinyTorch modules
        import tinytorch.core.tensor as tt
        import tinytorch.core.activations as act

        # Main header
        console.print(Panel.fit(
            "📈 TinyTorch Activation Functions Demo\nDiscover how nonlinearity creates intelligence!",
            style="bold cyan",
            border_style="bright_blue"
        ))
        console.print()

        # What this demo shows
        console.print(Panel(
            "[bold yellow]What This Demo Shows:[/bold yellow]\n\n"
            "Activation functions are the 'secret sauce' that gives neural networks their power.\n"
            "Without them, even deep networks would only learn linear patterns. You'll discover:\n\n"
            "• Why linear transformations fail on the famous XOR problem\n"
            "• How ReLU creates sparse, learnable features from data\n"
            "• How Softmax converts raw scores into probabilities for classification\n"
            "• The complete forward pass through a neural network\n\n"
            "[bold cyan]Key Insight:[/bold cyan] Nonlinearity allows networks to learn complex decision boundaries\n"
            "that can separate any data pattern, not just straight lines!",
            title="📚 Understanding This Demo",
            style="blue"
        ))
        console.print()

        # Demo 1: Function shapes visualization
        console.print(Panel(
            "Comparing linear vs nonlinear transformations...",
            title="🎨 Demo 1: Activation Function Shapes",
            style="green"
        ))

        # Create test inputs
        x_data = np.linspace(-3, 3, 11)  # 11 evenly spaced values from -3 to 3 (step 0.6)
        x = tt.Tensor(x_data.reshape(-1, 1))

        console.print(f"[bold cyan]Input values:[/bold cyan] {x_data}")
        console.print()

        # Test different activations
        relu = act.ReLU()
        sigmoid = act.Sigmoid()
        softmax = act.Softmax()
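
        # Standard definitions of these activations:
        #   ReLU(x)      = max(0, x)
        #   Sigmoid(x)   = 1 / (1 + exp(-x))
        #   Softmax(x)_i = exp(x_i) / sum_j exp(x_j)   (applied per row of class scores)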

        # Create activation comparison table
        activation_table = Table(show_header=True, header_style="bold magenta")
        activation_table.add_column("Function", style="cyan")
        activation_table.add_column("Output", style="yellow")
        activation_table.add_column("Key Property", style="green")

        # ReLU transformation
        relu_output = relu.forward(x)
        relu_str = "[" + ", ".join(f"{val:.1f}" for val in relu_output.data.flatten()) + "]"
        activation_table.add_row("ReLU(x)", relu_str, "Cuts off negative values → sparse representations")

        # Sigmoid transformation
        sigmoid_output = sigmoid.forward(x)
        sigmoid_str = "[" + ", ".join(f"{val:.2f}" for val in sigmoid_output.data.flatten()) + "]"
        activation_table.add_row("Sigmoid(x)", sigmoid_str, "Squashes to (0,1) → probability-like outputs")

        console.print(activation_table)
        console.print()

        console.print("[dim]💡 [bold]How to Interpret:[/bold] Each activation function shapes data differently:[/dim]")
        console.print("[dim] • ReLU: Keeps positive values, zeros out negatives (creates sparsity)[/dim]")
        console.print("[dim] • Sigmoid: Squashes any input to (0,1) range (good for probabilities)[/dim]")
        console.print()

        # Demo 2: The XOR Problem Setup
        console.print(Panel(
            "Showing why we NEED nonlinear activations...",
            title="⚡ Demo 2: Why Linearity Fails - The XOR Problem",
            style="yellow"
        ))

        # XOR truth table
        xor_inputs = tt.Tensor([[0, 0], [0, 1], [1, 0], [1, 1]])
        xor_outputs = tt.Tensor([[0], [1], [1], [0]])
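
        # XOR is the classic example of a problem that is not linearly separable:
        # no single line w1*x1 + w2*x2 + b = 0 can put (0,1) and (1,0) on one
        # side and (0,0) and (1,1) on the other.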

        # Create XOR truth table
        xor_table = Table(show_header=True, header_style="bold magenta")
        xor_table.add_column("X1", style="cyan", justify="center")
        xor_table.add_column("X2", style="cyan", justify="center")
        xor_table.add_column("XOR Output", style="yellow", justify="center")

        for i in range(4):
            x1, x2 = xor_inputs.data[i]
            y = xor_outputs.data[i, 0]
            xor_table.add_row(str(int(x1)), str(int(x2)), str(int(y)))

        console.print(xor_table)
        console.print()

        # Try linear transformation (will fail)
        console.print("[bold red]🔍 Testing Linear Transformation:[/bold red]")
        linear_weights = tt.Tensor([[1.0], [1.0]])  # Simple linear combination
        linear_output = tt.Tensor(xor_inputs.data @ linear_weights.data)
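
        # With weights [1, 1] the linear output is just x1 + x2, i.e. [0, 1, 1, 2]
        # for the four inputs. That cannot match the target pattern [0, 1, 1, 0],
        # so at least one row is marked ❌ in the table below.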

        # Create linear test results table
        linear_table = Table(show_header=True, header_style="bold magenta")
        linear_table.add_column("Input", style="cyan")
        linear_table.add_column("Linear Output", style="yellow")
        linear_table.add_column("Expected", style="green")
        linear_table.add_column("Status", style="red")

        for i in range(4):
            x1, x2 = xor_inputs.data[i]
            linear_pred = linear_output.data[i, 0]
            actual = xor_outputs.data[i, 0]
            status = "✅" if abs(linear_pred - actual) < 0.5 else "❌"
            linear_table.add_row(f"[{int(x1)}, {int(x2)}]", f"{linear_pred:.1f}", str(int(actual)), status)

        console.print(linear_table)

        # Failure explanation
        failure_panel = Panel(
            "❌ Linear transformation cannot solve XOR!\n (No single line can separate XOR classes)",
            title="Linear Limitation",
            style="red"
        )
        console.print(failure_panel)
        console.print()

        # Show how nonlinearity helps
        console.print("[bold green]✨ Adding Nonlinearity (ReLU):[/bold green]")

        # First layer: create useful features
        W1 = tt.Tensor([[1.0, 1.0], [-1.0, -1.0]])  # 2 neurons
        b1 = tt.Tensor([[-0.5], [1.5]])  # Biases
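
        # These weights are hand-picked for the demo rather than learned: each
        # hidden neuron computes x1 - x2 (both columns of W1 are [1, -1]),
        # shifted by a different bias.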

        # Forward pass through first layer + ReLU
        z1 = tt.Tensor(xor_inputs.data @ W1.data + b1.data.T)
        a1 = relu.forward(z1)
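
        # After ReLU the four XOR inputs map to [0, 1.5], [0, 0.5], [0.5, 2.5]
        # and [0, 1.5]: both XOR=0 inputs land on the same point, so a single
        # straight line in this feature space can now separate the two classes.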

        # Create ReLU transformation table
        relu_table = Table(show_header=True, header_style="bold magenta")
        relu_table.add_column("Input", style="cyan")
        relu_table.add_column("After ReLU", style="green")
        relu_table.add_column("Linearly Separable?", style="yellow")

        for i in range(4):
            x1, x2 = xor_inputs.data[i]
            features = a1.data[i]
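            # Rough demo indicator (not a formal separability test): flag rows
            # where at least one ReLU feature is active.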
            separable = "✅" if (features[0] > 0 or features[1] > 0) else "❌"
            relu_table.add_row(f"[{int(x1)}, {int(x2)}]", f"[{features[0]:.1f}, {features[1]:.1f}]", separable)

        console.print(relu_table)

        success_panel = Panel(
            "🎯 ReLU created linearly separable features!",
            title="Nonlinearity Success",
            style="green"
        )
        console.print(success_panel)
        console.print()

        # Demo 3: Softmax for classification
        console.print(Panel(
            "Converting raw scores to probabilities...",
            title="🎲 Demo 3: Softmax for Multi-Class Classification",
            style="blue"
        ))

        # Simulate classifier outputs for 3 classes
        raw_scores = tt.Tensor([[2.0, 1.0, 0.1],    # Confident class 0
                                [0.5, 2.8, 0.2],    # Confident class 1
                                [1.0, 1.1, 1.05]])  # Uncertain

        # Apply softmax
        probabilities = softmax.forward(raw_scores)
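
        # Worked example for the first row: exp(2.0), exp(1.0), exp(0.1) ≈ 7.39, 2.72, 1.11,
        # which sum to ≈ 11.21, giving probabilities ≈ [0.66, 0.24, 0.10], a
        # distribution over the three classes that sums to 1.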

        # Create softmax comparison table
        softmax_table = Table(show_header=True, header_style="bold magenta")
        softmax_table.add_column("Sample", style="cyan")
        softmax_table.add_column("Raw Scores", style="yellow")
        softmax_table.add_column("Probabilities", style="green")
        softmax_table.add_column("Prediction", style="red")

        for i in range(3):
            scores = raw_scores.data[i]
            probs = probabilities.data[i]
            predicted_class = np.argmax(probs)
            confidence = probs[predicted_class]

            raw_str = f"[{scores[0]:.1f}, {scores[1]:.1f}, {scores[2]:.2f}]"
            prob_str = f"[{probs[0]:.3f}, {probs[1]:.3f}, {probs[2]:.3f}]"
            pred_str = f"Class {predicted_class} ({confidence:.1%})"

            softmax_table.add_row(f"Sample {i+1}", raw_str, prob_str, pred_str)

        console.print(softmax_table)
        console.print()

        # Demo 4: Activation combinations
        console.print(Panel(
            "Combining linear transformations + activations...",
            title="🧠 Demo 4: Building Neural Network Layers",
            style="magenta"
        ))

        # Simulate a 2-layer network: input → hidden (ReLU) → output (Sigmoid)
        input_data = tt.Tensor([[0.5], [0.8], [-0.3]])

        # Layer 1: Linear + ReLU
        W1 = tt.Tensor([[0.6, -0.4], [0.2, 0.9], [-0.1, 0.3]])  # 3→2
        hidden = relu.forward(tt.Tensor(W1.data.T @ input_data.data))

        # Layer 2: Linear + Sigmoid
        W2 = tt.Tensor([[0.7], [0.5]])  # 2→1
        output = sigmoid.forward(tt.Tensor(W2.data.T @ hidden.data))
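
        # The whole forward pass composes to output = Sigmoid(W2.T @ ReLU(W1.T @ x)):
        # a linear map followed by a nonlinearity at each layer.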

        # Create neural network flow table
        nn_table = Table(show_header=True, header_style="bold magenta")
        nn_table.add_column("Layer", style="cyan")
        nn_table.add_column("Values", style="yellow")
        nn_table.add_column("Activation", style="green")

        input_str = f"[{', '.join(f'{val:.1f}' for val in input_data.data.flatten())}]"
        hidden_str = f"[{', '.join(f'{val:.2f}' for val in hidden.data.flatten())}]"
        output_str = f"{output.data.flatten()[0]:.3f}"

        nn_table.add_row("Input", input_str, "None")
        nn_table.add_row("Hidden", hidden_str, "ReLU")
        nn_table.add_row("Output", output_str, "Sigmoid")

        console.print(nn_table)

        network_panel = Panel(
            "🎯 This is a complete neural network forward pass!",
            title="Neural Network Success",
            style="green"
        )
        console.print(network_panel)
        console.print()

        # Success summary
        console.print(Panel.fit(
            "🎯 Achievements:\n"
            "• Visualized how activation functions shape data\n"
            "• Proved why linearity fails on the XOR problem\n"
            "• Showed how ReLU creates learnable features\n"
            "• Used Softmax for probability classification\n"
            "• Built complete neural network layers\n\n"
            "🔥 Next: Single layer networks with decision boundaries!",
            title="🏆 TinyTorch Activations Demo Complete!",
            style="bold green",
            border_style="bright_green"
        ))

        return True

    except ImportError as e:
        console.print(Panel(
            f"Could not import TinyTorch modules: {e}\n\n💡 Make sure to run: tito export 03_activations",
            title="❌ Import Error",
            style="bold red"
        ))
        return False

    except Exception as e:
        console.print(Panel(
            f"Demo failed: {e}",
            title="❌ Error",
            style="bold red"
        ))
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = demo_activations()
    sys.exit(0 if success else 1)