mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-06-05 08:46:28 -05:00
- Add new 'tito modules' command for comprehensive module status checking
  - Scans all modules in modules/ directory automatically
  - Shows file structure (dev file, tests, README)
  - Runs tests with --test flag
  - Provides detailed breakdown with --details flag
- Remove duplicate/stub commands:
  - Remove 'tito status' (unimplemented stub)
  - Remove 'tito submit' (unimplemented stub)
- Update 'tito test' command:
  - Focus on individual module testing with detailed output
  - Redirect 'tito test --all' to 'tito modules --test' with recommendation
  - Better error handling with available modules list
- Add comprehensive documentation:
  - docs/development/testing-separation.md - explains module vs package checking
  - docs/development/command-cleanup-summary.md - documents CLI cleanup

Key benefit: Clear separation between module development status (tito modules) and TinyTorch package functionality (tito info) with no confusing overlaps.
531 lines
18 KiB
Python
531 lines
18 KiB
Python
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/networks/networks_dev.ipynb.
|
|
|
|
# %% auto 0
|
|
__all__ = ['Sequential', 'visualize_network_architecture', 'visualize_data_flow', 'compare_networks', 'create_mlp',
|
|
'analyze_network_behavior', 'create_classification_network', 'create_regression_network']
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 3
|
|
import numpy as np
|
|
import sys
|
|
from typing import List, Union, Optional, Callable
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.patches as patches
|
|
from matplotlib.patches import FancyBboxPatch, ConnectionPatch
|
|
import seaborn as sns
|
|
|
|
# Import our building blocks
|
|
from .tensor import Tensor
|
|
from .layers import Dense
|
|
from .activations import ReLU, Sigmoid, Tanh
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 4
|
|
def _should_show_plots():
    """Tell the plotting helpers whether they may open figures.

    Returns:
        False when the ``pytest`` module has already been imported or when one
        of the command-line arguments is exactly ``'test'``; True otherwise.
    """
    # A loaded pytest module means we are inside a test session.
    if 'pytest' in sys.modules:
        return False
    # Otherwise only an explicit 'test' argument on the command line disables plots.
    return 'test' not in sys.argv
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 6
|
|
class Sequential:
    """
    Sequential Network (student exercise skeleton).

    Intended contract: compose layers in order, so that
    f(x) = layer_n(...layer_2(layer_1(x))).

    This skeleton deliberately implements nothing: both the constructor and
    ``forward`` raise ``NotImplementedError``. A second ``class Sequential``
    defined later in this module rebinds the name, so this version is only
    reachable between the two definitions.

    Args:
        layers: List of layers to compose
    """

    def __init__(self, layers: List):
        """
        Placeholder constructor for the exercise.

        Args:
            layers: List of layers to compose in order (ignored here)

        Raises:
            NotImplementedError: Always; the student is expected to store the layers.
        """
        raise NotImplementedError("Student implementation required")

    def forward(self, x: Tensor) -> Tensor:
        """
        Placeholder forward pass for the exercise.

        Args:
            x: Input tensor (ignored here)

        Returns:
            Nothing; the method never returns normally.

        Raises:
            NotImplementedError: Always; the student is expected to apply the layers in order.
        """
        raise NotImplementedError("Student implementation required")

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``network(x)`` as shorthand for ``network.forward(x)``."""
        return self.forward(x)
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 7
|
|
class Sequential:
    """
    Sequential Network: a chain of layers applied one after another.

    Calling the network computes f(x) = layer_n(...layer_2(layer_1(x))).
    Each layer only needs to be callable with a single argument.
    """

    def __init__(self, layers: List):
        """Keep a reference to ``layers`` (the list is stored as given, not copied)."""
        self.layers = layers

    def forward(self, x: Tensor) -> Tensor:
        """Feed ``x`` through every layer in order and return the final result.

        With an empty layer list the input is returned unchanged.
        """
        result = x
        for stage in self.layers:
            # The output of one stage becomes the input of the next.
            result = stage(result)
        return result

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``network(x)`` as shorthand for ``network.forward(x)``."""
        return self.forward(x)
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 11
|
|
def visualize_network_architecture(network: Sequential, title: str = "Network Architecture"):
    """
    Draw the network as a horizontal row of labelled boxes joined by arrows.

    Each layer becomes one rounded box coloured by its class name and labelled
    with that name. Layers that expose both ``input_size`` and ``output_size``
    also get an "in→out" caption. Does nothing except print a notice when
    plots are disabled (see ``_should_show_plots``).

    Args:
        network: Sequential network to visualize
        title: Title for the plot
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    # Fill colour per layer class name; 'default' is the fallback for any other class.
    palette = {
        'Dense': '#4CAF50',    # Green
        'ReLU': '#2196F3',     # Blue
        'Sigmoid': '#FF9800',  # Orange
        'Tanh': '#9C27B0',     # Purple
        'default': '#757575',  # Gray
    }

    # Geometry of the drawing, in data coordinates.
    box_height = 0.8
    pitch = 1.2      # horizontal distance between neighbouring box centres
    baseline = 0     # every box is centred on this y value
    total = len(network.layers)

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    for pos, block in enumerate(network.layers):
        kind = type(block).__name__
        shade = palette.get(kind, palette['default'])
        centre = pos * pitch

        # Rounded rectangle for the layer itself.
        ax.add_patch(FancyBboxPatch(
            (centre - 0.3, baseline - box_height/2),
            0.6, box_height,
            boxstyle="round,pad=0.1",
            facecolor=shade,
            edgecolor='black',
            linewidth=2,
            alpha=0.8
        ))

        # Class name in the middle of the box.
        ax.text(centre, baseline, kind, ha='center', va='center',
                fontsize=10, fontweight='bold', color='white')

        # Size caption, only for layers that report both sizes.
        if hasattr(block, 'input_size') and hasattr(block, 'output_size'):
            caption = f"{block.input_size}→{block.output_size}"
            ax.text(centre, baseline - 0.3, caption, ha='center', va='center',
                    fontsize=8, color='white')

        # Arrow to the following box (skipped for the last layer).
        if pos < total - 1:
            following = (pos + 1) * pitch
            ax.add_patch(ConnectionPatch(
                (centre + 0.3, baseline), (following - 0.3, baseline),
                "data", "data",
                arrowstyle="->", shrinkA=5, shrinkB=5,
                mutation_scale=20, fc="black", lw=2
            ))

    # Frame the row of boxes and hide the axes decorations.
    ax.set_xlim(-0.5, (total - 1) * pitch + 0.5)
    ax.set_ylim(-1, 1)
    ax.set_aspect('equal')
    ax.axis('off')

    plt.title(title, fontsize=16, fontweight='bold', pad=20)

    # One legend entry per named layer class (the fallback colour is not listed).
    legend_handles = [
        patches.Patch(color=shade, label=kind)
        for kind, shade in palette.items()
        if kind != 'default'
    ]
    ax.legend(handles=legend_handles, loc='upper right', bbox_to_anchor=(1, 1))

    plt.tight_layout()
    plt.show()
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 12
|
|
def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = "Data Flow Through Network"):
    """
    Visualize how data flows through the network.

    Feeds ``input_data`` through ``network.layers`` one layer at a time and
    draws one column per layer. The top cell lists the layer's class name,
    its ``input_size``/``output_size`` (when the layer has both attributes),
    the output shape, and the mean and standard deviation of the output. The
    bottom cell plots the flattened output as a bar chart (20 values or
    fewer) or as a 20-bin histogram (more than 20 values).

    Every layer output must expose ``.shape`` and a ``.data`` attribute that
    supports ``flatten()``.

    Does nothing except print a notice when plots are disabled
    (see ``_should_show_plots``).

    Args:
        network: Sequential network
        input_data: Input tensor
        title: Title for the plot

    Raises:
        ValueError: If plotting is enabled and the network has no layers,
            because there would be no columns to draw.
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    layer_total = len(network.layers)
    if layer_total == 0:
        # plt.subplots cannot build a grid with zero columns; fail with a clear message.
        raise ValueError("Cannot visualize data flow: the network has no layers")

    # Run the input through the layers, remembering every intermediate result.
    layer_outputs = []
    x = input_data
    for layer in network.layers:
        x = layer(x)
        layer_outputs.append(x)

    # squeeze=False keeps `axes` two-dimensional even for a single layer, so
    # axes[row, col] is always a single Axes. (The previous reshape(1, -1)
    # special case produced a (1, 2) array and broke single-layer networks.)
    fig, axes = plt.subplots(2, layer_total, figsize=(4*layer_total, 8), squeeze=False)

    for i, (layer, result) in enumerate(zip(network.layers, layer_outputs)):
        # Top row: Layer information
        ax_top = axes[0, i]

        layer_type = type(layer).__name__
        ax_top.text(0.5, 0.8, layer_type, ha='center', va='center',
                    fontsize=12, fontweight='bold')

        if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'):
            ax_top.text(0.5, 0.6, f"{layer.input_size} → {layer.output_size}",
                        ha='center', va='center', fontsize=10)

        # Output shape
        ax_top.text(0.5, 0.4, f"Shape: {result.shape}",
                    ha='center', va='center', fontsize=9)

        # Output statistics
        raw = result.data
        ax_top.text(0.5, 0.2, f"Mean: {np.mean(raw):.3f}",
                    ha='center', va='center', fontsize=9)
        ax_top.text(0.5, 0.1, f"Std: {np.std(raw):.3f}",
                    ha='center', va='center', fontsize=9)

        ax_top.set_xlim(0, 1)
        ax_top.set_ylim(0, 1)
        ax_top.axis('off')

        # Bottom row: Output visualization
        ax_bottom = axes[1, i]
        flat = raw.flatten()

        if len(flat) <= 20:  # Small output - show as bars
            ax_bottom.bar(range(len(flat)), flat, alpha=0.7)
            ax_bottom.set_title(f"Layer {i+1} Output")
            ax_bottom.set_xlabel("Output Index")
            ax_bottom.set_ylabel("Value")
        else:  # Large output - show histogram
            ax_bottom.hist(flat, bins=20, alpha=0.7, edgecolor='black')
            ax_bottom.set_title(f"Layer {i+1} Output Distribution")
            ax_bottom.set_xlabel("Value")
            ax_bottom.set_ylabel("Frequency")

        ax_bottom.grid(True, alpha=0.3)

    plt.suptitle(title, fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 13
|
|
def compare_networks(networks: List[Sequential], network_names: List[str],
                     input_data: Tensor, title: str = "Network Comparison"):
    """
    Compare different network architectures side-by-side.

    Draws one column per (network, name) pair. The top cell is a pie chart of
    how many layers of each class the network contains (skipped for a network
    without layers). The bottom cell is a 20-bin histogram of the flattened
    network output with dashed lines at its mean and median.

    Networks and names are paired with ``zip``, so if the two lists differ in
    length the surplus entries of the longer one are not plotted.

    Does nothing except print a notice when plots are disabled
    (see ``_should_show_plots``).

    Args:
        networks: List of networks to compare
        network_names: Names for each network
        input_data: Input tensor to test with
        title: Title for the plot

    Raises:
        ValueError: If plotting is enabled and ``networks`` is empty,
            because there would be no columns to draw.
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    network_total = len(networks)
    if network_total == 0:
        # plt.subplots cannot build a grid with zero columns; fail with a clear message.
        raise ValueError("Cannot compare networks: no networks were given")

    # squeeze=False keeps `axes` two-dimensional even for a single network, so
    # axes[row, col] is always a single Axes. (The previous single-network
    # special case indexed axes[0] and got an array instead of an Axes.)
    fig, axes = plt.subplots(2, network_total, figsize=(6*network_total, 10), squeeze=False)

    for i, (network, name) in enumerate(zip(networks, network_names)):
        # Get network output
        output = network(input_data)

        # Top row: Architecture visualization
        ax_top = axes[0, i]

        # Count layer types (insertion order is kept, so slices follow layer order)
        layer_types = {}
        for layer in network.layers:
            layer_type = type(layer).__name__
            layer_types[layer_type] = layer_types.get(layer_type, 0) + 1

        # Create pie chart of layer types
        if layer_types:
            labels = list(layer_types.keys())
            sizes = list(layer_types.values())
            colors = plt.cm.Set3(np.linspace(0, 1, len(labels)))

            ax_top.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors)
            ax_top.set_title(f"{name}\nLayer Distribution")

        # Bottom row: Output comparison
        ax_bottom = axes[1, i]
        output_data = output.data.flatten()

        # Compute the summary statistics once; each is used for a line and its label.
        mean_value = np.mean(output_data)
        median_value = np.median(output_data)

        ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black')
        ax_bottom.axvline(mean_value, color='red', linestyle='--',
                          label=f'Mean: {mean_value:.3f}')
        ax_bottom.axvline(median_value, color='green', linestyle='--',
                          label=f'Median: {median_value:.3f}')

        ax_bottom.set_title(f"{name} Output Distribution")
        ax_bottom.set_xlabel("Output Value")
        ax_bottom.set_ylabel("Frequency")
        ax_bottom.legend()
        ax_bottom.grid(True, alpha=0.3)

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 15
|
|
def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int,
               activation=ReLU, output_activation=Sigmoid) -> Sequential:
    """
    Create a Multi-Layer Perceptron (MLP) network.

    The network is a chain of ``Dense`` layers whose widths run
    ``input_size -> hidden_sizes... -> output_size``. Every Dense layer
    except the last is followed by a fresh ``activation()`` instance; the
    last one is followed by ``output_activation()``.

    Args:
        input_size: Number of input features
        hidden_sizes: List of hidden layer sizes. An empty list (or None)
            connects the input directly to the output with a single Dense layer.
        output_size: Number of output features
        activation: Zero-argument callable (typically a class) producing the
            activation layer used after each hidden Dense layer
        output_activation: Zero-argument callable producing the activation
            layer placed after the final Dense layer, or None to leave the
            final Dense output untouched (linear output)

    Returns:
        Sequential network
    """
    # Consecutive pairs of this list are the (in, out) sizes of each Dense layer.
    widths = [input_size, *(hidden_sizes or []), output_size]
    final_index = len(widths) - 2

    layers = []
    for index, (fan_in, fan_out) in enumerate(zip(widths, widths[1:])):
        layers.append(Dense(fan_in, fan_out))
        # Hidden activations go after every Dense layer except the output one.
        if index < final_index:
            layers.append(activation())

    # None means "no output activation"; previously this raised TypeError.
    if output_activation is not None:
        layers.append(output_activation())

    return Sequential(layers)
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 18
|
|
def analyze_network_behavior(network: Sequential, input_data: Tensor,
                             title: str = "Network Behavior Analysis"):
    """
    Analyze how a network behaves on a given input and plot a 2x3 dashboard.

    Panels:
        1. Input vs Output: element-wise scatter of flattened input against
           flattened output with a y=x reference line. Drawn only when both
           have the same number of values; otherwise a note is shown instead.
        2. Output Distribution: 20-bin histogram of the output with its mean.
        3. Layer-by-Layer Activations: mean of each layer's output.
        4. Layer Type Distribution: count of layers per class name.
        5. Shape Transformation: number of elements before and after each layer.
        6. Network Summary: text block with layer count, shapes, weight count
           (only layers with a ``weights`` attribute are counted) and the
           architecture string.

    Does nothing except print a notice when plots are disabled
    (see ``_should_show_plots``).

    Args:
        network: Network to analyze
        input_data: Input tensor
        title: Title for the plot
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    # Do all computation before creating the figure, so a failing forward pass
    # does not leave an empty figure behind.
    output = network(input_data)
    input_flat = input_data.data.flatten()
    output_flat = output.data.flatten()

    # A single layer-by-layer pass feeds both the activation panel (3) and the
    # shape panel (5).
    mean_activations = []
    shapes = [input_data.shape]
    x = input_data
    for layer in network.layers:
        x = layer(x)
        mean_activations.append(np.mean(x.data))
        shapes.append(x.shape)

    layer_names = [type(layer).__name__ for layer in network.layers]
    layer_counts = {}
    for layer_name in layer_names:
        layer_counts[layer_name] = layer_counts.get(layer_name, 0) + 1

    # Only weight matrices are counted; layers without `weights` contribute 0.
    parameter_count = sum(
        np.prod(layer.weights.data.shape) if hasattr(layer, 'weights') else 0
        for layer in network.layers
    )

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # 1. Input vs Output relationship
    ax1 = axes[0, 0]
    if len(input_flat) == len(output_flat):
        ax1.scatter(input_flat, output_flat, alpha=0.6)
        ax1.plot([input_flat.min(), input_flat.max()],
                 [input_flat.min(), input_flat.max()], 'r--', alpha=0.5, label='y=x')
        ax1.legend()
    else:
        # scatter() requires x and y of equal length; show a note so the
        # remaining panels can still be drawn.
        ax1.text(0.5, 0.5,
                 f"Input has {len(input_flat)} values,\n"
                 f"output has {len(output_flat)} values.\n"
                 "Element-wise scatter not available.",
                 ha='center', va='center', transform=ax1.transAxes, fontsize=10)
    ax1.set_xlabel('Input Values')
    ax1.set_ylabel('Output Values')
    ax1.set_title('Input vs Output')
    ax1.grid(True, alpha=0.3)

    # 2. Output distribution
    ax2 = axes[0, 1]
    output_mean = np.mean(output_flat)
    ax2.hist(output_flat, bins=20, alpha=0.7, edgecolor='black')
    ax2.axvline(output_mean, color='red', linestyle='--',
                label=f'Mean: {output_mean:.3f}')
    ax2.set_xlabel('Output Values')
    ax2.set_ylabel('Frequency')
    ax2.set_title('Output Distribution')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 3. Layer-by-layer activation patterns
    ax3 = axes[0, 2]
    ax3.plot(range(len(mean_activations)), mean_activations, 'bo-', linewidth=2, markersize=8)
    ax3.set_xlabel('Layer Index')
    ax3.set_ylabel('Mean Activation')
    ax3.set_title('Layer-by-Layer Activations')
    ax3.grid(True, alpha=0.3)

    # 4. Network depth analysis
    ax4 = axes[1, 0]
    if layer_counts:
        ax4.bar(list(layer_counts.keys()), list(layer_counts.values()), alpha=0.7)
        ax4.set_xlabel('Layer Type')
        ax4.set_ylabel('Count')
        ax4.set_title('Layer Type Distribution')
        ax4.grid(True, alpha=0.3)

    # 5. Shape transformation
    ax5 = axes[1, 1]
    shape_sizes = [np.prod(shape) for shape in shapes]
    ax5.plot(range(len(shapes)), shape_sizes, 'go-', linewidth=2, markersize=8)
    ax5.set_xlabel('Layer Index')
    ax5.set_ylabel('Tensor Size')
    ax5.set_title('Shape Transformation')
    ax5.grid(True, alpha=0.3)

    # 6. Network summary
    ax6 = axes[1, 2]
    ax6.axis('off')

    summary_text = f"""
    Network Summary:
    • Total Layers: {len(network.layers)}
    • Input Shape: {input_data.shape}
    • Output Shape: {output.shape}
    • Parameters: {parameter_count}
    • Architecture: {' → '.join(layer_names)}
    """

    ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes,
             fontsize=10, verticalalignment='top', fontfamily='monospace')

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 21
|
|
def create_classification_network(input_size: int, num_classes: int,
                                  hidden_sizes: List[int] = None) -> Sequential:
    """
    Create a network for classification problems.

    Thin wrapper around ``create_mlp`` that uses ReLU for hidden layers and
    Sigmoid on the output layer.

    Args:
        input_size: Number of input features
        num_classes: Number of output classes
        hidden_sizes: List of hidden layer sizes. When None, a single hidden
            layer of ``max(1, input_size // 2)`` units is used. An explicit
            empty list is passed through to ``create_mlp`` unchanged.

    Returns:
        Sequential network for classification
    """
    if hidden_sizes is None:
        # input_size // 2 is 0 for input_size == 1; clamp so the default
        # hidden layer always has at least one unit.
        hidden_sizes = [max(1, input_size // 2)]

    return create_mlp(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=num_classes,
        activation=ReLU,
        output_activation=Sigmoid
    )
|
|
|
|
# %% ../../modules/networks/networks_dev.ipynb 22
|
|
def create_regression_network(input_size: int, output_size: int = 1,
                              hidden_sizes: List[int] = None) -> Sequential:
    """
    Create a network for regression problems.

    Thin wrapper around ``create_mlp`` that uses ReLU for hidden layers and
    Tanh on the output layer.

    Args:
        input_size: Number of input features
        output_size: Number of output values (default: 1)
        hidden_sizes: List of hidden layer sizes. When None, a single hidden
            layer of ``max(1, input_size // 2)`` units is used. An explicit
            empty list is passed through to ``create_mlp`` unchanged.

    Returns:
        Sequential network for regression
    """
    if hidden_sizes is None:
        # input_size // 2 is 0 for input_size == 1; clamp so the default
        # hidden layer always has at least one unit.
        hidden_sizes = [max(1, input_size // 2)]

    return create_mlp(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=output_size,
        activation=ReLU,
        # NOTE(review): the output layer DOES get an activation (Tanh), even
        # though an earlier comment here said "No activation for regression".
        # Assuming Tanh is the standard hyperbolic tangent, predictions are
        # confined to (-1, 1) - confirm this is intended for unscaled targets.
        output_activation=Tanh
    )
|