# Mirror of https://github.com/MLSysBook/TinyTorch.git
# Synced 2026-03-12 14:33:33 -05:00
#
# ✅ Renamed modules for clearer pedagogical flow:
#   - 05_networks → 05_dense (multi-layer dense/fully connected networks)
#   - 06_cnn → 06_spatial (convolutional networks for spatial patterns)
#   - 06_attention → 07_attention (attention mechanisms for sequences)
#
# ✅ Shifted remaining modules down by 1:
#   - 07_dataloader → 08_dataloader
#   - 08_autograd → 09_autograd
#   - 09_optimizers → 10_optimizers
#   - 10_training → 11_training
#   - 11_compression → 12_compression
#   - 12_kernels → 13_kernels
#   - 13_benchmarking → 14_benchmarking
#   - 14_mlops → 15_mlops
#   - 15_capstone → 16_capstone
#
# ✅ Updated module metadata (module.yaml files):
#   - Updated names, descriptions, dependencies
#   - Fixed prerequisite chains and enables relationships
#   - Updated export paths to match new names
#
# New learner progression:
#   Foundation → Individual Layers → Dense Networks → Spatial Networks
#   → Attention Networks → Training Pipeline
#
# Perfect pedagogical flow:
#   Build one layer → Stack dense layers → Add spatial patterns
#   → Add attention mechanisms → Learn to train them all.
#
# 127 lines · 3.8 KiB · YAML
# Module metadata: identity, prerequisites, learning content, exported
# components, inline tests, and file layout for the compression module.
# NOTE(review): confirm that `name` and the module IDs referenced below still
# match the current module directory numbering.
name: "10_compression"
title: "Compression & Optimization"
description: "Making AI models efficient for real-world deployment"
version: "1.0.0"
author: "TinyTorch Team"
# Modules listed as prerequisites of this one.
dependencies:
  - "00_setup"
  - "01_tensor"
  - "03_layers"
  - "04_networks"
  - "09_training"

learning_goals:
  - "Understand model size and deployment constraints"
  - "Implement magnitude-based pruning for weight reduction"
  - "Master quantization for memory efficiency"
  - "Build knowledge distillation for compact models"
  - "Create structured pruning for architecture optimization"
  - "Compare compression techniques and their trade-offs"

# Each entry names one class or function together with a short description.
components:
  - name: "CompressionMetrics"
    description: "Model size analysis and parameter counting"
    type: "class"

  - name: "prune_weights_by_magnitude"
    description: "Remove unimportant weights from layers"
    type: "function"

  - name: "calculate_sparsity"
    description: "Calculate fraction of zero weights"
    type: "function"

  - name: "prune_model_by_magnitude"
    description: "Apply pruning to entire models"
    type: "function"

  - name: "quantize_layer_weights"
    description: "Reduce parameter precision for memory savings"
    type: "function"

  - name: "DistillationLoss"
    description: "Train compact models with teacher guidance"
    type: "class"

  - name: "prune_layer_neurons"
    description: "Remove entire neurons/channels"
    type: "function"

# Named tests with a description each; `files.tests` below marks them "inline".
tests:
  - name: "test_compression_metrics_comprehensive"
    description: "Test model size analysis functionality"

  - name: "test_magnitude_pruning_comprehensive"
    description: "Test weight pruning algorithms"

  - name: "test_quantization_comprehensive"
    description: "Test precision reduction techniques"

  - name: "test_distillation_comprehensive"
    description: "Test knowledge distillation training"

  - name: "test_structured_pruning_comprehensive"
    description: "Test neuron/channel removal"

  - name: "test_compression_integration_comprehensive"
    description: "Test combined compression techniques"

# Ordered walkthrough; `step` is an integer sequence number starting at 1.
educational_flow:
  - step: 1
    title: "Understanding Model Size"
    description: "Learn to analyze and measure neural network parameters"

  - step: 2
    title: "Magnitude-Based Pruning"
    description: "Remove unimportant weights based on magnitude"

  - step: 3
    title: "Quantization Experiments"
    description: "Reduce precision for memory efficiency"

  - step: 4
    title: "Knowledge Distillation"
    description: "Train compact models with teacher guidance"

  - step: 5
    title: "Structured Pruning"
    description: "Remove entire neurons and channels"

  - step: 6
    title: "Comprehensive Comparison"
    description: "Compare all techniques and combine for maximum benefit"

real_world_applications:
  - "Mobile AI deployment (smartphone apps)"
  - "Edge computing (IoT devices, sensors)"
  - "Real-time inference (autonomous vehicles)"
  - "Cost optimization (cloud inference)"
  - "Battery efficiency (wearable devices)"

industry_connections:
  - "MobileNet: Mobile-optimized architectures"
  - "DistilBERT: Compressed language models"
  - "TinyML: Microcontroller deployment"
  - "Neural Architecture Search: Automated optimization"

assessment_criteria:
  - "Implement 4 compression techniques correctly"
  - "Understand accuracy vs efficiency trade-offs"
  - "Measure compression effectiveness quantitatively"
  - "Apply techniques to real neural networks"
  - "Compare different compression strategies"

# NOTE(review): verify these module numbers against the current ordering.
next_steps:
  - "Module 11: Kernels - Hardware-aware optimization"
  - "Module 12: Benchmarking - Performance measurement"
  - "Module 13: MLOps - Production deployment"

# File Structure - What files exist in this module
# `tests` is nested here on purpose: at the top level it would duplicate the
# `tests` sequence defined earlier in this document.
files:
  dev_file: "compression_dev.py"
  readme: "README.md"
  tests: "inline"

# Educational Metadata
difficulty: "⭐⭐⭐⭐"
time_estimate: "8-10 hours"