mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-01 06:37:30 -05:00
✅ Renamed modules for clearer pedagogical flow:
- 05_networks → 05_dense (multi-layer dense/fully connected networks)
- 06_cnn → 06_spatial (convolutional networks for spatial patterns)
- 06_attention → 07_attention (attention mechanisms for sequences)

✅ Shifted remaining modules down by 1:
- 07_dataloader → 08_dataloader
- 08_autograd → 09_autograd
- 09_optimizers → 10_optimizers
- 10_training → 11_training
- 11_compression → 12_compression
- 12_kernels → 13_kernels
- 13_benchmarking → 14_benchmarking
- 14_mlops → 15_mlops
- 15_capstone → 16_capstone

✅ Updated module metadata (module.yaml files):
- Updated names, descriptions, dependencies
- Fixed prerequisite chains and enables relationships
- Updated export paths to match new names

New learner progression: Foundation → Individual Layers → Dense Networks → Spatial Networks → Attention Networks → Training Pipeline

Perfect pedagogical flow: Build one layer → Stack dense layers → Add spatial patterns → Add attention mechanisms → Learn to train them all.
43 lines
1.2 KiB
YAML
43 lines
1.2 KiB
YAML
# TinyTorch Module Metadata
# Essential system information for CLI tools and build systems

name: "11_kernels"
title: "Kernels - Hardware-Aware Optimization"
description: "Custom operations, performance optimization, and hardware-aware computing for ML systems"

# Dependencies - Used by CLI for module ordering and prerequisites
# NOTE(review): confirm these module IDs (and `name` above) still match the
# current module directory numbering before relying on them.
dependencies:
  prerequisites: [
    "00_setup", "01_tensor", "02_activations", "03_layers",
    "04_networks", "05_cnn", "06_dataloader", "07_autograd",
    "08_optimizers", "09_training", "10_compression"
  ]
  enables: ["12_benchmarking", "13_mlops"]

# Package Export - What gets built into tinytorch package
exports_to: "tinytorch.core.kernels"

# File Structure - What files exist in this module
files:
  dev_file: "kernels_dev.py"
  readme: "README.md"
  tests: "inline"

# Educational Metadata
difficulty: "⭐⭐⭐⭐"
time_estimate: "8-10 hours"

# Components - What's implemented in this module
components:
  - "matmul_custom"
  - "relu_custom"
  - "conv2d_custom"
  - "matmul_vectorized"
  - "matmul_cache_optimized"
  - "matmul_parallel"
  - "quantized_matmul"
  - "sparse_matmul"
  - "pruned_conv2d"
  - "KernelProfiler"
  - "PerformanceBenchmark"
  - "HardwareProfiler"