From b6f4a0bee654807286790ea8a4dc4ebc2ae1126c Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi
Date: Tue, 30 Sep 2025 15:22:30 -0400
Subject: [PATCH] Add ASCII visualizations to Module 08 for understanding image data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added educational ASCII art showing:

1. **Actual pixel values** - What 8×8 digit images look like as numbers
   - Shows digits 5, 3, and 8 with real pixel values (0-16 range)
   - Helps students understand images are just 2D arrays

2. **Visual representation** - How humans see the digits
   - ASCII art showing recognizable digit shapes
   - Connects abstract numbers to concrete patterns

3. **Shape transformations** - How DataLoader batches data
   - Individual: (8, 8) → Batched: (32, 8, 8)
   - Shows what the model actually receives

4. **Complete example** - Loading and using tiny digits dataset
   - Real code showing datasets/tiny/digits_8x8.npz usage
   - Demonstrates the full DataLoader workflow

Benefits:
✅ Students visualize what image data IS
✅ Understand DataLoader's batching transformation
✅ See connection between numbers and visual patterns
✅ Ready to work with real datasets in milestones

This makes the abstract concept of 'image tensors' concrete and visual.
--- .../source/08_dataloader/dataloader_dev.py | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/modules/source/08_dataloader/dataloader_dev.py b/modules/source/08_dataloader/dataloader_dev.py index 91d2577b..3b896521 100644 --- a/modules/source/08_dataloader/dataloader_dev.py +++ b/modules/source/08_dataloader/dataloader_dev.py @@ -357,7 +357,7 @@ def test_unit_tensordataset(): # Test basic functionality features = Tensor([[1, 2], [3, 4], [5, 6]]) # 3 samples, 2 features - labels = Tensor([0, 1, 0]) # 3 labels + labels = Tensor([0, 1, 0]) # 3 labels dataset = TensorDataset(features, labels) @@ -645,6 +645,45 @@ Module 08 (DataLoader) Examples & Milestones (Learn mechanics) (Apply to real data) ``` +### Understanding Image Data + +**What does image data actually look like?** + +Images are just 2D arrays of numbers (pixels). Here are actual 8×8 handwritten digits: + +``` +Digit "5" (8×8): Digit "3" (8×8): Digit "8" (8×8): + 0 0 12 13 5 0 0 0 0 0 11 12 0 0 0 0 0 0 10 14 8 1 0 0 + 0 0 13 15 10 0 0 0 0 2 16 16 16 7 0 0 0 0 16 15 15 9 0 0 + 0 3 15 13 16 7 0 0 0 0 8 16 8 0 0 0 0 0 15 5 5 13 0 0 + 0 8 13 6 15 4 0 0 0 0 0 12 13 0 0 0 0 1 16 5 5 13 0 0 + 0 0 0 6 16 5 0 0 0 0 1 16 15 9 0 0 0 6 16 16 16 16 1 0 + 0 0 5 15 16 9 0 0 0 0 14 16 16 16 7 0 1 16 3 1 1 15 1 0 + 0 0 9 16 9 0 0 0 0 5 16 8 8 16 0 0 0 9 16 16 16 15 0 0 + 0 0 0 0 0 0 0 0 0 3 16 16 16 12 0 0 0 0 0 0 0 0 0 0 + +Visual representation: +░█████░ ░█████░ ░█████░ +░█░░░█░ ░░░░░█░ █░░░░█░ +░░░░█░░ ░░███░░ ░█████░ +░░░█░░░ ░░░░█░░ █░░░░█░ +░░█░░░░ ░█████░ ░█████░ +``` + +**Shape transformations in DataLoader:** + +``` +Individual Sample (from Dataset): + image: (8, 8) ← Single 8×8 image + label: scalar ← Single digit (0-9) + +After DataLoader batching (batch_size=32): + images: (32, 8, 8) ← Stack of 32 images + labels: (32,) ← Array of 32 labels + +This is what your model sees during training! 
+``` + ### Quick Start with Real Data **Tiny Datasets (ships with TinyTorch):** @@ -657,12 +696,18 @@ labels = Tensor(data['labels']) # (1797,) dataset = TensorDataset(images, labels) loader = DataLoader(dataset, batch_size=32, shuffle=True) + +# Each batch contains real digit images! +for batch_images, batch_labels in loader: + # batch_images: (32, 8, 8) - 32 digit images + # batch_labels: (32,) - their labels (0-9) + break ``` **Full Datasets (for serious training):** ```python -# See milestones/03_mlp_revival_1986/ for MNIST download -# See milestones/04_cnn_revolution_1998/ for CIFAR-10 download +# See milestones/03_mlp_revival_1986/ for MNIST download (28×28 images) +# See milestones/04_cnn_revolution_1998/ for CIFAR-10 download (32×32×3 images) ``` ### What You've Accomplished