TinyTorch/modules/03_layers_ABOUT.html


<!DOCTYPE html>


<html lang="en" data-content_root="../" >

  <head>
    <meta charset="utf-8" />
    <!-- Single viewport declaration for the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=1" />

    <title>03. Layers &#8212; Tiny🔥Torch</title>


  <script data-cfasync="false">
    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
  </script>

  <!-- Loaded before other Sphinx assets -->
  <link href="../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />


  <link href="../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
  <link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />

    <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=03e43079" />
    <link rel="stylesheet" type="text/css" href="../_static/styles/sphinx-book-theme.css?v=eba8b062" />
    <link rel="stylesheet" type="text/css" href="../_static/togglebutton.css?v=13237357" />
    <link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
    <link rel="stylesheet" type="text/css" href="../_static/mystnb.8ecb98da25f57f5357bf6f572d296f466b2cfe2517ffebfabe82451661e28f02.css" />
    <link rel="stylesheet" type="text/css" href="../_static/sphinx-thebe.css?v=4fa983c6" />
    <link rel="stylesheet" type="text/css" href="../_static/sphinx-design.min.css?v=95c83b7e" />
    <link rel="stylesheet" type="text/css" href="../_static/custom.css?v=009d37f4" />

  <!-- Pre-loaded scripts that we'll load fully later -->
  <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
  <script src="../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>

    <script src="../_static/documentation_options.js?v=9eb32ce0"></script>
    <script src="../_static/doctools.js?v=9a2dae69"></script>
    <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
    <script src="../_static/clipboard.min.js?v=a7894cd8"></script>
    <script src="../_static/copybutton.js?v=f281be69"></script>
    <script src="../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
    <script>let toggleHintShow = 'Click to show';</script>
    <script>let toggleHintHide = 'Click to hide';</script>
    <script>let toggleOpenOnPrint = 'true';</script>
    <script src="../_static/togglebutton.js?v=4a39c7ea"></script>
    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
    <script src="../_static/design-tabs.js?v=f930bc37"></script>
    <!-- Thebe settings are global const bindings, so they are declared exactly once;
         a second declaration in another script element is a SyntaxError. -->
    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"; const thebe_selector = ".thebe,.cell"; const thebe_selector_input = "pre"; const thebe_selector_output = ".output, .cell_output"</script>
    <script async="async" src="../_static/sphinx-thebe.js?v=c100c467"></script>
    <script>DOCUMENTATION_OPTIONS.pagename = 'modules/03_layers_ABOUT';</script>
    <script src="../_static/ml-timeline.js?v=76e9b3e3"></script>
    <script src="../_static/wip-banner.js?v=04a7e74d"></script>
    <script src="../_static/marimo-badges.js?v=e6289128"></script>
    <script src="../_static/sidebar-link.js?v=404b701b"></script>
    <script src="../_static/hero-carousel.js?v=10341d2a"></script>
    <script src="../_static/subscribe-modal.js?v=42919b64"></script>
    <link rel="icon" href="../_static/favicon.svg"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="04. Loss Functions" href="04_losses_ABOUT.html" />
    <link rel="prev" title="02. Activations" href="02_activations_ABOUT.html" />
  <meta name="docsearch:language" content="en"/>
  </head>


  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">


  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>

  <div id="pst-scroll-pixel-helper"></div>

  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
    <i class="fa-solid fa-arrow-up"></i>Back to top</button>


  <input type="checkbox"
          class="sidebar-toggle"
          id="pst-primary-sidebar-checkbox"/>
  <label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>

  <input type="checkbox"
          class="sidebar-toggle"
          id="pst-secondary-sidebar-checkbox"/>
  <label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>

  <div class="search-button__wrapper">
    <div class="search-button__overlay"></div>
    <div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
      action="../search.html"
      method="get">
  <i class="fa-solid fa-magnifying-glass"></i>
  <input type="search"
         class="form-control"
         name="q"
         id="search-input"
         placeholder="Search..."
         aria-label="Search..."
         autocomplete="off"
         autocorrect="off"
         autocapitalize="off"
         spellcheck="false"/>
  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
  </div>

  <div class="pst-async-banner-revealer d-none">
  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>


    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
    </header>


  <div class="bd-container">
    <div class="bd-container__inner bd-page-width">


      <div class="bd-sidebar-primary bd-sidebar">


  <div class="sidebar-header-items sidebar-primary__section">


  </div>

    <div class="sidebar-primary-items__start sidebar-primary__section">
        <div class="sidebar-primary-item">


<a class="navbar-brand logo" href="../intro.html">


    <img src="../_static/logo-tinytorch.png" class="logo__image only-light" alt="Tiny🔥Torch - Home"/>
    <script>document.write(`<img src="../_static/logo-tinytorch.png" class="logo__image only-dark" alt="Tiny🔥Torch - Home"/>`);</script>


</a></div>
        <div class="sidebar-primary-item">

 <script>
 document.write(`
   <button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass"></i>
    <span class="search-button__default-text">Search</span>
    <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
   </button>
 `);
 </script></div>
        <div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
    <div class="bd-toc-item navbar-nav active">
        <p aria-level="2" class="caption" role="heading"><span class="caption-text">🚀 Getting Started</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../getting-started.html">Complete Guide</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🏗 Foundation Tier (01-07)</span></p>
<ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../tiers/foundation.html">📖 Tier Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="01_tensor_ABOUT.html">01. Tensor</a></li>
<li class="toctree-l1"><a class="reference internal" href="02_activations_ABOUT.html">02. Activations</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">03. Layers</a></li>
<li class="toctree-l1"><a class="reference internal" href="04_losses_ABOUT.html">04. Losses</a></li>
<li class="toctree-l1"><a class="reference internal" href="05_autograd_ABOUT.html">05. Autograd</a></li>
<li class="toctree-l1"><a class="reference internal" href="06_optimizers_ABOUT.html">06. Optimizers</a></li>
<li class="toctree-l1"><a class="reference internal" href="07_training_ABOUT.html">07. Training</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🏛️ Architecture Tier (08-13)</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../tiers/architecture.html">📖 Tier Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="08_dataloader_ABOUT.html">08. DataLoader</a></li>
<li class="toctree-l1"><a class="reference internal" href="09_spatial_ABOUT.html">09. Convolutions</a></li>
<li class="toctree-l1"><a class="reference internal" href="10_tokenization_ABOUT.html">10. Tokenization</a></li>
<li class="toctree-l1"><a class="reference internal" href="11_embeddings_ABOUT.html">11. Embeddings</a></li>
<li class="toctree-l1"><a class="reference internal" href="12_attention_ABOUT.html">12. Attention</a></li>
<li class="toctree-l1"><a class="reference internal" href="13_transformers_ABOUT.html">13. Transformers</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">⏱️ Optimization Tier (14-19)</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../tiers/optimization.html">📖 Tier Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="14_profiling_ABOUT.html">14. Profiling</a></li>
<li class="toctree-l1"><a class="reference internal" href="15_quantization_ABOUT.html">15. Quantization</a></li>
<li class="toctree-l1"><a class="reference internal" href="16_compression_ABOUT.html">16. Compression</a></li>
<li class="toctree-l1"><a class="reference internal" href="17_memoization_ABOUT.html">17. Memoization</a></li>
<li class="toctree-l1"><a class="reference internal" href="18_acceleration_ABOUT.html">18. Acceleration</a></li>
<li class="toctree-l1"><a class="reference internal" href="19_benchmarking_ABOUT.html">19. Benchmarking</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🏅 Capstone Competition</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../tiers/olympics.html">📖 Competition Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="20_capstone_ABOUT.html">20. Torch Olympics</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🧭 Course Orientation</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../chapters/00-introduction.html">Course Structure</a></li>
<li class="toctree-l1"><a class="reference internal" href="../prerequisites.html">Prerequisites &amp; Resources</a></li>
<li class="toctree-l1"><a class="reference internal" href="../chapters/learning-journey.html">Learning Journey</a></li>
<li class="toctree-l1"><a class="reference internal" href="../chapters/milestones.html">Historical Milestones</a></li>
<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🛠️ TITO CLI Reference</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../tito/overview.html">Command Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tito/modules.html">Module Workflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tito/milestones.html">Milestone System</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tito/data.html">Progress &amp; Data</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tito/troubleshooting.html">Troubleshooting</a></li>
<li class="toctree-l1"><a class="reference internal" href="../datasets.html">Datasets Guide</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🤝 Community</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../community.html">Ecosystem</a></li>
<li class="toctree-l1"><a class="reference internal" href="../resources.html">Learning Resources</a></li>
<li class="toctree-l1"><a class="reference internal" href="../credits.html">Credits &amp; Acknowledgments</a></li>
</ul>

    </div>
</nav></div>
    </div>


  <div class="sidebar-primary-items__end sidebar-primary__section">
  </div>

  <div id="rtd-footer-container"></div>


      </div>

      <main id="main-content" class="bd-main" role="main">


<div class="sbt-scroll-pixel-helper"></div>

          <div class="bd-content">
            <div class="bd-article-container">

              <div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">

    <div class="header-article-items__start">

        <div class="header-article-item"><!-- Icon-only control: aria-label provides its accessible name. --><button type="button" class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" aria-label="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
  <span class="fa-solid fa-bars"></span>
</button></div>

    </div>


    <div class="header-article-items__end">

        <div class="header-article-item">

<div class="article-header-buttons">


<div class="dropdown dropdown-download-buttons">
  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
    <i class="fas fa-download"></i>
  </button>
  <ul class="dropdown-menu">


      <li><a href="../_sources/modules/03_layers_ABOUT.md" target="_blank"
   class="btn btn-sm btn-download-source-button dropdown-item"
   title="Download source file"
   data-bs-placement="left" data-bs-toggle="tooltip"
>


<span class="btn__icon-container">
  <i class="fas fa-file"></i>
  </span>
<span class="btn__text-container">.md</span>
</a>
</li>


      <li>
<button onclick="window.print()"
  class="btn btn-sm btn-download-pdf-button dropdown-item"
  title="Print to PDF"
  data-bs-placement="left" data-bs-toggle="tooltip"
>


<span class="btn__icon-container">
  <i class="fas fa-file-pdf"></i>
  </span>
<span class="btn__text-container">.pdf</span>
</button>
</li>

  </ul>
</div>


<!-- Icon-only control: aria-label provides its accessible name. -->
<button onclick="toggleFullScreen()"
  type="button"
  class="btn btn-sm btn-fullscreen-button"
  title="Fullscreen mode"
  aria-label="Fullscreen mode"
  data-bs-placement="bottom" data-bs-toggle="tooltip"
>


<span class="btn__icon-container">
  <i class="fas fa-expand"></i>
  </span>

</button>


<script>
document.write(`
  <button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
    <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
    <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
  </button>
`);
</script>


<script>
document.write(`
  <button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
  </button>
`);
</script>
<!-- Icon-only control: aria-label provides its accessible name. -->
<button type="button" class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" aria-label="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <span class="fa-solid fa-list"></span>
</button>
</div></div>

    </div>

</div>
</div>


<div id="jb-print-docs-body" class="onlyprint">
    <h1>03. Layers</h1>
    <!-- Table of contents -->
    <div id="print-main-content">
        <div id="jb-print-toc">

            <div>
                <h2> Contents </h2>
            </div>
            <nav aria-label="Page">
                <ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#learning-objectives">Learning Objectives</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#build-use-reflect">Build → Use → Reflect</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-guide">Implementation Guide</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#linear-layer-the-neural-network-workhorse">Linear Layer: The Neural Network Workhorse</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dropout-preventing-overfitting">Dropout: Preventing Overfitting</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#layer-composition-building-neural-networks">Layer Composition: Building Neural Networks</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#getting-started">Getting Started</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prerequisites">Prerequisites</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#development-workflow">Development Workflow</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#testing">Testing</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comprehensive-test-suite">Comprehensive Test Suite</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#test-coverage-areas">Test Coverage Areas</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#inline-testing-validation">Inline Testing &amp; Validation</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#manual-testing-examples">Manual Testing Examples</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#systems-thinking-questions">Systems Thinking Questions</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#real-world-applications">Real-World Applications</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#mathematical-foundations">Mathematical Foundations</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture-design-patterns">Architecture Design Patterns</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#performance-characteristics">Performance Characteristics</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ready-to-build">Ready to Build?</a></li>
</ul>
            </nav>
        </div>
    </div>
</div>


<div id="searchbox"></div>
                <article class="bd-article">

  <section id="layers">
<h1>03. Layers<a class="headerlink" href="#layers" title="Link to this heading">#</a></h1>
<p><strong>FOUNDATION TIER</strong> | Difficulty: ⭐⭐ (2/4) | Time: 4-5 hours</p>
<section id="overview">
<h2>Overview<a class="headerlink" href="#overview" title="Link to this heading">#</a></h2>
<p>Build the fundamental building blocks that compose into neural networks. This module teaches you that layers are simply functions that transform tensors, with learnable parameters that define the transformation. You’ll implement Linear layers (the workhorse of deep learning) and Dropout regularization, understanding how these simple abstractions enable arbitrarily complex architectures through composition.</p>
</section>
<section id="learning-objectives">
<h2>Learning Objectives<a class="headerlink" href="#learning-objectives" title="Link to this heading">#</a></h2>
<p>By the end of this module, you will be able to:</p>
<ul class="simple">
<li><p><strong>Understand Layer Abstraction</strong>: Recognize layers as composable functions with parameters, mirroring PyTorch’s <code class="docutils literal notranslate"><span class="pre">torch.nn.Module</span></code> design pattern</p></li>
<li><p><strong>Implement Linear Transformations</strong>: Build <code class="docutils literal notranslate"><span class="pre">y</span> <span class="pre">=</span> <span class="pre">xW</span> <span class="pre">+</span> <span class="pre">b</span></code> with proper Xavier initialization to prevent gradient vanishing/explosion</p></li>
<li><p><strong>Master Parameter Management</strong>: Track trainable parameters using <code class="docutils literal notranslate"><span class="pre">parameters()</span></code> method for optimizer integration</p></li>
<li><p><strong>Build Dropout Regularization</strong>: Implement training/inference mode switching with proper scaling to prevent overfitting</p></li>
<li><p><strong>Analyze Memory Scaling</strong>: Calculate parameter counts and understand how network architecture affects memory footprint</p></li>
</ul>
</section>
<section id="build-use-reflect">
<h2>Build → Use → Reflect<a class="headerlink" href="#build-use-reflect" title="Link to this heading">#</a></h2>
<p>This module follows TinyTorch’s <strong>Build → Use → Reflect</strong> framework:</p>
<ol class="arabic simple">
<li><p><strong>Build</strong>: Implement Linear and Dropout layer classes with proper initialization, forward passes, and parameter tracking</p></li>
<li><p><strong>Use</strong>: Compose layers manually to create multi-layer networks for MNIST digit classification</p></li>
<li><p><strong>Reflect</strong>: Analyze memory scaling, computational complexity, and the trade-offs between model capacity and efficiency</p></li>
</ol>
</section>
<section id="implementation-guide">
<h2>Implementation Guide<a class="headerlink" href="#implementation-guide" title="Link to this heading">#</a></h2>
<section id="linear-layer-the-neural-network-workhorse">
<h3>Linear Layer: The Neural Network Workhorse<a class="headerlink" href="#linear-layer-the-neural-network-workhorse" title="Link to this heading">#</a></h3>
<p>The Linear layer implements the fundamental transformation <code class="docutils literal notranslate"><span class="pre">y</span> <span class="pre">=</span> <span class="pre">xW</span> <span class="pre">+</span> <span class="pre">b</span></code>:</p>
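<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.core.tensor</span><span class="w"> </span><span class="kn">import</span> <span class="n">Tensor</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.core.layers</span><span class="w"> </span><span class="kn">import</span> <span class="n">Linear</span>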

<span class="c1"># Create a linear transformation: 784 input features → 256 output features</span>
<span class="n">layer</span> <span class="o">=</span> <span class="n">Linear</span><span class="p">(</span><span class="mi">784</span><span class="p">,</span> <span class="mi">256</span><span class="p">)</span>

<span class="c1"># Forward pass: transform input batch</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">Tensor</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span> <span class="mi">784</span><span class="p">))</span>  <span class="c1"># 32 images, 784 pixels each</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>  <span class="c1"># Output: (32, 256)</span>

<span class="c1"># Access trainable parameters</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Weight shape: </span><span class="si">{</span><span class="n">layer</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>  <span class="c1"># (784, 256)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Bias shape: </span><span class="si">{</span><span class="n">layer</span><span class="o">.</span><span class="n">bias</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>      <span class="c1"># (256,)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Total params: </span><span class="si">{</span><span class="mi">784</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">256</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">256</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>     <span class="c1"># 200,960 parameters</span>
</pre></div>
</div>
<p><strong>Key Design Decisions:</strong></p>
<ul class="simple">
<li><p><strong>Xavier Initialization</strong>: Weights scaled by <code class="docutils literal notranslate"><span class="pre">sqrt(1/in_features)</span></code> to maintain gradient flow through deep networks</p></li>
<li><p><strong>Parameter Tracking</strong>: <code class="docutils literal notranslate"><span class="pre">parameters()</span></code> method returns list of tensors with <code class="docutils literal notranslate"><span class="pre">requires_grad=True</span></code> for optimizer compatibility</p></li>
<li><p><strong>Bias Handling</strong>: Optional bias parameter (<code class="docutils literal notranslate"><span class="pre">bias=False</span></code> when the layer is followed by a normalization layer such as batch normalization, which supplies its own learnable shift)</p></li>
</ul>
</section>
<section id="dropout-preventing-overfitting">
<h3>Dropout: Preventing Overfitting<a class="headerlink" href="#dropout-preventing-overfitting" title="Link to this heading">#</a></h3>
<p>Dropout randomly zeros elements during training to force network robustness:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.core.layers</span><span class="w"> </span><span class="kn">import</span> <span class="n">Dropout</span>

<span class="c1"># Create dropout with 50% probability</span>
<span class="n">dropout</span> <span class="o">=</span> <span class="n">Dropout</span><span class="p">(</span><span class="n">p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>

<span class="n">x</span> <span class="o">=</span> <span class="n">Tensor</span><span class="p">([</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.0</span><span class="p">,</span> <span class="mf">3.0</span><span class="p">,</span> <span class="mf">4.0</span><span class="p">])</span>

<span class="c1"># Training mode: randomly zero elements and scale by 1/(1-p)</span>
<span class="n">y_train</span> <span class="o">=</span> <span class="n">dropout</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Example output: [2.0, 0.0, 6.0, 0.0] - survivors scaled by 2.0</span>

<span class="c1"># Inference mode: pass through unchanged</span>
<span class="n">y_eval</span> <span class="o">=</span> <span class="n">dropout</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># Output: [1.0, 2.0, 3.0, 4.0] - no dropout applied</span>
</pre></div>
</div>
<p><strong>Why Inverted Dropout?</strong>
During training, surviving elements are scaled by <code class="docutils literal notranslate"><span class="pre">1/(1-p)</span></code> so that the expected value of each activation during training equals its unscaled value at inference. This eliminates the need to rescale during evaluation, making deployment simpler.</p>
</section>
<section id="layer-composition-building-neural-networks">
<h3>Layer Composition: Building Neural Networks<a class="headerlink" href="#layer-composition-building-neural-networks" title="Link to this heading">#</a></h3>
<p>Layers compose through sequential application - no container needed:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.core.layers</span><span class="w"> </span><span class="kn">import</span> <span class="n">Linear</span><span class="p">,</span> <span class="n">Dropout</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.core.activations</span><span class="w"> </span><span class="kn">import</span> <span class="n">ReLU</span>

<span class="c1"># Build 3-layer MNIST classifier manually</span>
<span class="n">layer1</span> <span class="o">=</span> <span class="n">Linear</span><span class="p">(</span><span class="mi">784</span><span class="p">,</span> <span class="mi">256</span><span class="p">)</span>
<span class="n">activation1</span> <span class="o">=</span> <span class="n">ReLU</span><span class="p">()</span>
<span class="n">dropout1</span> <span class="o">=</span> <span class="n">Dropout</span><span class="p">(</span><span class="mf">0.5</span><span class="p">)</span>

<span class="n">layer2</span> <span class="o">=</span> <span class="n">Linear</span><span class="p">(</span><span class="mi">256</span><span class="p">,</span> <span class="mi">128</span><span class="p">)</span>
<span class="n">activation2</span> <span class="o">=</span> <span class="n">ReLU</span><span class="p">()</span>
<span class="n">dropout2</span> <span class="o">=</span> <span class="n">Dropout</span><span class="p">(</span><span class="mf">0.3</span><span class="p">)</span>

<span class="n">layer3</span> <span class="o">=</span> <span class="n">Linear</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>

<span class="c1"># Forward pass: explicit composition shows data flow</span>
<span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">layer1</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">activation1</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">dropout1</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">layer2</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">activation2</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">dropout2</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">layer3</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">x</span>

<span class="c1"># Process batch</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">Tensor</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">32</span><span class="p">,</span> <span class="mi">784</span><span class="p">))</span>  <span class="c1"># 32 MNIST images</span>
<span class="n">output</span> <span class="o">=</span> <span class="n">forward</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>  <span class="c1"># Shape: (32, 10) - class logits</span>

<span class="c1"># Collect all parameters for training</span>
<span class="n">all_params</span> <span class="o">=</span> <span class="n">layer1</span><span class="o">.</span><span class="n">parameters</span><span class="p">()</span> <span class="o">+</span> <span class="n">layer2</span><span class="o">.</span><span class="n">parameters</span><span class="p">()</span> <span class="o">+</span> <span class="n">layer3</span><span class="o">.</span><span class="n">parameters</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Trainable parameter tensors: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">all_params</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>  <span class="c1"># 6 tensors (3 weights, 3 biases)</span>
</pre></div>
</div>
</section>
</section>
<section id="getting-started">
<h2>Getting Started<a class="headerlink" href="#getting-started" title="Link to this heading">#</a></h2>
<section id="prerequisites">
<h3>Prerequisites<a class="headerlink" href="#prerequisites" title="Link to this heading">#</a></h3>
<p>Ensure you’ve completed the prerequisite modules:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># Activate TinyTorch environment</span>
<span class="nb">source</span><span class="w"> </span>scripts/activate-tinytorch

<span class="c1"># Verify Module 01 (Tensor) is complete</span>
tito<span class="w"> </span><span class="nb">test</span><span class="w"> </span>tensor

<span class="c1"># Verify Module 02 (Activations) is complete</span>
tito<span class="w"> </span><span class="nb">test</span><span class="w"> </span>activations
</pre></div>
</div>
</section>
<section id="development-workflow">
<h3>Development Workflow<a class="headerlink" href="#development-workflow" title="Link to this heading">#</a></h3>
<ol class="arabic simple">
<li><p><strong>Open the development file</strong>: <code class="docutils literal notranslate"><span class="pre">modules/03_layers/layers_dev.py</span></code></p></li>
<li><p><strong>Implement Linear layer</strong>: Build <code class="docutils literal notranslate"><span class="pre">__init__</span></code> with Xavier initialization, <code class="docutils literal notranslate"><span class="pre">forward</span></code> with matrix multiplication, and <code class="docutils literal notranslate"><span class="pre">parameters()</span></code> method</p></li>
<li><p><strong>Add Dropout layer</strong>: Implement training/inference mode switching with proper mask generation and scaling</p></li>
<li><p><strong>Test layer composition</strong>: Verify manual composition of multi-layer networks with mixed layer types</p></li>
<li><p><strong>Analyze systems behavior</strong>: Run memory analysis to understand parameter scaling with network size</p></li>
<li><p><strong>Export and verify</strong>: <code class="docutils literal notranslate"><span class="pre">tito</span> <span class="pre">module</span> <span class="pre">complete</span> <span class="pre">03</span> <span class="pre">&amp;&amp;</span> <span class="pre">tito</span> <span class="pre">test</span> <span class="pre">layers</span></code></p></li>
</ol>
</section>
</section>
<section id="testing">
<h2>Testing<a class="headerlink" href="#testing" title="Link to this heading">#</a></h2>
<section id="comprehensive-test-suite">
<h3>Comprehensive Test Suite<a class="headerlink" href="#comprehensive-test-suite" title="Link to this heading">#</a></h3>
<p>Run the full test suite to verify layer functionality:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># TinyTorch CLI (recommended)</span>
tito<span class="w"> </span><span class="nb">test</span><span class="w"> </span>layers

<span class="c1"># Direct pytest execution</span>
python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>tests/<span class="w"> </span>-k<span class="w"> </span>layers<span class="w"> </span>-v
</pre></div>
</div>
</section>
<section id="test-coverage-areas">
<h3>Test Coverage Areas<a class="headerlink" href="#test-coverage-areas" title="Link to this heading">#</a></h3>
<ul class="simple">
<li><p>✅ <strong>Linear Layer Functionality</strong>: Verify <code class="docutils literal notranslate"><span class="pre">y</span> <span class="pre">=</span> <span class="pre">xW</span> <span class="pre">+</span> <span class="pre">b</span></code> computation with correct matrix dimensions and broadcasting</p></li>
<li><p>✅ <strong>Xavier Initialization</strong>: Ensure weights scaled by <code class="docutils literal notranslate"><span class="pre">sqrt(1/in_features)</span></code> for gradient stability</p></li>
<li><p>✅ <strong>Parameter Management</strong>: Confirm <code class="docutils literal notranslate"><span class="pre">parameters()</span></code> returns all trainable tensors with <code class="docutils literal notranslate"><span class="pre">requires_grad=True</span></code></p></li>
<li><p>✅ <strong>Dropout Training Mode</strong>: Validate probabilistic masking with correct <code class="docutils literal notranslate"><span class="pre">1/(1-p)</span></code> scaling</p></li>
<li><p>✅ <strong>Dropout Inference Mode</strong>: Verify passthrough behavior without modification during evaluation</p></li>
<li><p>✅ <strong>Layer Composition</strong>: Test multi-layer forward passes with mixed layer types</p></li>
<li><p>✅ <strong>Edge Cases</strong>: Handle empty batches, single samples, no-bias configurations, and probability boundaries</p></li>
</ul>
</section>
<section id="inline-testing-validation">
<h3>Inline Testing &amp; Validation<a class="headerlink" href="#inline-testing-validation" title="Link to this heading">#</a></h3>
<p>The module includes comprehensive inline tests with educational feedback:</p>
<div class="highlight-text notranslate"><div class="highlight"><pre><span></span># Example inline test output
🔬 Unit Test: Linear Layer...
✅ Linear layer computes y = xW + b correctly
✅ Weight initialization within expected Xavier range
✅ Bias initialized to zeros
✅ Output shape matches expected dimensions (32, 256)
✅ Parameter list contains weight and bias tensors
📈 Progress: Linear Layer ✓

🔬 Unit Test: Dropout Layer...
✅ Inference mode passes through unchanged
✅ Training mode zeros ~50% of elements
✅ Survivors scaled by 1/(1-p) = 2.0
✅ Zero dropout (p=0.0) preserves all values
✅ Full dropout (p=1.0) zeros everything
📈 Progress: Dropout Layer ✓

🔬 Integration Test: Multi-layer Network...
✅ 3-layer network processes batch: (32, 784) → (32, 10)
✅ Parameter count: 235,146 parameters across 6 tensors
✅ All parameters have requires_grad=True
📈 Progress: Layer Composition ✓
</pre></div>
</div>
</section>
<section id="manual-testing-examples">
<h3>Manual Testing Examples<a class="headerlink" href="#manual-testing-examples" title="Link to this heading">#</a></h3>
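<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.core.tensor</span><span class="w"> </span><span class="kn">import</span> <span class="n">Tensor</span>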
<span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.core.layers</span><span class="w"> </span><span class="kn">import</span> <span class="n">Linear</span><span class="p">,</span> <span class="n">Dropout</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.core.activations</span><span class="w"> </span><span class="kn">import</span> <span class="n">ReLU</span>

<span class="c1"># Test Linear layer forward pass</span>
<span class="n">layer</span> <span class="o">=</span> <span class="n">Linear</span><span class="p">(</span><span class="mi">784</span><span class="p">,</span> <span class="mi">256</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">Tensor</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">784</span><span class="p">))</span>  <span class="c1"># Single MNIST image</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Input: </span><span class="si">{</span><span class="n">x</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2"> → Output: </span><span class="si">{</span><span class="n">y</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>  <span class="c1"># (1, 784) → (1, 256)</span>

<span class="c1"># Test parameter counting</span>
<span class="n">params</span> <span class="o">=</span> <span class="n">layer</span><span class="o">.</span><span class="n">parameters</span><span class="p">()</span>
<span class="n">total</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">size</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">params</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Parameters: </span><span class="si">{</span><span class="n">total</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>  <span class="c1"># 200,960</span>

<span class="c1"># Test Dropout behavior</span>
<span class="n">dropout</span> <span class="o">=</span> <span class="n">Dropout</span><span class="p">(</span><span class="mf">0.5</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">Tensor</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">((</span><span class="mi">1</span><span class="p">,</span> <span class="mi">100</span><span class="p">)))</span>
<span class="n">y_train</span> <span class="o">=</span> <span class="n">dropout</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">y_eval</span> <span class="o">=</span> <span class="n">dropout</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">training</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Training: ~</span><span class="si">{</span><span class="n">np</span><span class="o">.</span><span class="n">count_nonzero</span><span class="p">(</span><span class="n">y_train</span><span class="o">.</span><span class="n">data</span><span class="p">)</span><span class="si">}</span><span class="s2"> survived&quot;</span><span class="p">)</span>  <span class="c1"># ~50</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Inference: </span><span class="si">{</span><span class="n">np</span><span class="o">.</span><span class="n">count_nonzero</span><span class="p">(</span><span class="n">y_eval</span><span class="o">.</span><span class="n">data</span><span class="p">)</span><span class="si">}</span><span class="s2"> survived&quot;</span><span class="p">)</span>   <span class="c1"># 100</span>

<span class="c1"># Test composition (layer1..layer3, activations and dropouts as defined in the Layer Composition example)</span>
<span class="n">net</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">layer3</span><span class="p">(</span><span class="n">dropout2</span><span class="p">(</span><span class="n">activation2</span><span class="p">(</span><span class="n">layer2</span><span class="p">(</span><span class="n">dropout1</span><span class="p">(</span><span class="n">activation1</span><span class="p">(</span><span class="n">layer1</span><span class="p">(</span><span class="n">x</span><span class="p">)))))))</span>
</pre></div>
</div>
</section>
</section>
<section id="systems-thinking-questions">
<h2>Systems Thinking Questions<a class="headerlink" href="#systems-thinking-questions" title="Link to this heading">#</a></h2>
<section id="real-world-applications">
<h3>Real-World Applications<a class="headerlink" href="#real-world-applications" title="Link to this heading">#</a></h3>
<ul class="simple">
<li><p><strong>Computer Vision</strong>: How does the Linear layer in ResNet-50’s final classification head transform a 2048-dimensional feature vector into 1000 class logits? What determines this layer’s size?</p></li>
<li><p><strong>Language Models</strong>: GPT-3 uses Linear layers with 12,288 input features. How much memory do these layers consume, and why does this limit model deployment?</p></li>
<li><p><strong>Recommendation Systems</strong>: Netflix uses multi-layer networks with Dropout. How does <code class="docutils literal notranslate"><span class="pre">p=0.5</span></code> affect training time vs model accuracy on sparse user-item interactions?</p></li>
<li><p><strong>Edge Deployment</strong>: A mobile CNN has 5 Linear layers totaling 2MB. How do you decide which layers to quantize or prune when targeting 500KB model size?</p></li>
</ul>
</section>
<section id="mathematical-foundations">
<h3>Mathematical Foundations<a class="headerlink" href="#mathematical-foundations" title="Link to this heading">#</a></h3>
<ul class="simple">
<li><p><strong>Xavier Initialization</strong>: Why does <code class="docutils literal notranslate"><span class="pre">scale</span> <span class="pre">=</span> <span class="pre">sqrt(1/fan_in)</span></code> preserve gradient variance through layers? What happens in a 20-layer network without proper initialization?</p></li>
<li><p><strong>Matrix Multiplication Complexity</strong>: A Linear(1024, 1024) layer with batch size 128 performs how many FLOPs? How does this compare to a Dropout layer on the same tensor?</p></li>
<li><p><strong>Dropout Mathematics</strong>: During training with <code class="docutils literal notranslate"><span class="pre">p=0.5</span></code>, what’s the expected value of each element? Why must we scale by <code class="docutils literal notranslate"><span class="pre">1/(1-p)</span></code> to match inference behavior?</p></li>
<li><p><strong>Parameter Growth</strong>: If you double the hidden layer size from 256 to 512, how many times more parameters do you have in Linear(784, hidden) + Linear(hidden, 10)?</p></li>
</ul>
</section>
<section id="architecture-design-patterns">
<h3>Architecture Design Patterns<a class="headerlink" href="#architecture-design-patterns" title="Link to this heading">#</a></h3>
<ul class="simple">
<li><p><strong>Layer Width vs Depth</strong>: A 784→512→10 network vs 784→256→256→10 - which has more parameters? Which typically generalizes better and why?</p></li>
<li><p><strong>Dropout Placement</strong>: Should you place Dropout before or after activation functions? What’s the difference between <code class="docutils literal notranslate"><span class="pre">Linear</span> <span class="pre">→</span> <span class="pre">ReLU</span> <span class="pre">→</span> <span class="pre">Dropout</span></code> vs <code class="docutils literal notranslate"><span class="pre">Linear</span> <span class="pre">→</span> <span class="pre">Dropout</span> <span class="pre">→</span> <span class="pre">ReLU</span></code>?</p></li>
<li><p><strong>Bias Necessity</strong>: When can you safely use <code class="docutils literal notranslate"><span class="pre">bias=False</span></code>? How does batch normalization (Module 09) interact with bias terms?</p></li>
<li><p><strong>Composition Philosophy</strong>: We deliberately avoided a Sequential container. What trade-offs do explicit composition and container abstractions make for debugging vs convenience?</p></li>
</ul>
</section>
<section id="performance-characteristics">
<h3>Performance Characteristics<a class="headerlink" href="#performance-characteristics" title="Link to this heading">#</a></h3>
<ul class="simple">
<li><p><strong>Memory Hierarchy</strong>: A Linear(4096, 4096) layer has ~16.8M weights (64 MiB in float32). Does this fit in L3 cache? How does cache performance affect training speed?</p></li>
<li><p><strong>Batch Size Scaling</strong>: Measuring throughput from batch_size=1 to 512, why does samples/sec increase but eventually plateau? What’s the bottleneck?</p></li>
<li><p><strong>Dropout Overhead</strong>: Profiling shows Dropout adds 2% overhead to training time. Where is this cost - mask generation, element-wise multiply, or memory bandwidth?</p></li>
<li><p><strong>Parameter Memory vs Activation Memory</strong>: In a 100-layer network, which dominates memory usage during training? How does gradient checkpointing address this?</p></li>
</ul>
</section>
</section>
<section id="ready-to-build">
<h2>Ready to Build?<a class="headerlink" href="#ready-to-build" title="Link to this heading">#</a></h2>
<p>You’re about to implement the abstractions that power every neural network in production. Linear layers are deceptively simple - just matrix multiplication and bias addition - but this simplicity is the foundation of extraordinary complexity. From ResNet-50’s 25 million parameters to GPT-3’s 175 billion, every learned transformation ultimately reduces to chains of <code class="docutils literal notranslate"><span class="pre">y</span> <span class="pre">=</span> <span class="pre">xW</span> <span class="pre">+</span> <span class="pre">b</span></code>.</p>
<p>Understanding layer composition is crucial for systems thinking. When you see “ResNet-50,” you’ll know exactly how parameter counts scale with depth. When debugging vanishing gradients, you’ll understand why Xavier initialization matters. When deploying to mobile devices, you’ll calculate memory footprints in your head.</p>
<p>Take your time with this module. Test each component thoroughly. Analyze the memory patterns. Build the intuition for how these simple building blocks compose into intelligence. This is where deep learning becomes real.</p>
<p>Choose your preferred way to engage with this module:</p>
<div class="sd-container-fluid sd-sphinx-override sd-mb-4 docutils">
<div class="sd-row sd-row-cols-1 sd-row-cols-xs-1 sd-row-cols-sm-2 sd-row-cols-md-3 sd-row-cols-lg-3 docutils">
<div class="sd-col sd-d-flex-row docutils">
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
🚀 Launch Binder</div>
<p class="sd-card-text">Run this module interactively in your browser. No installation required!</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="https://mybinder.org/v2/gh/mlsysbook/TinyTorch/main?filepath=modules/03_layers/layers_dev.ipynb"><span>https://mybinder.org/v2/gh/mlsysbook/TinyTorch/main?filepath=modules/03_layers/layers_dev.ipynb</span></a></div>
</div>
<div class="sd-col sd-d-flex-row docutils">
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
⚡ Open in Colab</div>
<p class="sd-card-text">Use Google Colab for GPU access and cloud compute power.</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="https://colab.research.google.com/github/mlsysbook/TinyTorch/blob/main/modules/03_layers/layers_dev.ipynb"><span>https://colab.research.google.com/github/mlsysbook/TinyTorch/blob/main/modules/03_layers/layers_dev.ipynb</span></a></div>
</div>
<div class="sd-col sd-d-flex-row docutils">
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
<div class="sd-card-body docutils">
<div class="sd-card-title sd-font-weight-bold docutils">
📖 View Source</div>
<p class="sd-card-text">Browse the Python source code and understand the implementation.</p>
</div>
<a class="sd-stretched-link sd-hide-link-text reference external" href="https://github.com/mlsysbook/TinyTorch/blob/main/modules/03_layers/layers_dev.py"><span>https://github.com/mlsysbook/TinyTorch/blob/main/modules/03_layers/layers_dev.py</span></a></div>
</div>
</div>
</div>
<div class="tip admonition">
<p class="admonition-title">💾 Save Your Progress</p>
<p><strong>Binder sessions are temporary!</strong> Download your completed notebook when done, or switch to local development for persistent work.</p>
</div>
<hr class="docutils" />
<div class="prev-next-area">
<a class="left-prev" href="../modules/02_activations_ABOUT.html" title="previous page">← Previous Module</a>
<a class="right-next" href="../modules/04_losses_ABOUT.html" title="next page">Next Module →</a>
</div>
</section>
</section>

    <script type="text/x-thebe-config">
    {
        requestKernel: true,
        binderOptions: {
            repo: "binder-examples/jupyter-stacks-datascience",
            ref: "master",
        },
        codeMirrorConfig: {
            theme: "abcdef",
            mode: "python"
        },
        kernelOptions: {
            name: "python3",
            path: "./modules"
        },
        predefinedOutput: true
    }
    </script>
    <script>kernelName = 'python3'</script>

                </article>


                <footer class="prev-next-footer d-print-none">

<div class="prev-next-area">
    <a class="left-prev"
       href="02_activations_ABOUT.html"
       title="previous page">
      <i class="fa-solid fa-angle-left"></i>
      <div class="prev-next-info">
        <p class="prev-next-subtitle">previous</p>
        <p class="prev-next-title">02. Activations</p>
      </div>
    </a>
    <a class="right-next"
       href="04_losses_ABOUT.html"
       title="next page">
      <div class="prev-next-info">
        <p class="prev-next-subtitle">next</p>
        <p class="prev-next-title">04. Loss Functions</p>
      </div>
      <i class="fa-solid fa-angle-right"></i>
    </a>
</div>
                </footer>

            </div>


                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">


  <div class="sidebar-secondary-item">
  <div class="page-toc tocsection onthispage">
    <i class="fa-solid fa-list"></i> Contents
  </div>
  <nav class="bd-toc-nav page-toc">
    <ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#learning-objectives">Learning Objectives</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#build-use-reflect">Build → Use → Reflect</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-guide">Implementation Guide</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#linear-layer-the-neural-network-workhorse">Linear Layer: The Neural Network Workhorse</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dropout-preventing-overfitting">Dropout: Preventing Overfitting</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#layer-composition-building-neural-networks">Layer Composition: Building Neural Networks</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#getting-started">Getting Started</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prerequisites">Prerequisites</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#development-workflow">Development Workflow</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#testing">Testing</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comprehensive-test-suite">Comprehensive Test Suite</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#test-coverage-areas">Test Coverage Areas</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#inline-testing-validation">Inline Testing &amp; Validation</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#manual-testing-examples">Manual Testing Examples</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#systems-thinking-questions">Systems Thinking Questions</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#real-world-applications">Real-World Applications</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#mathematical-foundations">Mathematical Foundations</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#architecture-design-patterns">Architecture Design Patterns</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#performance-characteristics">Performance Characteristics</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ready-to-build">Ready to Build?</a></li>
</ul>
  </nav></div>

</div></div>


          </div>
          <footer class="bd-footer-content">

<div class="bd-footer-content__inner container">

  <div class="footer-item">

<p class="component-author">
By Prof. Vijay Janapa Reddi (Harvard University)
</p>

  </div>

  <div class="footer-item">


  <p class="copyright">

      © Copyright 2025.
      <br/>

  </p>

  </div>

  <div class="footer-item">

  </div>

  <div class="footer-item">

  </div>

</div>
          </footer>


      </main>
    </div>
  </div>

  <!-- Scripts loaded after <body> so the DOM is not blocked -->
  <script src="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>

  <footer class="bd-footer">
  </footer>
  </body>
</html>