% Horizontal Module Dependency Diagram for TinyTorch Paper
% Use with: \input{module_flow_horizontal.tex}
% Requires: \usepackage{tikz}, \usetikzlibrary{shapes,arrows,positioning,shadows,calc,backgrounds}
% (of these, the figure itself only uses positioning, shadows, and calc)

\begin{figure*}[t]
    \centering
    % Scale the finished picture to the full text width; the height follows proportionally.
    \resizebox{\textwidth}{!}{%
    \begin{tikzpicture}[
        node distance=0.4cm and 0.6cm,
        every node/.style={font=\scriptsize\sffamily},
        % Shared look of every module box. The single argument is the tier's
        % base colour, from which border, text and gradient shades are derived.
        module/.style={
            rectangle,
            draw=#1!60!black,
            top color=#1!5,
            bottom color=#1!15,
            text=#1!40!black,
            minimum width=1.4cm,
            minimum height=0.6cm,
            rounded corners=3pt,
            drop shadow={opacity=0.2, shadow xshift=1pt, shadow yshift=-1pt}
        },
        % One named style per tier; they differ only in base colour.
        foundation/.style={module=blue},
        architecture/.style={module=purple},
        optimization/.style={module=orange},
        capstone/.style={module=red},
        % Dependency arrows between modules.
        arr/.style={->, >=stealth, thick, gray!60},
        % Dashed grouping frame (defined here; not applied by any node below).
        tier/.style={draw=gray!40, dashed, rounded corners=5pt, inner sep=8pt}
    ]
    
    % --- Foundation tier: modules 01-08, arranged as two rows of four ---

    % Top row, chained left to right starting from 01 Tensor.
    \node[foundation]                (T)    {01 Tensor};
    \node[foundation, right=of T]    (A)    {02 Activ.};
    \node[foundation, right=of A]    (L)    {03 Layers};
    \node[foundation, right=of L]    (Loss) {04 Losses};

    % Bottom row, hung 0.6cm beneath 01 Tensor and chained the same way.
    \node[foundation, below=0.6cm of T] (Data)  {05 DataLoad.};
    \node[foundation, right=of Data]    (Auto)  {06 Autograd};
    \node[foundation, right=of Auto]    (Opt)   {07 Optim.};
    \node[foundation, right=of Opt]     (Train) {08 Training};

    % Straight arrows along the top row.
    \foreach \src/\dst in {T/A, A/L, L/Loss}
        \draw[arr] (\src) -- (\dst);
    % Row wrap 04 -> 05: drop 0.3cm below 04 Losses, run back through the gap
    % between the rows, then enter 05 DataLoad. from above.
    \draw[arr, rounded corners=5pt] (Loss.south) -- ++(0,-0.3) -| (Data.north);
    % Straight arrows along the bottom row.
    \foreach \src/\dst in {Data/Auto, Auto/Opt, Opt/Train}
        \draw[arr] (\src) -- (\dst);
    
        % --- Architecture tier: modules 09-13, two branches leaving 08 Training ---

        % Upper branch: one node, placed on the same row as 04 Losses.
        \node[architecture, right=of Loss, xshift=1.5cm] (Spatial) {09 CNNs};

        % Lower branch: a four-node chain on the same row as 08 Training.
        \node[architecture, right=of Train, xshift=1.5cm] (Tok)   {10 Token.};
        \node[architecture, right=of Tok]                 (Emb)   {11 Embed.};
        \node[architecture, right=of Emb]                 (Att)   {12 Attention};
        \node[architecture, right=of Att]                 (Trans) {13 Transform.};

        % Fork out of Training: step 0.5cm to the right, then turn up into 09 CNNs.
        \draw[arr, rounded corners=5pt] (Train.east) -- ++(0.5,0) |- (Spatial.west);

        % Straight arrows: Training into the lower branch, then along the chain.
        \foreach \src/\dst in {Train/Tok, Tok/Emb, Emb/Att, Att/Trans}
            \draw[arr] (\src) -- (\dst);
    
        % === OPTIMIZATION TIER (14-19) ===

        % 14 Profiling: 1.5cm to the right of the longer architecture branch (Trans),
        % vertically halfway between the Spatial row and the Trans row.
        % The x and y references are stored as separate helper coordinates so that the
        % node's `at` only needs a simple perpendicular (|-) expression.
        \path ($(Trans.east) + (1.5, 0)$) coordinate (ProfX);
        \path ($(Spatial)!0.5!(Trans)$) coordinate (ProfY);
        \node[optimization] (Prof) at (ProfX |- ProfY) {14 Profiling};

        % Converge on Profiling.
        % Incoming arrows land on the left half of the box edge and outgoing arrows
        % (further down) leave from the right half. If both used the plain north/south
        % anchors, they would retrace the same vertical stub and the picture would
        % read as a direct Spatial -> Quant / Trans -> Accel line with a side tap.
        \draw[arr, rounded corners=5pt] (Spatial.east) -| ([xshift=-0.35cm]Prof.north);
        \draw[arr, rounded corners=5pt] (Trans.east) -| ([xshift=-0.35cm]Prof.south);

        % Parallel optimization branches, each starting 2.5cm to the right of Profiling.
        % Quant/Compress share the row of Spatial.
        \node[optimization] (Quant) at (Prof |- Spatial) [xshift=2.5cm] {15 Quant.};
        \node[optimization, right=of Quant] (Comp) {16 Compress.};

        % Accel/Memo share the row of Trans.
        \node[optimization] (Accel) at (Prof |- Trans) [xshift=2.5cm] {17 Accel.};
        \node[optimization, right=of Accel] (Memo) {18 Memo.};

        % Branch out of Profiling from the right half of its top and bottom edges
        % (mirrors the -0.35cm offset used by the incoming arrows above).
        \draw[arr, rounded corners=5pt] ([xshift=0.35cm]Prof.north) |- (Quant.west);
        \draw[arr, rounded corners=5pt] ([xshift=0.35cm]Prof.south) |- (Accel.west);

        % Flow within each branch.
        \draw[arr] (Quant) -- (Comp);
        \draw[arr] (Accel) -- (Memo);

        % 19 Benchmark: convergence point of the two branches.
        % Horizontally 1.5cm past Comp.east (Comp is used as the reference because its
        % box ends further right); vertically halfway between Comp and Memo.
        \path ($(Comp.east) + (1.5, 0)$) coordinate (BenchX);
        \path ($(Comp)!0.5!(Memo)$) coordinate (BenchY);
        \node[optimization] (Bench) at (BenchX |- BenchY) {19 Benchmark};

        % Both branches enter Benchmark, one from above and one from below.
        \draw[arr, rounded corners=5pt] (Comp.east) -| (Bench.north);
        \draw[arr, rounded corners=5pt] (Memo.east) -| (Bench.south);
    
        % --- Capstone: module 20, placed directly right of 19 Benchmark ---
        \node[capstone, right=of Bench] (Cap) {20 Capstone};
        % The two boxes sit on one horizontal line, so the arrow runs edge to edge.
        \draw[arr] (Bench.east) -- (Cap.west);

        % --- Tier headings: bold captions placed 0.5cm above the top row ---
        \begin{scope}[every node/.style={font=\scriptsize\bfseries\sffamily}]
            % Foundation: centred between the tops of 01 Tensor and 04 Losses.
            \node[blue!60!black]
                at ($(T.north)!0.5!(Loss.north) + (0, 0.5)$) {FOUNDATION (01-08)};

            % Architecture: x is the middle of the span Spatial.west .. Trans.east,
            % y is taken from the top edge of Spatial.
            \coordinate (ArchCenterX) at ($(Spatial.west)!0.5!(Trans.east)$);
            \node[purple!60!black]
                at (ArchCenterX |- Spatial.north) [yshift=0.5cm] {ARCHITECTURE (09-13)};

            % Optimization: x is the middle of the span Quant.west .. Comp.east,
            % y is taken from the top edge of Quant.
            \coordinate (OptCenterX) at ($(Quant.west)!0.5!(Comp.east)$);
            \node[orange!60!black]
                at (OptCenterX |- Quant.north) [yshift=0.5cm] {OPTIMIZATION (14-19)};
        \end{scope}
    
        % --- Track annotations for the two optimization rows ---
        % Only the optimization rows are annotated; the Vision/Language branches are not.
        % The font is inherited from the picture-wide `every node` style.
        \node[gray, above=0.1cm of Quant] {Size};
        \node[gray, below=0.1cm of Memo] {Speed};
    
    % Close the picture; the trailing % keeps the line end from adding a space
    % before the brace that closes the \resizebox argument.
    \end{tikzpicture}%
    }
    % The caption refers to modules as Mxx, matching the two-digit prefixes on the boxes.
    \caption{\textbf{Module Dependency Graph.} TinyTorch's 20 modules form a directed acyclic graph with two architectural paths. Foundation modules (blue, M01--08) build core infrastructure sequentially, culminating in Training (M08). From Training, two paths branch: the \emph{Vision} path (M09) builds CNNs for spatial processing; the \emph{Language} path (M10--13) builds tokenization through transformers. Both paths converge at Profiling (M14), then branch into parallel optimization tracks---\emph{Model-level} (quantization, compression) and \emph{Runtime} (acceleration, memoization)---before final convergence at Benchmarking (M19) and Capstone (M20).}
    % The label follows the caption so that references pick up this figure's number.
    \label{fig:module-flow}
    \end{figure*}