From 1041a796749450ca288beb828fc21382062cb4f8 Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Tue, 30 Sep 2025 09:58:04 -0400 Subject: [PATCH] feat: implement selective exports for modules 12-13 - 12_attention: Export scaled_dot_product_attention, MultiHeadAttention only - 13_transformers: Export TransformerBlock only Continues professional selective export pattern across advanced modules. Clean public APIs for transformer architecture components. --- modules/source/12_attention/attention_dev.py | 2 ++ modules/source/13_transformers/transformers_dev.py | 1 + 2 files changed, 3 insertions(+) diff --git a/modules/source/12_attention/attention_dev.py b/modules/source/12_attention/attention_dev.py index 7800c15d..79618f6c 100644 --- a/modules/source/12_attention/attention_dev.py +++ b/modules/source/12_attention/attention_dev.py @@ -313,6 +313,7 @@ Step-by-Step Attention Computation: """ # %% nbgrader={"grade": false, "grade_id": "attention-function", "locked": false, "solution": true} +#| export def scaled_dot_product_attention(Q: Tensor, K: Tensor, V: Tensor, mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]: """ Compute scaled dot-product attention. @@ -526,6 +527,7 @@ This parallelization allows the model to attend to different representation subs """ # %% nbgrader={"grade": false, "grade_id": "multihead-attention", "locked": false, "solution": true} +#| export class MultiHeadAttention: """ Multi-head attention mechanism. 
diff --git a/modules/source/13_transformers/transformers_dev.py b/modules/source/13_transformers/transformers_dev.py index b068915a..ee34304f 100644 --- a/modules/source/13_transformers/transformers_dev.py +++ b/modules/source/13_transformers/transformers_dev.py @@ -853,6 +853,7 @@ Each layer adds information to this stream rather than replacing it, creating a """ # %% nbgrader={"grade": false, "grade_id": "transformer-block", "solution": true} +#| export class TransformerBlock: """ Complete Transformer Block with self-attention, MLP, and residual connections.