Mirror of https://github.com/MLSysBook/TinyTorch.git, synced 2026-05-06 17:57:31 -05:00
feat: implement selective exports for modules 12-13
- 12_attention: export scaled_dot_product_attention and MultiHeadAttention only
- 13_transformers: export TransformerBlock and GPT only

Continues the professional selective-export pattern across the advanced modules, keeping clean public APIs for the transformer architecture components.
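Each hunk below adds one "#| export" directive per public symbol. In nbdev-style tooling, only cells marked "#| export" are written into the generated Python module, and their top-level names are collected into that module's __all__. A minimal sketch of the intended effect; the module paths here are assumed for illustration, not taken from the repo:

# Hypothetical illustration: with __all__ generated from the "#| export"
# directives, a star-import exposes only the curated public names, not
# notebook-internal helpers or solution scaffolding.
from tinytorch.attention import *     # -> scaled_dot_product_attention, MultiHeadAttention
from tinytorch.transformers import *  # -> TransformerBlock, GPT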
@@ -313,6 +313,7 @@ Step-by-Step Attention Computation:
 """
 
 # %% nbgrader={"grade": false, "grade_id": "attention-function", "locked": false, "solution": true}
+#| export
 def scaled_dot_product_attention(Q: Tensor, K: Tensor, V: Tensor, mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
     """
     Compute scaled dot-product attention.
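The newly exported function is the standard attention primitive: softmax(QK^T / sqrt(d_k)) V. Below is a minimal NumPy sketch of that computation, matching the (output, attention_weights) return pair implied by the Tuple[Tensor, Tensor] signature; the function name and numeric details are illustrative, not TinyTorch's implementation.

import numpy as np
from typing import Optional, Tuple

def sdpa_sketch(Q: np.ndarray, K: np.ndarray, V: np.ndarray,
                mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, np.ndarray]:
    """Sketch: softmax(Q K^T / sqrt(d_k)) V, returning (output, weights)."""
    d_k = Q.shape[-1]
    # Scale by sqrt(d_k) so the softmax does not saturate as dimensions grow.
    scores = Q @ K.swapaxes(-2, -1) / np.sqrt(d_k)
    if mask is not None:
        # Zeroed-out mask positions get a large negative score, i.e. ~zero weight.
        scores = np.where(mask == 0, -1e9, scores)
    # Numerically stable softmax over the key axis.
    scores = scores - scores.max(axis=-1, keepdims=True)
    weights = np.exp(scores)
    weights = weights / weights.sum(axis=-1, keepdims=True)
    return weights @ V, weights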
@@ -526,6 +527,7 @@ This parallelization allows the model to attend to different representation subspaces
 """
 
 # %% nbgrader={"grade": false, "grade_id": "multihead-attention", "locked": false, "solution": true}
+#| export
 class MultiHeadAttention:
     """
     Multi-head attention mechanism.
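MultiHeadAttention runs several attention heads in parallel, each over its own learned projection of the input, then concatenates and mixes the results. A rough NumPy sketch of those mechanics, reusing sdpa_sketch from the block above; the weight initialization, call signature, and self-attention-only interface are assumptions for illustration:

import numpy as np

class MultiHeadAttentionSketch:
    """Sketch of multi-head self-attention; not TinyTorch's class."""
    def __init__(self, embed_dim: int, num_heads: int):
        assert embed_dim % num_heads == 0, "embed_dim must split evenly across heads"
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        # Four learned projections (Q, K, V, output); plain matrices here.
        rng = np.random.default_rng(0)
        self.W_q, self.W_k, self.W_v, self.W_o = (
            rng.normal(0.0, 0.02, (embed_dim, embed_dim)) for _ in range(4))

    def __call__(self, x: np.ndarray) -> np.ndarray:
        batch, seq, embed_dim = x.shape
        def project(W):
            # (batch, seq, embed) -> (batch, heads, seq, head_dim)
            return (x @ W).reshape(batch, seq, self.num_heads,
                                   self.head_dim).transpose(0, 2, 1, 3)
        Q, K, V = project(self.W_q), project(self.W_k), project(self.W_v)
        out, _ = sdpa_sketch(Q, K, V)  # each head attends independently
        # Concatenate heads back to (batch, seq, embed) and mix with W_o.
        out = out.transpose(0, 2, 1, 3).reshape(batch, seq, embed_dim)
        return out @ self.W_o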
@@ -853,6 +853,7 @@ Each layer adds information to this stream rather than replacing it, creating a
 """
 
 # %% nbgrader={"grade": false, "grade_id": "transformer-block", "solution": true}
+#| export
 class TransformerBlock:
     """
     Complete Transformer Block with self-attention, MLP, and residual connections.
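The hunk's context line ("Each layer adds information to this stream rather than replacing it, creating a ...") describes the residual stream. A pre-norm NumPy sketch showing how the self-attention and MLP sublayers each add into that stream, building on the two sketches above; the pre-norm placement, parameter-free layer norm, and ReLU MLP are assumptions, and the repo's block may differ:

import numpy as np

def layer_norm(x: np.ndarray, eps: float = 1e-5) -> np.ndarray:
    # Parameter-free layer norm for brevity; real blocks learn a gain and bias.
    return (x - x.mean(axis=-1, keepdims=True)) / np.sqrt(x.var(axis=-1, keepdims=True) + eps)

class TransformerBlockSketch:
    """Sketch of a pre-norm block: x + Attn(LN(x)), then x + MLP(LN(x))."""
    def __init__(self, embed_dim: int, num_heads: int, mlp_ratio: int = 4):
        self.attn = MultiHeadAttentionSketch(embed_dim, num_heads)
        rng = np.random.default_rng(1)
        self.W1 = rng.normal(0.0, 0.02, (embed_dim, mlp_ratio * embed_dim))
        self.W2 = rng.normal(0.0, 0.02, (mlp_ratio * embed_dim, embed_dim))

    def __call__(self, x: np.ndarray) -> np.ndarray:
        # Each sublayer *adds* to the residual stream rather than replacing it.
        x = x + self.attn(layer_norm(x))
        x = x + np.maximum(0.0, layer_norm(x) @ self.W1) @ self.W2  # ReLU MLP
        return x

x = np.random.default_rng(2).normal(size=(2, 16, 64))   # (batch, seq, embed)
assert TransformerBlockSketch(64, num_heads=8)(x).shape == x.shape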