Mirror of https://github.com/MLSysBook/TinyTorch.git, synced 2026-05-06 17:57:31 -05:00
feat: implement selective exports for modules 12-13
- 12_attention: export scaled_dot_product_attention and MultiHeadAttention only
- 13_transformers: export TransformerBlock and GPT only

Continues the professional selective-export pattern across the advanced modules, keeping clean public APIs for the transformer architecture components.
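Each hunk below adds one "#| export" directive per public symbol. In nbdev-style tooling, only cells marked "#| export" are written into the generated Python module, and their top-level names are collected into that module's __all__. A minimal sketch of the intended effect; the module paths here are assumed for illustration, not taken from the repo:

# Hypothetical illustration: with __all__ generated from the "#| export"
# directives, a star-import exposes only the curated public names, not
# notebook-internal helpers or solution scaffolding.
from tinytorch.attention import *     # -> scaled_dot_product_attention, MultiHeadAttention
from tinytorch.transformers import *  # -> TransformerBlock, GPT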
@@ -313,6 +313,7 @@ Step-by-Step Attention Computation:
 """
 
 # %% nbgrader={"grade": false, "grade_id": "attention-function", "locked": false, "solution": true}
+#| export
 def scaled_dot_product_attention(Q: Tensor, K: Tensor, V: Tensor, mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
     """
     Compute scaled dot-product attention.
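The newly exported function is the standard attention primitive: softmax(QK^T / sqrt(d_k)) V. Below is a minimal NumPy sketch of that computation, matching the (output, attention_weights) return pair implied by the Tuple[Tensor, Tensor] signature; the function name and numeric details are illustrative, not TinyTorch's implementation.

import numpy as np
from typing import Optional, Tuple

def sdpa_sketch(Q: np.ndarray, K: np.ndarray, V: np.ndarray,
                mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, np.ndarray]:
    """Sketch: softmax(Q K^T / sqrt(d_k)) V, returning (output, weights)."""
    d_k = Q.shape[-1]
    # Scale by sqrt(d_k) so the softmax does not saturate as dimensions grow.
    scores = Q @ K.swapaxes(-2, -1) / np.sqrt(d_k)
    if mask is not None:
        # Zeroed-out mask positions get a large negative score, i.e. ~zero weight.
        scores = np.where(mask == 0, -1e9, scores)
    # Numerically stable softmax over the key axis.
    scores = scores - scores.max(axis=-1, keepdims=True)
    weights = np.exp(scores)
    weights = weights / weights.sum(axis=-1, keepdims=True)
    return weights @ V, weights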
@@ -526,6 +527,7 @@ This parallelization allows the model to attend to different representation subspaces
 """
 
 # %% nbgrader={"grade": false, "grade_id": "multihead-attention", "locked": false, "solution": true}
+#| export
 class MultiHeadAttention:
     """
     Multi-head attention mechanism.
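MultiHeadAttention runs several attention heads in parallel, each over its own learned projection of the input, then concatenates and mixes the results. A rough NumPy sketch of those mechanics, reusing sdpa_sketch from the block above; the weight initialization, call signature, and self-attention-only interface are assumptions for illustration:

import numpy as np

class MultiHeadAttentionSketch:
    """Sketch of multi-head self-attention; not TinyTorch's class."""
    def __init__(self, embed_dim: int, num_heads: int):
        assert embed_dim % num_heads == 0, "embed_dim must split evenly across heads"
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        # Four learned projections (Q, K, V, output); plain matrices here.
        rng = np.random.default_rng(0)
        self.W_q, self.W_k, self.W_v, self.W_o = (
            rng.normal(0.0, 0.02, (embed_dim, embed_dim)) for _ in range(4))

    def __call__(self, x: np.ndarray) -> np.ndarray:
        batch, seq, embed_dim = x.shape
        def project(W):
            # (batch, seq, embed) -> (batch, heads, seq, head_dim)
            return (x @ W).reshape(batch, seq, self.num_heads,
                                   self.head_dim).transpose(0, 2, 1, 3)
        Q, K, V = project(self.W_q), project(self.W_k), project(self.W_v)
        out, _ = sdpa_sketch(Q, K, V)  # each head attends independently
        # Concatenate heads back to (batch, seq, embed) and mix with W_o.
        out = out.transpose(0, 2, 1, 3).reshape(batch, seq, embed_dim)
        return out @ self.W_o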
@@ -853,6 +853,7 @@ Each layer adds information to this stream rather than replacing it, creating a
 """
 
 # %% nbgrader={"grade": false, "grade_id": "transformer-block", "solution": true}
+#| export
 class TransformerBlock:
     """
     Complete Transformer Block with self-attention, MLP, and residual connections.
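The hunk's context line ("Each layer adds information to this stream rather than replacing it, creating a ...") describes the residual stream. A pre-norm NumPy sketch showing how the self-attention and MLP sublayers each add into that stream, building on the two sketches above; the pre-norm placement, parameter-free layer norm, and ReLU MLP are assumptions, and the repo's block may differ:

import numpy as np

def layer_norm(x: np.ndarray, eps: float = 1e-5) -> np.ndarray:
    # Parameter-free layer norm for brevity; real blocks learn a gain and bias.
    return (x - x.mean(axis=-1, keepdims=True)) / np.sqrt(x.var(axis=-1, keepdims=True) + eps)

class TransformerBlockSketch:
    """Sketch of a pre-norm block: x + Attn(LN(x)), then x + MLP(LN(x))."""
    def __init__(self, embed_dim: int, num_heads: int, mlp_ratio: int = 4):
        self.attn = MultiHeadAttentionSketch(embed_dim, num_heads)
        rng = np.random.default_rng(1)
        self.W1 = rng.normal(0.0, 0.02, (embed_dim, mlp_ratio * embed_dim))
        self.W2 = rng.normal(0.0, 0.02, (mlp_ratio * embed_dim, embed_dim))

    def __call__(self, x: np.ndarray) -> np.ndarray:
        # Each sublayer *adds* to the residual stream rather than replacing it.
        x = x + self.attn(layer_norm(x))
        x = x + np.maximum(0.0, layer_norm(x) @ self.W1) @ self.W2  # ReLU MLP
        return x

x = np.random.default_rng(2).normal(size=(2, 16, 64))   # (batch, seq, embed)
assert TransformerBlockSketch(64, num_heads=8)(x).shape == x.shape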