diff --git a/modules/source/12_attention/attention_dev.py b/modules/source/12_attention/attention_dev.py
index 7800c15d..79618f6c 100644
--- a/modules/source/12_attention/attention_dev.py
+++ b/modules/source/12_attention/attention_dev.py
@@ -313,6 +313,7 @@ Step-by-Step Attention Computation:
 """
 
 # %% nbgrader={"grade": false, "grade_id": "attention-function", "locked": false, "solution": true}
+#| export
 def scaled_dot_product_attention(Q: Tensor, K: Tensor, V: Tensor, mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
     """
     Compute scaled dot-product attention.
@@ -526,6 +527,7 @@ This parallelization allows the model to attend to different representation subs
 """
 
 # %% nbgrader={"grade": false, "grade_id": "multihead-attention", "locked": false, "solution": true}
+#| export
 class MultiHeadAttention:
     """
     Multi-head attention mechanism.
diff --git a/modules/source/13_transformers/transformers_dev.py b/modules/source/13_transformers/transformers_dev.py
index b068915a..ee34304f 100644
--- a/modules/source/13_transformers/transformers_dev.py
+++ b/modules/source/13_transformers/transformers_dev.py
@@ -853,6 +853,7 @@ Each layer adds information to this stream rather than replacing it, creating a
 """
 
 # %% nbgrader={"grade": false, "grade_id": "transformer-block", "solution": true}
+#| export
 class TransformerBlock:
     """
     Complete Transformer Block with self-attention, MLP, and residual connections.