diff --git a/modules/source/10_tokenization/tokenization_dev.py b/modules/source/10_tokenization/tokenization_dev.py index 9347ebd1..329ff38e 100644 --- a/modules/source/10_tokenization/tokenization_dev.py +++ b/modules/source/10_tokenization/tokenization_dev.py @@ -195,6 +195,7 @@ This ensures consistent behavior across different tokenization strategies. """ # %% nbgrader={"grade": false, "grade_id": "base-tokenizer", "solution": true} +#| export class Tokenizer: """ Base tokenizer class providing the interface for all tokenizers. @@ -303,6 +304,7 @@ Result: "hello" """ # %% nbgrader={"grade": false, "grade_id": "char-tokenizer", "solution": true} +#| export class CharTokenizer(Tokenizer): """ Character-level tokenizer that treats each character as a separate token. @@ -513,6 +515,7 @@ BPE discovers natural word boundaries and common patterns automatically! """ # %% nbgrader={"grade": false, "grade_id": "bpe-tokenizer", "solution": true} +#| export class BPETokenizer(Tokenizer): """ Byte Pair Encoding (BPE) tokenizer that learns subword units. diff --git a/modules/source/11_embeddings/embeddings_dev.py b/modules/source/11_embeddings/embeddings_dev.py index ebabe327..e2740848 100644 --- a/modules/source/11_embeddings/embeddings_dev.py +++ b/modules/source/11_embeddings/embeddings_dev.py @@ -201,6 +201,7 @@ Now let's build the core embedding layer that performs efficient token-to-vector """ # %% nbgrader={"grade": false, "grade_id": "embedding-class", "solution": true} +#| export class Embedding: """ Learnable embedding layer that maps token indices to dense vectors. @@ -402,6 +403,7 @@ Let's build trainable positional embeddings that can learn position-specific pat """ # %% nbgrader={"grade": false, "grade_id": "positional-encoding", "solution": true} +#| export class PositionalEncoding: """ Learnable positional encoding layer.