mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 22:25:29 -05:00
fix: Add missing typing imports to Module 10 tokenization
Issue: CharTokenizer was failing with `NameError: name 'List' is not defined`.
Root cause: the typing imports were not marked with `#| export`.
Fix:
✅ Added the `#| export` directive to the import block in tokenization_dev.py
✅ Re-exported the module using `tito export 10_tokenization`
✅ typing.List, Dict, Tuple, Optional, Set are now properly exported
Verification:
- CharTokenizer.build_vocab() works ✅
- encode() and decode() work ✅
- Tested on Shakespeare sample text ✅
This fixes the integration with vaswani_shakespeare.py, which now properly uses CharTokenizer from Module 10 instead of manual tokenization.
This commit is contained in:
10
tinytorch/text/tokenization.py
generated
10
tinytorch/text/tokenization.py
generated
@@ -21,6 +21,16 @@ __all__ = ['Tokenizer', 'CharTokenizer', 'BPETokenizer']
|
||||
#| default_exp text.tokenization
|
||||
#| export
|
||||
|
||||
# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 3
|
||||
import numpy as np
|
||||
from typing import List, Dict, Tuple, Optional, Set
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict, Counter
|
||||
|
||||
# Import only Module 01 (Tensor) - this module has minimal dependencies
|
||||
from ..core.tensor import Tensor
|
||||
|
||||
# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 8
|
||||
class Tokenizer:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user