mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-06-03 08:02:13 -05:00
fix(package): Add PyTorch-style __call__ methods to exported modules
Resolved transformer training issues by adding `__call__` methods to:

- Embedding, PositionalEncoding, EmbeddingLayer (text.embeddings)
- LayerNorm, MLP, TransformerBlock, GPT (models.transformer)
- MultiHeadAttention (core.attention)

This enables PyTorch-style syntax: `model(x)` instead of `model.forward(x)`.

All transformer diagnostic tests now pass (5/5 ✓)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
4
tinytorch/core/attention.py
generated
4
tinytorch/core/attention.py
generated
@@ -280,6 +280,10 @@ class MultiHeadAttention:
|
||||
return output
|
||||
### END SOLUTION
|
||||
|
||||
def __call__(self, x: Tensor, mask: Optional[Tensor] = None) -> Tensor:
|
||||
"""Allows the attention layer to be called like a function."""
|
||||
return self.forward(x, mask)
|
||||
|
||||
def parameters(self) -> List[Tensor]:
|
||||
"""
|
||||
Return all trainable parameters.
|
||||
|
||||
16
tinytorch/models/transformer.py
generated
16
tinytorch/models/transformer.py
generated
@@ -102,6 +102,10 @@ class LayerNorm:
|
||||
return output
|
||||
### END SOLUTION
|
||||
|
||||
def __call__(self, x):
|
||||
"""Allows the layer norm to be called like a function."""
|
||||
return self.forward(x)
|
||||
|
||||
def parameters(self):
|
||||
"""Return learnable parameters."""
|
||||
return [self.gamma, self.beta]
|
||||
@@ -176,6 +180,10 @@ class MLP:
|
||||
return output
|
||||
### END SOLUTION
|
||||
|
||||
def __call__(self, x):
|
||||
"""Allows the MLP to be called like a function."""
|
||||
return self.forward(x)
|
||||
|
||||
def parameters(self):
|
||||
"""Return all learnable parameters."""
|
||||
params = []
|
||||
@@ -273,6 +281,10 @@ class TransformerBlock:
|
||||
return output
|
||||
### END SOLUTION
|
||||
|
||||
def __call__(self, x, mask=None):
|
||||
"""Allows the transformer block to be called like a function."""
|
||||
return self.forward(x, mask)
|
||||
|
||||
def parameters(self):
|
||||
"""Return all learnable parameters."""
|
||||
params = []
|
||||
@@ -452,6 +464,10 @@ class GPT:
|
||||
return current_tokens
|
||||
### END SOLUTION
|
||||
|
||||
def __call__(self, tokens):
|
||||
"""Allows the GPT model to be called like a function."""
|
||||
return self.forward(tokens)
|
||||
|
||||
def parameters(self):
|
||||
"""Return all learnable parameters."""
|
||||
params = []
|
||||
|
||||
14
tinytorch/text/embeddings.py
generated
14
tinytorch/text/embeddings.py
generated
@@ -102,9 +102,13 @@ class Embedding:
|
||||
if self.weight.requires_grad:
|
||||
from tinytorch.core.autograd import EmbeddingBackward
|
||||
result._grad_fn = EmbeddingBackward(self.weight, indices)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
def __call__(self, indices: Tensor) -> Tensor:
|
||||
"""Allows the embedding to be called like a function."""
|
||||
return self.forward(indices)
|
||||
|
||||
def parameters(self) -> List[Tensor]:
|
||||
"""Return trainable parameters."""
|
||||
return [self.weight]
|
||||
@@ -202,6 +206,10 @@ class PositionalEncoding:
|
||||
|
||||
return result
|
||||
|
||||
def __call__(self, x: Tensor) -> Tensor:
|
||||
"""Allows the positional encoding to be called like a function."""
|
||||
return self.forward(x)
|
||||
|
||||
def parameters(self) -> List[Tensor]:
|
||||
"""Return trainable parameters."""
|
||||
return [self.position_embeddings]
|
||||
@@ -328,6 +336,10 @@ class EmbeddingLayer:
|
||||
|
||||
return output
|
||||
|
||||
def __call__(self, tokens: Tensor) -> Tensor:
|
||||
"""Allows the embedding layer to be called like a function."""
|
||||
return self.forward(tokens)
|
||||
|
||||
def parameters(self) -> List[Tensor]:
|
||||
"""Return all trainable parameters."""
|
||||
params = self.token_embedding.parameters()
|
||||
|
||||
Reference in New Issue
Block a user