diff --git a/tinytorch/core/attention.py b/tinytorch/core/attention.py index a9ce34e8..dea2bf93 100644 --- a/tinytorch/core/attention.py +++ b/tinytorch/core/attention.py @@ -280,6 +280,10 @@ class MultiHeadAttention: return output ### END SOLUTION + def __call__(self, x: Tensor, mask: Optional[Tensor] = None) -> Tensor: + """Allows the attention layer to be called like a function.""" + return self.forward(x, mask) + def parameters(self) -> List[Tensor]: """ Return all trainable parameters. diff --git a/tinytorch/models/transformer.py b/tinytorch/models/transformer.py index 96c3e1a6..728d78cb 100644 --- a/tinytorch/models/transformer.py +++ b/tinytorch/models/transformer.py @@ -102,6 +102,10 @@ class LayerNorm: return output ### END SOLUTION + def __call__(self, x): + """Allows the layer norm to be called like a function.""" + return self.forward(x) + def parameters(self): """Return learnable parameters.""" return [self.gamma, self.beta] @@ -176,6 +180,10 @@ class MLP: return output ### END SOLUTION + def __call__(self, x): + """Allows the MLP to be called like a function.""" + return self.forward(x) + def parameters(self): """Return all learnable parameters.""" params = [] @@ -273,6 +281,10 @@ class TransformerBlock: return output ### END SOLUTION + def __call__(self, x, mask=None): + """Allows the transformer block to be called like a function.""" + return self.forward(x, mask) + def parameters(self): """Return all learnable parameters.""" params = [] @@ -452,6 +464,10 @@ class GPT: return current_tokens ### END SOLUTION + def __call__(self, tokens): + """Allows the GPT model to be called like a function.""" + return self.forward(tokens) + def parameters(self): """Return all learnable parameters.""" params = [] diff --git a/tinytorch/text/embeddings.py b/tinytorch/text/embeddings.py index 3692f798..dacb0f27 100644 --- a/tinytorch/text/embeddings.py +++ b/tinytorch/text/embeddings.py @@ -102,9 +102,13 @@ class Embedding: if self.weight.requires_grad: from tinytorch.core.autograd import EmbeddingBackward result._grad_fn = EmbeddingBackward(self.weight, indices) - + return result + def __call__(self, indices: Tensor) -> Tensor: + """Allows the embedding to be called like a function.""" + return self.forward(indices) + def parameters(self) -> List[Tensor]: """Return trainable parameters.""" return [self.weight] @@ -202,6 +206,10 @@ class PositionalEncoding: return result + def __call__(self, x: Tensor) -> Tensor: + """Allows the positional encoding to be called like a function.""" + return self.forward(x) + def parameters(self) -> List[Tensor]: """Return trainable parameters.""" return [self.position_embeddings] @@ -328,6 +336,10 @@ class EmbeddingLayer: return output + def __call__(self, tokens: Tensor) -> Tensor: + """Allows the embedding layer to be called like a function.""" + return self.forward(tokens) + def parameters(self) -> List[Tensor]: """Return all trainable parameters.""" params = self.token_embedding.parameters()