fix(milestone): reset model before mixed task challenge

The transformer was being trained sequentially on reversal then copying,
which caused it to "forget" reversal before the mixed task. Now we
reinitialize the model before challenge 3 so it learns both tasks
together with proper prefix conditioning.
This commit is contained in:
Vijay Janapa Reddi
2026-01-28 18:36:16 -05:00
parent 8f9481b508
commit bcbcbf7f3d

View File

@@ -544,6 +544,20 @@ def main():
# Record Challenge 2 (copying) outcome: (pass/fail flag, accuracy).
results['copying'] = (passed2, acc2)
# Challenge 3: Mixed Tasks (the real test)
# Reset model and optimizer so it learns both tasks from scratch
# with prefix conditioning (otherwise it's stuck in "copy mode")
console.print("\n[dim]Reinitializing model for mixed task learning...[/dim]")
# Fresh model instance: discards the weights trained sequentially on the
# earlier challenges so the mixed run is not biased toward the last task.
# NOTE(review): seq_len + 1 presumably reserves one position for a task
# prefix token — confirm against generate_mixed_data / AttentionTransformer.
model = AttentionTransformer(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_heads=num_heads,
    seq_len=seq_len + 1,
    num_layers=num_layers
)
# Explicitly mark all parameters trainable (defensive; new params are
# typically trainable by default — TODO confirm this is needed here).
for param in model.parameters():
    param.requires_grad = True
# Fresh optimizer bound to the new model's parameters — the old optimizer
# still references the discarded model and must not be reused.
optimizer = Adam(model.parameters(), lr=lr)
# Mixed-task datasets: 800 training samples, 300 held-out test samples.
train_mixed = generate_mixed_data(800, seq_len)
test_mixed = generate_mixed_data(300, seq_len)
passed3, acc3 = run_challenge(