""" Integration Tests - DataLoader and Tensor Tests real integration between DataLoader and Tensor modules. Uses actual TinyTorch components to verify they work together correctly. """ import pytest import numpy as np from test_utils import setup_integration_test # Ensure proper setup before importing setup_integration_test() # Import ONLY from TinyTorch package from tinytorch.core.tensor import Tensor from tinytorch.core.dataloader import DataLoader, Dataset, SimpleDataset from tinytorch.core.activations import ReLU from tinytorch.core.layers import Dense class TestDataLoaderTensorIntegration: """Test integration between DataLoader and Tensor components.""" def test_simple_dataset_produces_tensors(self): """Test SimpleDataset produces real Tensor objects.""" # Create SimpleDataset dataset = SimpleDataset(size=10, num_features=3, num_classes=2) # Get a sample data, label = dataset[0] # Verify outputs are tensors assert isinstance(data, Tensor), "Data should be a Tensor" assert isinstance(label, Tensor), "Label should be a Tensor" # Verify tensor properties assert data.shape == (3,), f"Expected data shape (3,), got {data.shape}" assert label.shape == (), f"Expected label shape (), got {label.shape}" assert data.dtype == np.float32, f"Expected float32, got {data.dtype}" assert label.dtype == np.int32, f"Expected int32, got {label.dtype}" def test_dataloader_produces_tensor_batches(self): """Test DataLoader produces batches of real Tensor objects.""" # Create dataset and dataloader dataset = SimpleDataset(size=20, num_features=4, num_classes=3) dataloader = DataLoader(dataset, batch_size=5, shuffle=False) # Get first batch batch_data, batch_labels = next(iter(dataloader)) # Verify batch outputs are tensors assert isinstance(batch_data, Tensor), "Batch data should be a Tensor" assert isinstance(batch_labels, Tensor), "Batch labels should be a Tensor" # Verify batch shapes assert batch_data.shape == (5, 4), f"Expected batch data shape (5, 4), got {batch_data.shape}" assert batch_labels.shape == (5,), f"Expected batch labels shape (5,), got {batch_labels.shape}" # Verify data types assert batch_data.dtype == np.float32, f"Expected float32, got {batch_data.dtype}" assert batch_labels.dtype == np.int32, f"Expected int32, got {batch_labels.dtype}" def test_dataloader_tensor_compatibility_with_activations(self): """Test DataLoader tensors work with activation functions.""" # Create dataset and dataloader dataset = SimpleDataset(size=10, num_features=3, num_classes=2) dataloader = DataLoader(dataset, batch_size=4, shuffle=False) # Get batch batch_data, batch_labels = next(iter(dataloader)) # Apply activation function relu = ReLU() activated_data = relu(batch_data) # Verify result is tensor assert isinstance(activated_data, Tensor), "Activated data should be a Tensor" assert activated_data.shape == batch_data.shape, "Shape should be preserved" # Verify ReLU applied correctly (non-negative values) assert np.all(activated_data.data >= 0), "ReLU should produce non-negative values" def test_dataloader_tensor_compatibility_with_layers(self): """Test DataLoader tensors work with neural network layers.""" # Create dataset and dataloader dataset = SimpleDataset(size=10, num_features=3, num_classes=2) dataloader = DataLoader(dataset, batch_size=4, shuffle=False) # Get batch batch_data, batch_labels = next(iter(dataloader)) # Apply dense layer dense = Dense(input_size=3, output_size=2) output = dense(batch_data) # Verify result is tensor assert isinstance(output, Tensor), "Layer output should be a Tensor" assert output.shape == (4, 2), f"Expected output shape (4, 2), got {output.shape}" assert output.dtype == np.float32, f"Expected float32, got {output.dtype}" def test_dataloader_full_pipeline_integration(self): """Test DataLoader tensors in complete ML pipeline.""" # Create dataset and dataloader dataset = SimpleDataset(size=12, num_features=4, num_classes=3) dataloader = DataLoader(dataset, batch_size=6, shuffle=False) # Get batch batch_data, batch_labels = next(iter(dataloader)) # Apply full pipeline: Dense → ReLU → Dense dense1 = Dense(input_size=4, output_size=8) relu = ReLU() dense2 = Dense(input_size=8, output_size=3) # Forward pass hidden = dense1(batch_data) activated = relu(hidden) output = dense2(activated) # Verify all outputs are tensors assert isinstance(hidden, Tensor), "Hidden layer should be Tensor" assert isinstance(activated, Tensor), "Activated layer should be Tensor" assert isinstance(output, Tensor), "Output layer should be Tensor" # Verify shapes through pipeline assert hidden.shape == (6, 8), f"Hidden shape should be (6, 8), got {hidden.shape}" assert activated.shape == (6, 8), f"Activated shape should be (6, 8), got {activated.shape}" assert output.shape == (6, 3), f"Output shape should be (6, 3), got {output.shape}" class TestDataLoaderTensorBatching: """Test DataLoader batching with tensor integration.""" def test_different_batch_sizes(self): """Test DataLoader with different batch sizes produces correct tensors.""" dataset = SimpleDataset(size=20, num_features=3, num_classes=2) batch_sizes = [1, 4, 8, 10] for batch_size in batch_sizes: dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False) batch_data, batch_labels = next(iter(dataloader)) # Verify tensor shapes assert batch_data.shape == (batch_size, 3), f"Data shape should be ({batch_size}, 3), got {batch_data.shape}" assert batch_labels.shape == (batch_size,), f"Label shape should be ({batch_size},), got {batch_labels.shape}" # Verify tensor types assert isinstance(batch_data, Tensor), "Batch data should be Tensor" assert isinstance(batch_labels, Tensor), "Batch labels should be Tensor" def test_shuffling_preserves_tensor_integrity(self): """Test that shuffling preserves tensor data integrity.""" dataset = SimpleDataset(size=10, num_features=2, num_classes=2) # Create two dataloaders with different shuffle settings dataloader_no_shuffle = DataLoader(dataset, batch_size=5, shuffle=False) dataloader_shuffle = DataLoader(dataset, batch_size=5, shuffle=True) # Get batches batch_no_shuffle = next(iter(dataloader_no_shuffle)) batch_shuffle = next(iter(dataloader_shuffle)) # Both should produce valid tensors for batch_data, batch_labels in [batch_no_shuffle, batch_shuffle]: assert isinstance(batch_data, Tensor), "Data should be Tensor" assert isinstance(batch_labels, Tensor), "Labels should be Tensor" assert batch_data.shape == (5, 2), f"Expected shape (5, 2), got {batch_data.shape}" assert batch_labels.shape == (5,), f"Expected shape (5,), got {batch_labels.shape}" def test_iteration_produces_consistent_tensors(self): """Test that iterating through DataLoader produces consistent tensors.""" dataset = SimpleDataset(size=12, num_features=3, num_classes=2) dataloader = DataLoader(dataset, batch_size=4, shuffle=False) batch_count = 0 for batch_data, batch_labels in dataloader: batch_count += 1 # Verify each batch produces valid tensors assert isinstance(batch_data, Tensor), f"Batch {batch_count} data should be Tensor" assert isinstance(batch_labels, Tensor), f"Batch {batch_count} labels should be Tensor" # Verify shapes (last batch might be smaller) assert batch_data.shape[1] == 3, f"Feature dim should be 3, got {batch_data.shape[1]}" assert batch_data.shape[0] == batch_labels.shape[0], "Batch and label sizes should match" # Verify data types assert batch_data.dtype == np.float32, "Data should be float32" assert batch_labels.dtype == np.int32, "Labels should be int32" # Should have processed all data assert batch_count == 3, f"Expected 3 batches, got {batch_count}" class TestDataLoaderTensorDataTypes: """Test DataLoader tensor data type handling.""" def test_float32_tensor_production(self): """Test DataLoader produces float32 tensors for data.""" dataset = SimpleDataset(size=8, num_features=2, num_classes=2) dataloader = DataLoader(dataset, batch_size=4, shuffle=False) batch_data, batch_labels = next(iter(dataloader)) # Verify data types assert batch_data.dtype == np.float32, f"Expected float32, got {batch_data.dtype}" assert isinstance(batch_data.data, np.ndarray), "Underlying data should be numpy array" assert batch_data.data.dtype == np.float32, "Underlying array should be float32" def test_int32_tensor_production(self): """Test DataLoader produces int32 tensors for labels.""" dataset = SimpleDataset(size=8, num_features=2, num_classes=3) dataloader = DataLoader(dataset, batch_size=4, shuffle=False) batch_data, batch_labels = next(iter(dataloader)) # Verify label types assert batch_labels.dtype == np.int32, f"Expected int32, got {batch_labels.dtype}" assert isinstance(batch_labels.data, np.ndarray), "Underlying labels should be numpy array" assert batch_labels.data.dtype == np.int32, "Underlying array should be int32" def test_tensor_data_ranges(self): """Test DataLoader produces tensors with reasonable data ranges.""" dataset = SimpleDataset(size=10, num_features=3, num_classes=2) dataloader = DataLoader(dataset, batch_size=5, shuffle=False) batch_data, batch_labels = next(iter(dataloader)) # Check data ranges assert np.all(np.isfinite(batch_data.data)), "Data should be finite" assert np.all(batch_labels.data >= 0), "Labels should be non-negative" assert np.all(batch_labels.data < 2), "Labels should be less than num_classes" class TestDataLoaderTensorRealisticScenarios: """Test DataLoader with realistic tensor scenarios.""" def test_training_loop_simulation(self): """Test DataLoader tensors in training loop simulation.""" dataset = SimpleDataset(size=16, num_features=4, num_classes=2) dataloader = DataLoader(dataset, batch_size=8, shuffle=True) # Simulate training loop epoch_batches = 0 for epoch in range(2): batch_count = 0 for batch_data, batch_labels in dataloader: batch_count += 1 # Simulate forward pass dense = Dense(input_size=4, output_size=2) output = dense(batch_data) # Verify tensor operations work assert isinstance(output, Tensor), "Forward pass should produce Tensor" assert output.shape == (8, 2), f"Expected shape (8, 2), got {output.shape}" # Simulate loss computation (simplified) loss = output.data.mean() # Simple loss assert np.isfinite(loss), "Loss should be finite" epoch_batches += 1 assert batch_count == 2, f"Expected 2 batches per epoch, got {batch_count}" assert epoch_batches == 4, f"Expected 4 total batches, got {epoch_batches}" def test_different_dataset_sizes(self): """Test DataLoader with different dataset sizes.""" test_cases = [ (5, 2), # Small dataset (32, 8), # Medium dataset (100, 16), # Large dataset ] for dataset_size, batch_size in test_cases: dataset = SimpleDataset(size=dataset_size, num_features=3, num_classes=2) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False) total_samples = 0 for batch_data, batch_labels in dataloader: # Verify tensor properties assert isinstance(batch_data, Tensor), "Data should be Tensor" assert isinstance(batch_labels, Tensor), "Labels should be Tensor" # Count samples total_samples += batch_data.shape[0] # Verify shapes assert batch_data.shape[1] == 3, "Feature dim should be 3" assert batch_data.shape[0] == batch_labels.shape[0], "Batch sizes should match" assert total_samples == dataset_size, f"Should process all {dataset_size} samples, got {total_samples}" def test_dataloader_with_complex_pipeline(self): """Test DataLoader integration with complex neural network pipeline.""" dataset = SimpleDataset(size=20, num_features=5, num_classes=3) dataloader = DataLoader(dataset, batch_size=10, shuffle=False) # Create complex pipeline dense1 = Dense(input_size=5, output_size=16) relu1 = ReLU() dense2 = Dense(input_size=16, output_size=8) relu2 = ReLU() dense3 = Dense(input_size=8, output_size=3) # Process batches for batch_data, batch_labels in dataloader: # Forward pass through complex pipeline x = dense1(batch_data) x = relu1(x) x = dense2(x) x = relu2(x) output = dense3(x) # Verify final output assert isinstance(output, Tensor), "Final output should be Tensor" assert output.shape == (10, 3), f"Expected shape (10, 3), got {output.shape}" assert output.dtype == np.float32, "Output should be float32" assert np.all(np.isfinite(output.data)), "Output should be finite" def test_dataloader_memory_efficiency(self): """Test DataLoader memory efficiency with tensor operations.""" dataset = SimpleDataset(size=50, num_features=10, num_classes=5) dataloader = DataLoader(dataset, batch_size=25, shuffle=False) # Process batches and verify memory usage patterns processed_batches = [] for batch_data, batch_labels in dataloader: # Store tensor info (not the actual tensors to avoid memory issues) batch_info = { 'data_shape': batch_data.shape, 'label_shape': batch_labels.shape, 'data_type': batch_data.dtype, 'label_type': batch_labels.dtype } processed_batches.append(batch_info) # Verify tensors are properly formed assert isinstance(batch_data, Tensor), "Data should be Tensor" assert isinstance(batch_labels, Tensor), "Labels should be Tensor" # Verify we processed expected number of batches assert len(processed_batches) == 2, f"Expected 2 batches, got {len(processed_batches)}" # Verify consistency across batches for i, batch_info in enumerate(processed_batches): assert batch_info['data_shape'][1] == 10, f"Batch {i} should have 10 features" assert batch_info['data_type'] == np.float32, f"Batch {i} data should be float32" assert batch_info['label_type'] == np.int32, f"Batch {i} labels should be int32" class TestCustomDatasetIntegration: """Test custom dataset integration with tensor operations.""" def test_custom_dataset_with_tensors(self): """Test custom dataset that produces tensors works with DataLoader.""" class CustomTensorDataset(Dataset): def __init__(self, size: int): self.size = size self.data = [Tensor(np.random.rand(3).astype(np.float32)) for _ in range(size)] self.labels = [Tensor(np.random.randint(0, 2, dtype=np.int32)) for _ in range(size)] def __len__(self): return self.size def __getitem__(self, index): return self.data[index], self.labels[index] # Create custom dataset and dataloader dataset = CustomTensorDataset(size=12) dataloader = DataLoader(dataset, batch_size=4, shuffle=False) # Test integration batch_data, batch_labels = next(iter(dataloader)) # Verify tensor properties assert isinstance(batch_data, Tensor), "Batch data should be Tensor" assert isinstance(batch_labels, Tensor), "Batch labels should be Tensor" assert batch_data.shape == (4, 3), f"Expected shape (4, 3), got {batch_data.shape}" assert batch_labels.shape == (4,), f"Expected shape (4,), got {batch_labels.shape}" # Test with neural network components dense = Dense(input_size=3, output_size=2) output = dense(batch_data) assert isinstance(output, Tensor), "Dense output should be Tensor" assert output.shape == (4, 2), f"Expected shape (4, 2), got {output.shape}"