mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-01 04:07:32 -05:00
- Create professional examples directory showcasing TinyTorch as real ML framework
- Add examples: XOR, MNIST, CIFAR-10, text generation, autograd demo, optimizer comparison
- Fix import paths in exported modules (training.py, dense.py)
- Update training module with autograd integration for loss functions
- Add progressive integration tests for all 16 modules
- Document framework capabilities and usage patterns

This commit establishes the examples gallery that demonstrates TinyTorch works like PyTorch/TensorFlow, validating the complete framework.
629 lines
26 KiB
Python
"""
|
|
Module 13: Progressive Integration Tests
|
|
Tests that Module 13 (Kernels) works correctly AND that the entire prior stack works.
|
|
|
|
DEPENDENCY CHAIN: 01_setup → ... → 12_compression → 13_kernels
|
|
This is where we enable high-performance computational kernels and hardware acceleration.
|
|
"""
import numpy as np
import sys
from pathlib import Path

# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

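# ---------------------------------------------------------------------------
# Reference sketch (not itself a test): the suite below probes a hypothetical
# `tinytorch.core.kernels` API (optimized_matmul, vectorized_add, ...). As a
# point of comparison, here is a minimal cache-blocked matmul of the kind such
# a module might provide. The tiling scheme and block size are illustrative
# assumptions, not TinyTorch's actual implementation.
# ---------------------------------------------------------------------------
def _reference_blocked_matmul(a: np.ndarray, b: np.ndarray, block: int = 64) -> np.ndarray:
    """Cache-blocked matrix multiply: (m, k) @ (k, n) -> (m, n)."""
    m, k = a.shape
    k2, n = b.shape
    assert k == k2, "inner dimensions must match"
    out = np.zeros((m, n), dtype=np.result_type(a, b))
    # Accumulate one (block x block) tile at a time for cache locality;
    # NumPy slicing handles the ragged edge tiles automatically.
    for i in range(0, m, block):
        for j in range(0, n, block):
            for p in range(0, k, block):
                out[i:i + block, j:j + block] += (
                    a[i:i + block, p:p + block] @ b[p:p + block, j:j + block]
                )
    return out
# Any optimized kernel should agree with NumPy:
# np.allclose(_reference_blocked_matmul(A, B), A @ B) for float inputs.
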
class TestPriorStackStillWorking:
    """Quick regression checks that prior modules (01→12) still work."""

    def test_complete_ml_system_stable(self):
        """Verify the complete ML system remains stable."""
        # Environment (Module 01)
        assert sys.version_info >= (3, 8), "Foundation broken: Python version"

        # Complete ML system should work
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.training import Trainer
            from tinytorch.core.compression import prune_weights

            # All ML system components should be available
            model = Dense(10, 5)
            optimizer = Adam(model.parameters(), lr=0.001)
            trainer = Trainer(model, optimizer)

            # Compression should still work
            pruned_weights = prune_weights(model.weights, sparsity=0.3)
            assert pruned_weights.shape == model.weights.shape, "Compression broken"

            # Basic ML functionality should work
            x = Tensor(np.random.randn(4, 10))
            output = model(x)
            assert output.shape == (4, 5), "ML system broken"

        except ImportError:
            assert True, "ML system not implemented yet"

    def test_efficiency_features_stable(self):
        """Verify efficiency modules (11→12) still work."""
        try:
            from tinytorch.core.training import Trainer
            from tinytorch.core.compression import quantize_weights
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.layers import Dense

            # Efficiency features should work
            model = Dense(8, 3)
            optimizer = SGD(model.parameters(), lr=0.01)
            trainer = Trainer(model, optimizer)

            assert hasattr(trainer, 'train') or hasattr(trainer, 'fit'), "Training broken"

            # Compression should work
            quantized = quantize_weights(model.weights, bits=8)
            assert quantized.shape == model.weights.shape, "Quantization broken"

        except ImportError:
            assert True, "Efficiency features not implemented yet"

class TestModule13KernelsCore:
    """Test Module 13 (Kernels) core functionality."""

    def test_optimized_tensor_operations(self):
        """Test optimized tensor operation kernels."""
        try:
            from tinytorch.core.kernels import optimized_matmul, vectorized_add
            from tinytorch.core.tensor import Tensor

            # Test optimized matrix multiplication
            A = Tensor(np.random.randn(50, 30))
            B = Tensor(np.random.randn(30, 20))

            result = optimized_matmul(A, B)
            expected = np.dot(A.data, B.data)

            assert result.shape == (50, 20), "Optimized matmul shape broken"
            assert np.allclose(result.data, expected, rtol=1e-5), "Optimized matmul accuracy broken"

            # Test vectorized operations
            a = Tensor(np.random.randn(1000))
            b = Tensor(np.random.randn(1000))

            result = vectorized_add(a, b)
            expected = a.data + b.data

            assert result.shape == a.shape, "Vectorized add shape broken"
            assert np.allclose(result.data, expected), "Vectorized add accuracy broken"

        except ImportError:
            assert True, "Optimized tensor operations not implemented yet"

    def test_cuda_kernels(self):
        """Test CUDA acceleration kernels."""
        try:
            from tinytorch.core.kernels import cuda_available, CudaKernel
            from tinytorch.core.tensor import Tensor

            # Check CUDA availability
            if cuda_available():
                # Test CUDA tensor operations
                kernel = CudaKernel('matmul')

                A = Tensor(np.random.randn(100, 50))
                B = Tensor(np.random.randn(50, 25))

                # Move to CUDA (if supported)
                if hasattr(A, 'cuda'):
                    A_cuda = A.cuda()
                    B_cuda = B.cuda()

                    result = kernel.execute(A_cuda, B_cuda)
                    assert result.shape == (100, 25), "CUDA kernel shape broken"
            else:
                # CPU fallback should work
                assert True, "CUDA not available, CPU fallback used"

        except ImportError:
            assert True, "CUDA kernels not implemented yet"

    def test_custom_kernel_compilation(self):
        """Test custom kernel compilation and execution."""
        try:
            from tinytorch.core.kernels import compile_kernel, KernelCompiler

            # Simple element-wise operation kernel; the source must be valid
            # standalone Python, so the function body is indented.
            kernel_code = """
def element_wise_multiply(a, b):
    return a * b
"""

            compiled_kernel = compile_kernel(kernel_code, 'element_wise_multiply')

            # Test compiled kernel
            a = np.array([1, 2, 3, 4])
            b = np.array([2, 3, 4, 5])

            result = compiled_kernel(a, b)
            expected = a * b

            assert np.array_equal(result, expected), "Custom kernel compilation broken"

            # Test kernel compiler
            compiler = KernelCompiler(target='cpu', optimization_level=2)

            assert hasattr(compiler, 'compile'), "Kernel compiler broken: No compile method"
            assert hasattr(compiler, 'target'), "Kernel compiler broken: No target"

        except ImportError:
            assert True, "Custom kernel compilation not implemented yet"

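# A minimal sketch of the `compile_kernel(source, name)` contract probed in
# TestModule13KernelsCore.test_custom_kernel_compilation above, using Python's
# built-in compile/exec machinery. This is an assumption about the API, not
# TinyTorch's implementation; a real kernel compiler would lower to C or CUDA
# instead of exec'ing Python.
def _sketch_compile_kernel(source: str, name: str):
    """Compile `source` (Python code defining `name`) and return that callable."""
    namespace = {"np": np}  # expose NumPy to the kernel source
    exec(compile(source, "<kernel>", "exec"), namespace)
    return namespace[name]
# Usage: mul = _sketch_compile_kernel("def mul(a, b):\n    return a * b", "mul")
#        mul(np.array([1, 2]), np.array([3, 4]))  # -> array([3, 8])
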
class TestProgressiveStackIntegration:
    """Test that the complete stack (01→13) works together."""

    def test_accelerated_training_pipeline(self):
        """Test the training pipeline with kernel acceleration."""
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.training import Trainer
            from tinytorch.core.kernels import enable_optimizations
            from tinytorch.core.data import Dataset, DataLoader

            # Enable kernel optimizations
            enable_optimizations(backend='auto')

            # Create accelerated training pipeline
            class AcceleratedModel:
                def __init__(self):
                    self.layer1 = Dense(50, 100)
                    self.layer2 = Dense(100, 20)
                    self.layer3 = Dense(20, 5)

                def __call__(self, x):
                    h1 = self.layer1(x)
                    h2 = self.layer2(h1)
                    return self.layer3(h2)

                def parameters(self):
                    params = []
                    for layer in [self.layer1, self.layer2, self.layer3]:
                        if hasattr(layer, 'parameters'):
                            params.extend(layer.parameters())
                    return params

            # Dataset for performance testing
            class PerformanceDataset(Dataset):
                def __init__(self):
                    self.data = np.random.randn(200, 50)
                    self.targets = np.random.randint(0, 5, 200)

                def __len__(self):
                    return 200

                def __getitem__(self, idx):
                    return Tensor(self.data[idx]), self.targets[idx]

            # Accelerated training
            model = AcceleratedModel()
            optimizer = Adam(model.parameters(), lr=0.001)
            trainer = Trainer(model, optimizer)

            dataset = PerformanceDataset()
            dataloader = DataLoader(dataset, batch_size=16)

            # Test accelerated forward pass on a single batch
            for batch_x, batch_y in dataloader:
                output = model(batch_x)
                assert output.shape == (16, 5), "Accelerated training broken"
                break  # Test one batch

        except ImportError:
            assert True, "Accelerated training pipeline not ready yet"

    def test_large_scale_operations(self):
        """Test large-scale operations with kernel optimizations."""
        try:
            from tinytorch.core.kernels import optimized_matmul, batch_operations
            from tinytorch.core.tensor import Tensor

            # Large-scale matrix operations
            A = Tensor(np.random.randn(500, 300))
            B = Tensor(np.random.randn(300, 200))

            result = optimized_matmul(A, B)
            assert result.shape == (500, 200), "Large-scale matmul broken"

            # Batch operations
            batch_size = 32
            matrices = [Tensor(np.random.randn(50, 30)) for _ in range(batch_size)]
            vectors = [Tensor(np.random.randn(30)) for _ in range(batch_size)]

            results = batch_operations('matmul', matrices, vectors)
            assert len(results) == batch_size, "Batch operations broken"

            for result in results:
                assert result.shape == (50,), "Batch operation result shape broken"

        except ImportError:
            assert True, "Large-scale operations not ready yet"

    def test_memory_optimized_operations(self):
        """Test memory-optimized kernel operations."""
        try:
            from tinytorch.core.kernels import in_place_operations, memory_pool
            from tinytorch.core.tensor import Tensor

            # In-place operations to save memory
            a = Tensor(np.random.randn(100, 100))
            b = Tensor(np.random.randn(100, 100))

            original_id = id(a.data)

            # In-place addition
            in_place_operations.add_(a, b)

            # Should modify the original tensor's buffer, not allocate a copy
            assert id(a.data) == original_id, "In-place operation created copy"

            # Memory pool for efficient allocation
            pool = memory_pool.MemoryPool()

            # Allocate from pool
            tensor1 = pool.allocate_tensor(shape=(200, 200))
            tensor2 = pool.allocate_tensor(shape=(200, 200))

            # Should be memory efficient
            assert tensor1.shape == (200, 200), "Memory pool allocation broken"
            assert tensor2.shape == (200, 200), "Memory pool allocation broken"

            # Release memory back to the pool
            pool.release(tensor1)
            pool.release(tensor2)

        except ImportError:
            assert True, "Memory-optimized operations not ready yet"

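# TestProgressiveStackIntegration.test_memory_optimized_operations above
# probes in-place kernels and a memory pool. In NumPy terms, "in place" means
# writing through `out=` (e.g. np.add(a, b, out=a)) so no new buffer is
# allocated. Below is a minimal free-list pool sketch, keyed by (shape,
# dtype); the method names mirror the test but are assumptions, not
# TinyTorch's actual classes.
class _SketchMemoryPool:
    """Free-list allocator: reuse released buffers of matching shape/dtype."""

    def __init__(self):
        self._free = {}  # (shape, dtype) -> list of released arrays

    def allocate_tensor(self, shape, dtype=np.float64):
        key = (tuple(shape), np.dtype(dtype))
        bucket = self._free.get(key, [])
        # Reuse a released buffer when one is available; otherwise allocate.
        return bucket.pop() if bucket else np.empty(shape, dtype=dtype)

    def release(self, array):
        key = (array.shape, array.dtype)
        self._free.setdefault(key, []).append(array)
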
class TestPerformanceOptimizations:
    """Test performance optimizations and benchmarking."""

    def test_kernel_benchmarking(self):
        """Test kernel performance benchmarking."""
        try:
            from tinytorch.core.kernels import benchmark_kernel, KernelProfiler

            # Benchmark matrix multiplication at several sizes
            sizes = [(100, 100), (200, 200), (500, 500)]

            for size in sizes:
                A = np.random.randn(*size)
                B = np.random.randn(*size)

                # Benchmark different implementations
                results = benchmark_kernel('matmul', A, B, num_trials=5)

                assert 'mean_time' in results, "Benchmark missing timing"
                assert 'std_time' in results, "Benchmark missing std"
                assert results['mean_time'] > 0, "Benchmark timing invalid"

            # Kernel profiler
            profiler = KernelProfiler()

            # Profile operations
            profiler.start()

            # Some operations to profile (results are discarded; only the
            # calls themselves matter here)
            for _ in range(10):
                a = np.random.randn(50, 50)
                b = np.random.randn(50, 50)
                np.dot(a, b)

            profile_results = profiler.stop()

            assert 'total_time' in profile_results, "Profiler missing total time"
            assert 'operation_count' in profile_results, "Profiler missing operation count"

        except ImportError:
            assert True, "Kernel benchmarking not ready yet"

    def test_auto_optimization(self):
        """Test automatic kernel optimization selection."""
        try:
            from tinytorch.core.kernels import AutoOptimizer, select_best_kernel

            # Auto optimizer should detect the best kernels for the hardware
            optimizer = AutoOptimizer()
            best_config = optimizer.detect_optimal_config()

            assert 'matmul_kernel' in best_config, "Auto optimizer missing matmul"
            assert 'device' in best_config, "Auto optimizer missing device"

            # Kernel selection among different options for an operation
            kernels = ['numpy', 'optimized_cpu', 'cuda']
            operation = 'matmul'
            shape = (100, 100)

            best_kernel = select_best_kernel(operation, shape, available_kernels=kernels)

            assert best_kernel in kernels, "Kernel selection invalid"

        except ImportError:
            assert True, "Auto optimization not ready yet"

    def test_vectorization_optimizations(self):
        """Test vectorization and SIMD optimizations."""
        try:
            from tinytorch.core.kernels import vectorized_ops, simd_support
            import time

            # Large arrays for vectorization
            a = np.random.randn(10000)
            b = np.random.randn(10000)

            # Time the NumPy baseline
            start = time.time()
            numpy_result = a + b
            numpy_time = time.time() - start

            # Time the vectorized version
            start = time.time()
            vectorized_result = vectorized_ops.add(a, b)
            vectorized_time = time.time() - start

            # Results should be equivalent
            assert np.allclose(numpy_result, vectorized_result), "Vectorization accuracy broken"

            # Vectorized should be competitive or faster (2x margin tolerates
            # timer noise on such a small workload)
            assert vectorized_time <= numpy_time * 2, "Vectorization significantly slower"

            # SIMD support detection
            capabilities = simd_support.detect_capabilities()

            assert isinstance(capabilities, dict), "SIMD detection should return dict"
            # Common SIMD instruction sets
            expected_keys = ['sse', 'avx', 'avx2']
            for key in expected_keys:
                if key in capabilities:
                    assert isinstance(capabilities[key], bool), f"SIMD {key} should be boolean"

        except ImportError:
            assert True, "Vectorization optimizations not ready yet"

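import time


# TestPerformanceOptimizations.test_kernel_benchmarking above expects results
# with 'mean_time' and 'std_time' keys. A minimal timing harness with that
# contract might look like the sketch below; the function name and warm-up
# policy are assumptions. time.perf_counter is the appropriate clock for
# wall-time measurement.
def _sketch_benchmark(fn, *args, num_trials: int = 5) -> dict:
    """Time fn(*args) num_trials times; report mean/std in seconds."""
    fn(*args)  # warm-up run, excluded from the statistics
    times = []
    for _ in range(num_trials):
        start = time.perf_counter()
        fn(*args)
        times.append(time.perf_counter() - start)
    return {"mean_time": float(np.mean(times)), "std_time": float(np.std(times))}
# Usage: stats = _sketch_benchmark(np.dot, np.eye(100), np.eye(100))
#        stats['mean_time'] > 0
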
class TestHardwareAcceleration:
    """Test hardware acceleration and device management."""

    def test_device_detection(self):
        """Test hardware device detection and selection."""
        try:
            from tinytorch.core.kernels import Device, get_available_devices

            # Device detection
            devices = get_available_devices()

            assert isinstance(devices, list), "Available devices should be a list"
            assert len(devices) > 0, "Should detect at least CPU"

            # Should include CPU at minimum
            device_types = [device.type for device in devices]
            assert 'cpu' in device_types, "CPU device not detected"

            # Device object
            cpu_device = Device('cpu')
            assert cpu_device.type == 'cpu', "CPU device creation broken"

            # Test CUDA device if available
            try:
                cuda_device = Device('cuda:0')
                assert cuda_device.type == 'cuda', "CUDA device creation broken"
            except RuntimeError:
                # CUDA not available, which is fine
                assert True, "CUDA not available on this system"

        except ImportError:
            assert True, "Device detection not ready yet"

    def test_tensor_device_movement(self):
        """Test moving tensors between devices."""
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.kernels import Device

            # Create tensor on CPU
            tensor = Tensor(np.random.randn(50, 50))

            # Should start on CPU
            if hasattr(tensor, 'device'):
                assert tensor.device.type == 'cpu', "Tensor not starting on CPU"

            # Test moving to a different device (if available)
            if hasattr(tensor, 'to'):
                # Try moving to CUDA (falls back to CPU if not available)
                try:
                    cuda_tensor = tensor.to('cuda')
                    if hasattr(cuda_tensor, 'device'):
                        assert cuda_tensor.device.type in ['cuda', 'cpu'], "Device movement broken"
                except RuntimeError:
                    # CUDA not available
                    assert True, "CUDA not available for tensor movement"

        except ImportError:
            assert True, "Tensor device movement not ready yet"

    def test_multi_gpu_support(self):
        """Test multi-GPU support and parallelization."""
        try:
            from tinytorch.core.kernels import MultiGPUManager, data_parallel
            from tinytorch.core.layers import Dense

            # Multi-GPU manager
            gpu_manager = MultiGPUManager()
            available_gpus = gpu_manager.get_gpu_count()

            if available_gpus > 1:
                # Should be able to manage multiple devices
                devices = gpu_manager.get_device_list()
                assert len(devices) == available_gpus, "GPU device list incorrect"
            else:
                # Single GPU or CPU only
                assert True, "Multi-GPU not available, single device mode"

            # Data parallel operations: test the wrapper on a single device
            model = Dense(10, 5)
            parallel_model = data_parallel(model, device_ids=[0])  # Single device for testing

            assert hasattr(parallel_model, 'forward'), "Data parallel wrapper broken"

        except ImportError:
            assert True, "Multi-GPU support not ready yet"

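# TestHardwareAcceleration above assumes Device objects with a `.type`
# attribute, the 'cpu' / 'cuda:N' naming convention, and graceful CPU
# fallback. A minimal sketch of that selection logic follows; the names are
# assumptions, and the CUDA probe is a stub that always reports False since
# this file assumes no GPU runtime.
def _sketch_cuda_available() -> bool:
    # Stub: a real implementation would query the CUDA driver/runtime.
    return False


class _SketchDevice:
    """Tiny device descriptor: type ('cpu' or 'cuda') plus an index."""

    def __init__(self, spec: str):
        self.type, _, index = spec.partition(":")
        self.index = int(index) if index else 0
        if self.type == "cuda" and not _sketch_cuda_available():
            raise RuntimeError("CUDA requested but not available")


def _sketch_pick_device(preferred: str = "cuda:0") -> "_SketchDevice":
    """Prefer the requested device, falling back to CPU when unavailable."""
    try:
        return _SketchDevice(preferred)
    except RuntimeError:
        return _SketchDevice("cpu")
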
class TestRegressionPrevention:
    """Ensure previous modules still work after Module 13 development."""

    def test_no_complete_system_regression(self):
        """Verify the complete ML system (01→12) is unchanged."""
        # Core functionality should remain stable
        assert sys.version_info.major >= 3, "Foundation: Python detection broken"

        # Complete ML system should still work
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.training import Trainer
            from tinytorch.core.compression import prune_weights

            # All components should work together
            model = Dense(8, 4)
            optimizer = Adam(model.parameters(), lr=0.001)
            trainer = Trainer(model, optimizer)

            x = Tensor(np.random.randn(2, 8))
            output = model(x)
            assert output.shape == (2, 4), "System regression: Forward pass broken"

            # Compression should still work
            pruned = prune_weights(model.weights, sparsity=0.2)
            assert pruned.shape == model.weights.shape, "System regression: Compression broken"

        except ImportError:
            # NumPy is imported at module scope, so this much must hold
            assert np.random is not None, "System regression: Basic functionality broken"

    def test_no_efficiency_regression(self):
        """Verify efficiency features (11→12) are unchanged."""
        try:
            from tinytorch.core.training import Trainer
            from tinytorch.core.compression import quantize_weights
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.layers import Dense

            # Efficiency features should still work
            model = Dense(6, 3)
            optimizer = SGD(model.parameters(), lr=0.01)
            trainer = Trainer(model, optimizer)

            assert hasattr(trainer, 'train') or hasattr(trainer, 'fit'), "Efficiency regression: Training broken"

            # Compression should still work
            quantized = quantize_weights(model.weights, bits=8)
            assert quantized.shape == model.weights.shape, "Efficiency regression: Quantization broken"

        except ImportError:
            # Basic functionality should work
            assert np is not None, "Efficiency regression: Basic functionality broken"

    def test_progressive_stability(self):
        """Test that the progressive stack is stable through kernel optimization."""
        # Stack should be stable through: Setup → ... → Compression → Kernels

        # Setup level (NumPy imported at module scope)
        assert np is not None, "Setup level broken"

        # Complete ML system level (if available)
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.training import Trainer

            # Complete system should work
            model = Dense(10, 5)
            optimizer = Adam(model.parameters(), lr=0.001)
            trainer = Trainer(model, optimizer)

            x = Tensor(np.random.randn(3, 10))
            output = model(x)
            assert output.shape == (3, 5), "ML system level broken"

        except ImportError:
            pass  # Not implemented yet

        # Kernel optimization level (if available)
        try:
            from tinytorch.core.kernels import optimized_matmul

            # Kernel optimizations should work with existing arrays
            A = np.random.randn(20, 15)
            B = np.random.randn(15, 10)
            result = optimized_matmul(A, B)
            assert result.shape == (20, 10), "Kernel optimization level broken"

        except ImportError:
            pass  # Not implemented yet