mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-08 07:32:31 -05:00
📦 Module File Organization: - Renamed networks_dev.py → dense_dev.py in 05_dense module - Renamed cnn_dev.py → spatial_dev.py in 06_spatial module - Added new 07_attention module with attention_dev.py - Updated module.yaml files to reference correct filenames - Updated #| default_exp directives for proper package exports 🔄 Core Package Updates: - Added tinytorch.core.dense (Sequential, MLP architectures) - Added tinytorch.core.spatial (Conv2D, pooling operations) - Added tinytorch.core.attention (self-attention mechanisms) - Updated all core modules with latest implementations - Fixed tensor assignment issues in compression module 🧪 Test Integration Fixes: - Updated integration tests to use correct module imports - Fixed tensor activation tests for new module structure - Ensured compatibility with renamed components - Maintained 100% individual module test success rate Result: Complete 14-module TinyTorch framework with proper organization, working integrations, and comprehensive test coverage ready for production use.
479 lines
16 KiB
Python
479 lines
16 KiB
Python
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_setup/setup_dev.ipynb.
|
|
|
|
# %% auto 0
|
|
__all__ = ['personal_info', 'system_info', 'validate_environment', 'benchmark_performance', 'setup_development_environment',
|
|
'generate_system_report']
|
|
|
|
# %% ../../modules/source/01_setup/setup_dev.ipynb 1
|
|
import sys
|
|
import platform
|
|
import psutil
|
|
import os
|
|
from typing import Dict, Any
|
|
|
|
# %% ../../modules/source/01_setup/setup_dev.ipynb 6
|
|
def personal_info() -> Dict[str, str]:
    """Return identity metadata for this TinyTorch installation.

    Every installation records who built it and how to reach them —
    the same role the author field plays in Git commits or Python
    package metadata.

    Returns:
        Dict mapping the keys 'developer', 'email', 'institution',
        'system_name', and 'version' to their string values.
    """
    ### BEGIN SOLUTION
    identity: Dict[str, str] = {}
    identity['developer'] = 'Vijay Janapa Reddi'
    identity['email'] = 'vj@eecs.harvard.edu'
    identity['institution'] = 'Harvard University'
    identity['system_name'] = 'VJ-TinyTorch-Dev'
    identity['version'] = '1.0.0'
    return identity
    ### END SOLUTION
|
|
|
|
# %% ../../modules/source/01_setup/setup_dev.ipynb 8
|
|
def system_info() -> Dict[str, Any]:
    """Query hardware and software characteristics of the host system.

    Gathers the information that most directly affects ML workloads:
    interpreter version, OS platform, CPU architecture, core count,
    and total physical memory.

    Returns:
        Dict with keys:
            'python_version': str, e.g. '3.9.7'
            'platform': str, e.g. 'Darwin' or 'Linux'
            'architecture': str, e.g. 'arm64' or 'x86_64'
            'cpu_count': int, logical CPU count
            'memory_gb': float, total RAM in GiB rounded to 1 decimal
    """
    ### BEGIN SOLUTION
    vi = sys.version_info
    total_bytes = psutil.virtual_memory().total
    # bytes -> GiB: divide by 1024**3, round for readability
    return {
        'python_version': f"{vi.major}.{vi.minor}.{vi.micro}",
        'platform': platform.system(),
        'architecture': platform.machine(),
        'cpu_count': psutil.cpu_count(),
        'memory_gb': round(total_bytes / (1024 ** 3), 1),
    }
    ### END SOLUTION
|
|
|
|
# %% ../../modules/source/01_setup/setup_dev.ipynb 18
|
|
import importlib
|
|
import pkg_resources
|
|
from typing import Dict, List, Optional
|
|
|
|
def validate_environment() -> Dict[str, Any]:
    """Validate the ML development environment's essential dependencies.

    Attempts to import each package in a fixed list of essentials and
    looks up its installed version.  Packages that either fail to
    import or have no distribution metadata are reported as missing.

    Returns:
        Dict with keys:
            'available_packages': {name: version_str} for importable packages
            'missing_packages': list of package names that failed
            'health_score': percent available, rounded to 1 decimal
            'total_checked': number of packages probed
            'status': 'healthy' if health_score >= 80 else 'needs_attention'
    """
    ### BEGIN SOLUTION
    # importlib.metadata (stdlib, 3.8+) replaces the deprecated
    # pkg_resources API, which emits DeprecationWarning and is slated
    # for removal from setuptools.
    from importlib.metadata import version as dist_version, PackageNotFoundError

    essential_packages = [
        'numpy', 'matplotlib', 'psutil', 'jupyter', 'nbdev', 'pytest'
    ]

    available: Dict[str, str] = {}
    missing: List[str] = []

    for package in essential_packages:
        try:
            # Import proves the package is usable, not merely installed.
            importlib.import_module(package)
            available[package] = dist_version(package)
        except (ImportError, PackageNotFoundError):
            missing.append(package)

    total_packages = len(essential_packages)
    health_score = round((len(available) / total_packages) * 100, 1)

    return {
        'available_packages': available,
        'missing_packages': missing,
        'health_score': health_score,
        'total_checked': total_packages,
        'status': 'healthy' if health_score >= 80 else 'needs_attention'
    }
    ### END SOLUTION
|
|
|
|
# %% ../../modules/source/01_setup/setup_dev.ipynb 20
|
|
import time
|
|
import random
|
|
|
|
def benchmark_performance() -> Dict[str, Any]:
    """Benchmark CPU and memory performance for ML workloads.

    Times a CPU-bound arithmetic loop and a memory-bound list
    reverse/sort, converts each elapsed time into a score
    (1000 / seconds, so lower time means higher score), and derives
    an overall rating.

    Returns:
        Dict with keys 'cpu_time', 'cpu_score', 'memory_time',
        'memory_score', 'overall_score' (floats) and
        'performance_rating' (one of 'excellent', 'good', 'fair',
        'needs_optimization').
    """
    ### BEGIN SOLUTION
    # time.perf_counter() is monotonic and has the highest available
    # resolution; time.time() can have ~15 ms granularity on some
    # platforms, so a sub-resolution run could report 0.0 elapsed and
    # crash the 1000 / elapsed score with ZeroDivisionError.
    min_elapsed = 1e-9  # floor keeps the score division finite

    benchmarks: Dict[str, Any] = {}

    # CPU Performance Test
    print("⚡ Running CPU benchmark...")
    start = time.perf_counter()
    result = 0
    for i in range(100000):
        result += i * i + i / 2
    cpu_time = max(time.perf_counter() - start, min_elapsed)
    benchmarks['cpu_time'] = round(cpu_time, 3)
    benchmarks['cpu_score'] = round(1000 / cpu_time, 1)

    # Memory Performance Test
    print("🧠 Running memory benchmark...")
    start = time.perf_counter()
    large_list = list(range(1000000))
    large_list.reverse()
    large_list.sort()
    memory_time = max(time.perf_counter() - start, min_elapsed)
    benchmarks['memory_time'] = round(memory_time, 3)
    benchmarks['memory_score'] = round(1000 / memory_time, 1)

    # Overall score is the mean of the two component scores.
    overall_score = round((benchmarks['cpu_score'] + benchmarks['memory_score']) / 2, 1)
    benchmarks['overall_score'] = overall_score

    if overall_score >= 80:
        rating = 'excellent'
    elif overall_score >= 60:
        rating = 'good'
    elif overall_score >= 40:
        rating = 'fair'
    else:
        rating = 'needs_optimization'
    benchmarks['performance_rating'] = rating

    return benchmarks
    ### END SOLUTION
|
|
|
|
# %% ../../modules/source/01_setup/setup_dev.ipynb 22
|
|
import subprocess
|
|
import json
|
|
from pathlib import Path
|
|
|
|
def setup_development_environment() -> Dict[str, Any]:
    """Check core development tooling (git, jupyter, pip, python).

    Probes each tool with its version command, records what is
    installed (and, for Git, whether user.name/user.email are
    configured), and collects actionable recommendations for anything
    missing.

    Returns:
        Dict with keys:
            'setup_status': per-tool installed/version/config flags
            'recommendations': list of human-readable fix suggestions
            'setup_score': percent of the 4 tools installed (1 decimal)
            'status': 'ready' if setup_score >= 75 else 'needs_configuration'
    """
    ### BEGIN SOLUTION
    def probe(command: List[str]) -> Optional[str]:
        """Run *command*; return stripped stdout, or None if the tool is
        absent or exits non-zero."""
        try:
            completed = subprocess.run(command, capture_output=True,
                                       text=True, check=True)
            return completed.stdout.strip()
        except (subprocess.CalledProcessError, FileNotFoundError):
            return None

    setup_status: Dict[str, Any] = {}
    recommendations: List[str] = []

    # Git: installed?
    git_version = probe(['git', '--version'])
    if git_version is not None:
        setup_status['git_installed'] = True
        setup_status['git_version'] = git_version
        # Git: identity configured?
        git_name = probe(['git', 'config', 'user.name'])
        git_email = probe(['git', 'config', 'user.email'])
        if git_name is not None and git_email is not None:
            setup_status['git_configured'] = True
            setup_status['git_name'] = git_name
            setup_status['git_email'] = git_email
        else:
            setup_status['git_configured'] = False
            recommendations.append("Configure Git: git config --global user.name 'Your Name'")
            recommendations.append("Configure Git: git config --global user.email 'your.email@domain.com'")
    else:
        setup_status['git_installed'] = False
        recommendations.append("Install Git: https://git-scm.com/downloads")

    # Jupyter: installed?
    jupyter_version = probe(['jupyter', '--version'])
    if jupyter_version is not None:
        setup_status['jupyter_installed'] = True
        setup_status['jupyter_version'] = jupyter_version
    else:
        setup_status['jupyter_installed'] = False
        recommendations.append("Install Jupyter: pip install jupyter")

    # Core Python tooling
    for tool in ('pip', 'python'):
        tool_version = probe([tool, '--version'])
        if tool_version is not None:
            setup_status[f'{tool}_installed'] = True
            setup_status[f'{tool}_version'] = tool_version
        else:
            setup_status[f'{tool}_installed'] = False
            recommendations.append(f"Install {tool}: Check Python installation")

    # Setup health: fraction of the four tools present, as a percentage.
    tools = ('git', 'jupyter', 'pip', 'python')
    installed_tools = sum(bool(setup_status.get(f'{t}_installed')) for t in tools)
    setup_score = round((installed_tools / len(tools)) * 100, 1)

    return {
        'setup_status': setup_status,
        'recommendations': recommendations,
        'setup_score': setup_score,
        'status': 'ready' if setup_score >= 75 else 'needs_configuration'
    }
    ### END SOLUTION
|
|
|
|
# %% ../../modules/source/01_setup/setup_dev.ipynb 24
|
|
from datetime import datetime
|
|
|
|
def generate_system_report() -> Dict[str, Any]:
    """Generate a comprehensive diagnostic report for this installation.

    Aggregates personal_info(), system_info(), validate_environment(),
    benchmark_performance(), and setup_development_environment() into a
    single dict with an overall health score, a status label, and a
    flat list of actionable recommendations.

    Returns:
        Dict with a timestamp, the five sub-reports, 'overall_health'
        (0-100 float), 'status' (excellent/good/fair/needs_attention),
        'recommendations' (list of str), and 'report_version'.
    """
    ### BEGIN SOLUTION
    print("📊 Generating comprehensive system report...")

    environment = validate_environment()
    performance = benchmark_performance()
    development = setup_development_environment()

    # Benchmark scores are open-ended, so cap at 100 before averaging
    # with the two percentage-based scores.
    capped_perf = min(performance['overall_score'], 100)
    overall_health = round(
        (environment['health_score'] + capped_perf + development['setup_score']) / 3,
        1,
    )

    # Map the averaged score onto a status label (first threshold wins).
    status = 'needs_attention'
    for cutoff, label in ((85, 'excellent'), (70, 'good'), (50, 'fair')):
        if overall_health >= cutoff:
            status = label
            break

    # Fold all component recommendations into one actionable list.
    recommendations: List[str] = []
    if environment['health_score'] < 80:
        for pkg in environment['missing_packages']:
            recommendations.append(f"Install missing package: {pkg}")
    if performance['overall_score'] < 50:
        recommendations.append("Consider hardware upgrade for better ML performance")
    recommendations.extend(development['recommendations'])

    return {
        'timestamp': datetime.now().isoformat(),
        'personal_info': personal_info(),
        'system_info': system_info(),
        'environment_validation': environment,
        'performance_benchmarks': performance,
        'development_setup': development,
        'overall_health': overall_health,
        'status': status,
        'recommendations': recommendations,
        'report_version': '1.0.0'
    }
    ### END SOLUTION
|