TinyTorch/tinytorch/core/setup.py
Vijay Janapa Reddi d4d6277604 🔧 Complete module restructuring and integration fixes
📦 Module File Organization:
- Renamed networks_dev.py → dense_dev.py in 05_dense module
- Renamed cnn_dev.py → spatial_dev.py in 06_spatial module
- Added new 07_attention module with attention_dev.py
- Updated module.yaml files to reference correct filenames
- Updated #| default_exp directives for proper package exports

🔄 Core Package Updates:
- Added tinytorch.core.dense (Sequential, MLP architectures)
- Added tinytorch.core.spatial (Conv2D, pooling operations)
- Added tinytorch.core.attention (self-attention mechanisms)
- Updated all core modules with latest implementations
- Fixed tensor assignment issues in compression module

🧪 Test Integration Fixes:
- Updated integration tests to use correct module imports
- Fixed tensor activation tests for new module structure
- Ensured compatibility with renamed components
- Maintained 100% individual module test success rate

Result: Complete 14-module TinyTorch framework with proper organization,
working integrations, and comprehensive test coverage ready for production use.
2025-07-18 02:10:49 -04:00

479 lines · 16 KiB · Python

# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/01_setup/setup_dev.ipynb.
# %% auto 0
__all__ = ['personal_info', 'system_info', 'validate_environment', 'benchmark_performance', 'setup_development_environment',
           'generate_system_report']
# %% ../../modules/source/01_setup/setup_dev.ipynb 1
import sys
import platform
import psutil
import os
from typing import Dict, Any
# %% ../../modules/source/01_setup/setup_dev.ipynb 6
def personal_info() -> Dict[str, str]:
    """
    Return personal information for this TinyTorch installation.

    This function configures your personal TinyTorch installation with your identity.
    It's the foundation of proper ML engineering practices: every system needs
    to know who built it and how to contact them.

    TODO: Implement personal information configuration.

    STEP-BY-STEP IMPLEMENTATION:
    1. Create a dictionary with your personal details
    2. Include all required keys: developer, email, institution, system_name, version
    3. Use your actual information (not placeholder text)
    4. Make system_name unique and descriptive
    5. Keep version as '1.0.0' for now

    EXAMPLE OUTPUT:
    {
        'developer': 'Vijay Janapa Reddi',
        'email': 'vj@eecs.harvard.edu',
        'institution': 'Harvard University',
        'system_name': 'VJ-TinyTorch-Dev',
        'version': '1.0.0'
    }

    IMPLEMENTATION HINTS:
    - Replace the example with your real information
    - Use a descriptive system_name (e.g., 'YourName-TinyTorch-Dev')
    - Keep email format valid (contains @ and domain)
    - Make sure all values are strings
    - Consider how this info will be used in debugging and collaboration

    LEARNING CONNECTIONS:
    - This is like the 'author' field in Git commits
    - Similar to maintainer info in Docker images
    - Parallels author info in Python packages
    - Foundation for professional ML development
    """
    ### BEGIN SOLUTION
    return {
        'developer': 'Vijay Janapa Reddi',
        'email': 'vj@eecs.harvard.edu',
        'institution': 'Harvard University',
        'system_name': 'VJ-TinyTorch-Dev',
        'version': '1.0.0'
    }
    ### END SOLUTION
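A minimal sanity check for a `personal_info()`-style dictionary. Note that `check_personal_info` and `REQUIRED_KEYS` are hypothetical helpers sketched here for illustration; they are not part of the exported module.

```python
# Hypothetical validator for the dict shape personal_info() returns:
# all five required keys, non-empty string values, plausible email.
REQUIRED_KEYS = {'developer', 'email', 'institution', 'system_name', 'version'}

def check_personal_info(info: dict) -> bool:
    """Return True if info has exactly the required string fields and a valid-looking email."""
    if set(info) != REQUIRED_KEYS:
        return False
    if not all(isinstance(v, str) and v for v in info.values()):
        return False
    email = info['email']
    return '@' in email and '.' in email.split('@')[-1]

example = {
    'developer': 'Vijay Janapa Reddi',
    'email': 'vj@eecs.harvard.edu',
    'institution': 'Harvard University',
    'system_name': 'VJ-TinyTorch-Dev',
    'version': '1.0.0',
}
print(check_personal_info(example))  # True
```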
# %% ../../modules/source/01_setup/setup_dev.ipynb 8
def system_info() -> Dict[str, Any]:
    """
    Query and return system information for this TinyTorch installation.

    This function gathers crucial hardware and software information that affects
    ML performance, compatibility, and debugging. It's the foundation of
    hardware-aware ML systems.

    TODO: Implement system information queries.

    STEP-BY-STEP IMPLEMENTATION:
    1. Get Python version using sys.version_info
    2. Get platform using platform.system()
    3. Get architecture using platform.machine()
    4. Get CPU count using psutil.cpu_count()
    5. Get memory using psutil.virtual_memory().total
    6. Convert memory from bytes to GB (divide by 1024**3)
    7. Return all information in a dictionary

    EXAMPLE OUTPUT:
    {
        'python_version': '3.9.7',
        'platform': 'Darwin',
        'architecture': 'arm64',
        'cpu_count': 8,
        'memory_gb': 16.0
    }

    IMPLEMENTATION HINTS:
    - Use f-string formatting for Python version: f"{major}.{minor}.{micro}"
    - Memory conversion: bytes / (1024**3) = GB (note: in Python, ** is power; ^ is XOR)
    - Round memory to 1 decimal place for readability
    - Make sure data types are correct (strings for text, int for cpu_count, float for memory_gb)

    LEARNING CONNECTIONS:
    - This is like `torch.cuda.is_available()` in PyTorch
    - Similar to system info in MLflow experiment tracking
    - Parallels hardware detection in TensorFlow
    - Foundation for performance optimization in ML systems

    PERFORMANCE IMPLICATIONS:
    - cpu_count affects parallel processing capabilities
    - memory_gb determines maximum model and batch sizes
    - platform affects file system and process management
    - architecture influences numerical precision and optimization
    """
    ### BEGIN SOLUTION
    # Get Python version
    version_info = sys.version_info
    python_version = f"{version_info.major}.{version_info.minor}.{version_info.micro}"
    # Get platform information
    platform_name = platform.system()
    architecture = platform.machine()
    # Get CPU information
    cpu_count = psutil.cpu_count()
    # Get memory information (convert bytes to GB)
    memory_bytes = psutil.virtual_memory().total
    memory_gb = round(memory_bytes / (1024**3), 1)
    return {
        'python_version': python_version,
        'platform': platform_name,
        'architecture': architecture,
        'cpu_count': cpu_count,
        'memory_gb': memory_gb
    }
    ### END SOLUTION
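A stdlib-only sketch of the same queries, for environments without `psutil` (an assumption of this sketch, not the module's approach): `os.cpu_count()` replaces `psutil.cpu_count()`, and total memory is approximated via `os.sysconf` on POSIX, falling back to `None` elsewhere.

```python
# Stdlib-only variant of system_info(); memory_gb may be None on
# platforms where os.sysconf is unavailable (e.g. Windows).
import os
import platform
import sys

def system_info_stdlib():
    info = {
        'python_version': f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
        'platform': platform.system(),
        'architecture': platform.machine(),
        'cpu_count': os.cpu_count(),
    }
    try:
        # POSIX only: total physical pages * page size, converted to GB
        mem_bytes = os.sysconf('SC_PHYS_PAGES') * os.sysconf('SC_PAGE_SIZE')
        info['memory_gb'] = round(mem_bytes / (1024 ** 3), 1)
    except (AttributeError, ValueError, OSError):
        info['memory_gb'] = None
    return info

print(system_info_stdlib())
```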
# %% ../../modules/source/01_setup/setup_dev.ipynb 18
import importlib
import pkg_resources
from typing import Dict, List, Optional

def validate_environment() -> Dict[str, Any]:
    """
    Validate ML development environment and check essential dependencies.

    This function checks that your system has the necessary packages for ML development.
    It's like a pre-flight check before you start building ML systems.

    TODO: Implement environment validation.

    STEP-BY-STEP IMPLEMENTATION:
    1. Define list of essential ML packages to check
    2. For each package, try to import it and get version
    3. Track which packages are available vs missing
    4. Calculate environment health score
    5. Return comprehensive environment report

    ESSENTIAL PACKAGES TO CHECK:
    - numpy: Numerical computing foundation
    - matplotlib: Visualization and plotting
    - psutil: System monitoring
    - jupyter: Interactive development
    - nbdev: Package development
    - pytest: Testing framework

    IMPLEMENTATION HINTS:
    - Use try/except to handle missing packages gracefully
    - Use pkg_resources.get_distribution(package).version for versions
    - Calculate health_score as (available_packages / total_packages) * 100
    - Round health_score to 1 decimal place
    """
    ### BEGIN SOLUTION
    essential_packages = [
        'numpy', 'matplotlib', 'psutil', 'jupyter', 'nbdev', 'pytest'
    ]
    available = {}
    missing = []
    for package in essential_packages:
        try:
            # Try to import the package
            importlib.import_module(package)
            # Get version information
            version = pkg_resources.get_distribution(package).version
            available[package] = version
        except (ImportError, pkg_resources.DistributionNotFound):
            missing.append(package)
    # Calculate health score
    total_packages = len(essential_packages)
    available_packages = len(available)
    health_score = round((available_packages / total_packages) * 100, 1)
    return {
        'available_packages': available,
        'missing_packages': missing,
        'health_score': health_score,
        'total_checked': total_packages,
        'status': 'healthy' if health_score >= 80 else 'needs_attention'
    }
    ### END SOLUTION
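The same availability check can be written with the stdlib `importlib.metadata` (Python 3.8+), which is the maintained replacement for the deprecated `pkg_resources` API used above. `check_packages` is an illustrative helper, not part of the module; the nonexistent package name below is deliberately fake.

```python
# Availability + version check via importlib.metadata (stdlib).
from importlib import metadata

def check_packages(packages):
    available, missing = {}, []
    for name in packages:
        try:
            available[name] = metadata.version(name)
        except metadata.PackageNotFoundError:
            missing.append(name)
    health = round(len(available) / len(packages) * 100, 1)
    return {'available': available, 'missing': missing, 'health_score': health}

report = check_packages(['not-a-real-package-xyz'])
print(report)  # the fake package lands in 'missing', health_score 0.0
```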
# %% ../../modules/source/01_setup/setup_dev.ipynb 20
import time
import random

def benchmark_performance() -> Dict[str, Any]:
    """
    Benchmark system performance for ML workloads.

    This function measures computational performance to help you understand
    your system's capabilities and optimize your ML development workflow.

    TODO: Implement performance benchmarking.

    STEP-BY-STEP IMPLEMENTATION:
    1. CPU Test: Time a computationally intensive operation
    2. Memory Test: Time a memory-intensive operation
    3. Calculate performance scores based on execution time
    4. Determine overall system performance rating
    5. Return comprehensive benchmark results

    BENCHMARK TESTS:
    - CPU: Nested loop calculation (computational intensity)
    - Memory: Large list operations (memory bandwidth)
    - Combined: Overall system performance score

    IMPLEMENTATION HINTS:
    - Use time.time() to measure execution time
    - CPU test: nested loops with mathematical operations
    - Memory test: large list creation and manipulation
    - Lower execution time = better performance
    - Calculate scores as inverse of time (e.g., 1/time * 1000)
    """
    ### BEGIN SOLUTION
    benchmarks = {}
    # CPU Performance Test
    print("⚡ Running CPU benchmark...")
    start_time = time.time()
    # CPU-intensive calculation
    result = 0
    for i in range(100000):
        result += i * i + i / 2
    cpu_time = time.time() - start_time
    benchmarks['cpu_time'] = round(cpu_time, 3)
    benchmarks['cpu_score'] = round(1000 / cpu_time, 1)
    # Memory Performance Test
    print("🧠 Running memory benchmark...")
    start_time = time.time()
    # Memory-intensive operations
    large_list = list(range(1000000))
    large_list.reverse()
    large_list.sort()
    memory_time = time.time() - start_time
    benchmarks['memory_time'] = round(memory_time, 3)
    benchmarks['memory_score'] = round(1000 / memory_time, 1)
    # Overall Performance Score
    overall_score = round((benchmarks['cpu_score'] + benchmarks['memory_score']) / 2, 1)
    benchmarks['overall_score'] = overall_score
    # Performance Rating
    if overall_score >= 80:
        rating = 'excellent'
    elif overall_score >= 60:
        rating = 'good'
    elif overall_score >= 40:
        rating = 'fair'
    else:
        rating = 'needs_optimization'
    benchmarks['performance_rating'] = rating
    return benchmarks
    ### END SOLUTION
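The inverse-time scoring scheme above can be isolated into a small helper. This sketch adds two things the original does not have: `time.perf_counter()` (a monotonic, higher-resolution clock than `time.time()`) and a floor on elapsed time so a near-zero measurement cannot divide by zero. Both are assumptions of this sketch, not the module's behavior.

```python
# Inverse-time scoring: lower elapsed time -> higher score.
import time

def timed_score(workload, scale=1000.0):
    """Run workload() once and return scale / elapsed_seconds, rounded to 1 dp."""
    start = time.perf_counter()
    workload()
    elapsed = time.perf_counter() - start
    # Floor elapsed time to avoid ZeroDivisionError on very coarse clocks
    return round(scale / max(elapsed, 1e-9), 1)

score = timed_score(lambda: sum(i * i for i in range(100_000)))
print(score)  # higher is faster
```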
# %% ../../modules/source/01_setup/setup_dev.ipynb 22
import subprocess
import json
from pathlib import Path

def setup_development_environment() -> Dict[str, Any]:
    """
    Configure development environment for professional ML development.

    This function sets up essential tools and configurations to make your
    development workflow more efficient and professional.

    TODO: Implement development environment setup.

    STEP-BY-STEP IMPLEMENTATION:
    1. Check if Git is installed and configured
    2. Verify Jupyter installation and configuration
    3. Check Python development tools
    4. Configure any missing tools
    5. Return setup status and recommendations

    DEVELOPMENT TOOLS TO CHECK:
    - Git: Version control system
    - Jupyter: Interactive development
    - Python tools: Code quality and formatting

    IMPLEMENTATION HINTS:
    - Use subprocess.run() to check tool availability
    - Use try/except to handle missing tools gracefully
    - Provide helpful recommendations for missing tools
    - Focus on tools that improve ML development workflow
    """
    ### BEGIN SOLUTION
    setup_status = {}
    recommendations = []
    # Check Git installation and configuration
    try:
        git_version = subprocess.run(['git', '--version'],
                                     capture_output=True, text=True, check=True)
        setup_status['git_installed'] = True
        setup_status['git_version'] = git_version.stdout.strip()
        # Check Git configuration
        try:
            git_name = subprocess.run(['git', 'config', 'user.name'],
                                      capture_output=True, text=True, check=True)
            git_email = subprocess.run(['git', 'config', 'user.email'],
                                       capture_output=True, text=True, check=True)
            setup_status['git_configured'] = True
            setup_status['git_name'] = git_name.stdout.strip()
            setup_status['git_email'] = git_email.stdout.strip()
        except subprocess.CalledProcessError:
            setup_status['git_configured'] = False
            recommendations.append("Configure Git: git config --global user.name 'Your Name'")
            recommendations.append("Configure Git: git config --global user.email 'your.email@domain.com'")
    except (subprocess.CalledProcessError, FileNotFoundError):
        setup_status['git_installed'] = False
        recommendations.append("Install Git: https://git-scm.com/downloads")
    # Check Jupyter installation
    try:
        jupyter_version = subprocess.run(['jupyter', '--version'],
                                         capture_output=True, text=True, check=True)
        setup_status['jupyter_installed'] = True
        setup_status['jupyter_version'] = jupyter_version.stdout.strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        setup_status['jupyter_installed'] = False
        recommendations.append("Install Jupyter: pip install jupyter")
    # Check Python tools
    python_tools = ['pip', 'python']
    for tool in python_tools:
        try:
            tool_version = subprocess.run([tool, '--version'],
                                          capture_output=True, text=True, check=True)
            setup_status[f'{tool}_installed'] = True
            setup_status[f'{tool}_version'] = tool_version.stdout.strip()
        except (subprocess.CalledProcessError, FileNotFoundError):
            setup_status[f'{tool}_installed'] = False
            recommendations.append(f"Install {tool}: Check Python installation")
    # Calculate setup health
    total_tools = 4  # git, jupyter, pip, python
    installed_tools = sum([
        setup_status.get('git_installed', False),
        setup_status.get('jupyter_installed', False),
        setup_status.get('pip_installed', False),
        setup_status.get('python_installed', False)
    ])
    setup_score = round((installed_tools / total_tools) * 100, 1)
    return {
        'setup_status': setup_status,
        'recommendations': recommendations,
        'setup_score': setup_score,
        'status': 'ready' if setup_score >= 75 else 'needs_configuration'
    }
    ### END SOLUTION
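The repeated try/run/except pattern above can be factored into one probe helper. `probe_tool` is an illustrative name, and this sketch probes the current Python interpreter via `sys.executable` (always present) rather than git or jupyter, so it works even on machines without those tools installed.

```python
# Generic CLI tool probe: returns (installed, version_string).
import subprocess
import sys

def probe_tool(cmd):
    """Run a version command; report availability and its output."""
    try:
        out = subprocess.run(cmd, capture_output=True, text=True, check=True)
        # Some tools print version info to stderr instead of stdout
        return True, out.stdout.strip() or out.stderr.strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False, ''

ok, version = probe_tool([sys.executable, '--version'])
print(ok, version)
```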
# %% ../../modules/source/01_setup/setup_dev.ipynb 24
from datetime import datetime

def generate_system_report() -> Dict[str, Any]:
    """
    Generate comprehensive system report for ML development.

    This function combines all configuration and diagnostic information
    into a single, actionable report for your ML development environment.

    TODO: Implement comprehensive system reporting.

    STEP-BY-STEP IMPLEMENTATION:
    1. Gather personal information
    2. Collect system information
    3. Validate environment
    4. Run performance benchmarks
    5. Check development setup
    6. Generate overall health score
    7. Create comprehensive report with recommendations

    REPORT SECTIONS:
    - Personal configuration
    - System specifications
    - Environment validation
    - Performance benchmarks
    - Development setup
    - Overall health assessment
    - Recommendations for improvement

    IMPLEMENTATION HINTS:
    - Call all previously implemented functions
    - Combine results into comprehensive report
    - Calculate overall health score from all components
    - Provide actionable recommendations
    """
    ### BEGIN SOLUTION
    print("📊 Generating comprehensive system report...")
    # Gather all information
    personal = personal_info()
    system = system_info()
    environment = validate_environment()
    performance = benchmark_performance()
    development = setup_development_environment()
    # Calculate overall health score (normalize performance score to 0-100 range)
    normalized_performance = min(performance['overall_score'], 100)  # Cap at 100
    health_components = [
        environment['health_score'],
        normalized_performance,
        development['setup_score']
    ]
    overall_health = round(sum(health_components) / len(health_components), 1)
    # Generate status
    if overall_health >= 85:
        status = 'excellent'
    elif overall_health >= 70:
        status = 'good'
    elif overall_health >= 50:
        status = 'fair'
    else:
        status = 'needs_attention'
    # Compile recommendations
    recommendations = []
    if environment['health_score'] < 80:
        recommendations.extend([f"Install missing package: {pkg}" for pkg in environment['missing_packages']])
    if performance['overall_score'] < 50:
        recommendations.append("Consider hardware upgrade for better ML performance")
    recommendations.extend(development['recommendations'])
    # Create comprehensive report
    report = {
        'timestamp': datetime.now().isoformat(),
        'personal_info': personal,
        'system_info': system,
        'environment_validation': environment,
        'performance_benchmarks': performance,
        'development_setup': development,
        'overall_health': overall_health,
        'status': status,
        'recommendations': recommendations,
        'report_version': '1.0.0'
    }
    return report
    ### END SOLUTION
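The aggregation step in `generate_system_report` can be checked in isolation: component scores are capped at 100, averaged, and mapped to a status label using the same thresholds as above. `overall_health` here is a standalone illustrative helper, not the exported function.

```python
# Health aggregation: cap performance at 100, average, label.
def overall_health(env_score, perf_score, setup_score):
    components = [env_score, min(perf_score, 100), setup_score]
    score = round(sum(components) / len(components), 1)
    if score >= 85:
        status = 'excellent'
    elif score >= 70:
        status = 'good'
    elif score >= 50:
        status = 'fair'
    else:
        status = 'needs_attention'
    return score, status

# A perf_score of 250 is capped to 100 before averaging:
print(overall_health(100.0, 250.0, 75.0))  # (91.7, 'excellent')
```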