Files
cs249r_book/.github/workflows/infra-health-check.yml
2026-03-06 10:11:44 -05:00

419 lines
15 KiB
YAML

name: '🔧 Infra · ❤️ Health Check'

# Health validation for the Quarto build container.
# Checks that the tools a Quarto build depends on are present and working.
# Triggered manually, from other workflows, and once a day on a schedule.
on:
  schedule:
    # Every day at 02:00 UTC
    - cron: "0 2 * * *"

  # Manual run; all inputs are optional and fall back to their defaults
  workflow_dispatch:
    inputs:
      container_registry:
        type: string
        required: false
        default: ghcr.io
        description: "Container registry URL"
      container_tag:
        type: string
        required: false
        default: latest
        description: "Container tag to test"
      test_linux:
        type: boolean
        required: false
        default: true
        description: "Test Linux container"
      test_windows:
        type: boolean
        required: false
        default: true
        description: "Test Windows container"

  # Reusable-workflow entry point; accepts the same inputs as the manual run
  workflow_call:
    inputs:
      container_registry:
        type: string
        required: false
        default: ghcr.io
        description: "Container registry URL"
      container_tag:
        type: string
        required: false
        default: latest
        description: "Container tag to test"
      test_linux:
        type: boolean
        required: false
        default: true
        description: "Test Linux container"
      test_windows:
        type: boolean
        required: false
        default: true
        description: "Test Windows container"
# Workflow-wide container settings, defined once for every job.
env:
  # --------------------------------------------------------------------------
  # Paths come from GitHub repository variables (Settings > Variables):
  #   BOOK_ROOT, BOOK_DOCKER, BOOK_TOOLS, BOOK_QUARTO, BOOK_DEPS
  # MLSysBook content lives under book/ so that TinyTorch can sit at the root.
  # Reference them in steps as ${{ vars.BOOK_ROOT }}, ${{ vars.BOOK_DOCKER }}, ...
  # --------------------------------------------------------------------------

  # Registry and tag: use the workflow input when given, else the default
  REGISTRY: "${{ inputs.container_registry || 'ghcr.io' }}"
  CONTAINER_TAG: "${{ inputs.container_tag || 'latest' }}"
  LINUX_CONTAINER_NAME: quarto-linux

  # Fully qualified image reference: <registry>/<owner>/<repo>/quarto-linux:<tag>
  LINUX_IMAGE: "${{ inputs.container_registry || 'ghcr.io' }}/${{ github.repository }}/quarto-linux:${{ inputs.container_tag || 'latest' }}"
jobs:
  # Pulls the published build container and verifies the Quarto toolchain
  # inside it. The matrix defines a single Linux entry; fail-fast is disabled
  # so additional entries would each be tested independently.
  #
  # Step conditions compare the test_linux input as a formatted string.
  # The input is unset on `schedule` runs, and a loose `!= false` comparison
  # coerces both an unset value and `false` to the same number, which would
  # skip the step. Formatting first makes "unset" count as enabled while an
  # explicit `false` still disables the step.
  container-health-check:
    runs-on: ${{ matrix.os }}
    if: github.repository_owner == 'harvard-edge'
    timeout-minutes: 60
    strategy:
      fail-fast: false  # Test every matrix entry even if one fails
      matrix:
        include:
          - os: ubuntu-latest
            platform: linux
            container_name: quarto-linux
            shell: bash
    outputs:
      # Image size captured by the "Container Information" step. The summary
      # job runs on a different runner and cannot query the pulled image.
      linux_size: ${{ steps.image-info.outputs.size }}
    env:
      CONTAINER_IMAGE: ${{ inputs.container_registry || 'ghcr.io' }}/${{ github.repository }}/${{ matrix.container_name }}:${{ inputs.container_tag || 'latest' }}
      PLATFORM: ${{ matrix.platform }}
      # Using vars.BOOK_DOCKER (repository variable) - works in all contexts
      DOCKERFILE_PATH: ./${{ vars.BOOK_DOCKER }}/${{ matrix.platform }}/Dockerfile
    steps:
      - name: 📥 Checkout repository
        if: matrix.platform == 'linux' && format('{0}', inputs.test_linux) != 'false'
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: 🔑 Log in to GitHub Container Registry
        if: matrix.platform == 'linux' && format('{0}', inputs.test_linux) != 'false'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Scripts read $CONTAINER_IMAGE / $PLATFORM from the job environment
      # instead of interpolating expressions, so input values are never
      # parsed as shell code.
      - name: 🐳 Pull Docker Image
        if: matrix.platform == 'linux' && format('{0}', inputs.test_linux) != 'false'
        run: docker pull "$CONTAINER_IMAGE"

      - name: 📊 Container Information
        id: image-info
        if: matrix.platform == 'linux' && format('{0}', inputs.test_linux) != 'false'
        run: |
          echo "📊 === CONTAINER INFORMATION ==="
          echo "📋 Platform: $PLATFORM"
          echo "📋 Image: $CONTAINER_IMAGE"
          echo "📋 Container Details:"
          docker images "$CONTAINER_IMAGE" --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}\t{{.CreatedAt}}"
          echo ""
          echo "📊 Container Size Analysis:"
          IMAGE_SIZE=$(docker images "$CONTAINER_IMAGE" --format "{{.Size}}" | head -n 1)
          echo "$IMAGE_SIZE"
          # Publish the size so the final summary job can report it
          echo "size=$IMAGE_SIZE" >> "$GITHUB_OUTPUT"

      # Fails the step when any essential tool is missing from the image.
      - name: 🐧 Linux Container - Tool Version Check
        if: matrix.platform == 'linux' && format('{0}', inputs.test_linux) != 'false'
        run: |
          # Without pipefail the pipe into tee would hide the container's exit status
          set -o pipefail
          echo "🐧 === LINUX CONTAINER TOOL VERSIONS ==="
          echo "📋 Testing Linux tools with version capture:"
          echo "============================================"
          # The quoted here-document delivers the script verbatim, so every
          # $(...) and $VAR below is evaluated inside the container rather
          # than by the runner's shell.
          docker run --rm -i "$CONTAINER_IMAGE" bash -s <<'CONTAINER_SCRIPT' | tee linux-tool-versions.log
          MISSING_TOOLS=0
          SUMMARY=''

          # report_tool HEADING UNDERLINE DISPLAY_NAME COMMAND
          # Prints location and version for COMMAND, appends one line to
          # SUMMARY, and counts the tool as missing when it is not on PATH.
          report_tool() {
            local heading=$1 underline=$2 display=$3 cmd=$4
            echo "📊 ${heading}:"
            echo "$underline"
            if command -v "$cmd" >/dev/null 2>&1; then
              echo "📍 Location: $(command -v "$cmd")"
              echo "📋 Version: $("$cmd" --version 2>&1 | head -1)"
              echo '✅ Status: OK'
              SUMMARY="${SUMMARY}✅ ${display}: AVAILABLE"$'\n'
            else
              echo '❌ Status: NOT FOUND'
              SUMMARY="${SUMMARY}❌ ${display}: MISSING"$'\n'
              MISSING_TOOLS=$((MISSING_TOOLS + 1))
            fi
            echo ''
          }

          echo '🔍 === LINUX CONTAINER TOOL VERSIONS ==='
          echo ''
          report_tool QUARTO      '----------'      Quarto      quarto
          report_tool PYTHON      '-----------'     Python      python3
          report_tool R           '------'          R           R
          report_tool LUALATEX    '------------'    LuaLaTeX    lualatex
          report_tool GHOSTSCRIPT '---------------' Ghostscript gs
          report_tool INKSCAPE    '------------'    Inkscape    inkscape
          echo '🎯 === LINUX TOOL CHECK COMPLETE ==='
          echo ''
          echo '📋 LINUX TOOL STATUS SUMMARY:'
          echo '=============================='
          printf '%s' "$SUMMARY"
          echo ''
          if [ "$MISSING_TOOLS" -eq 0 ]; then
            echo '🎯 ✅ Linux container tool validation: PASSED'
            echo 'All essential Quarto build tools are available!'
          else
            echo '🎯 ❌ Linux container tool validation: FAILED'
            echo "$MISSING_TOOLS essential tool(s) are missing!"
            echo 'This container is NOT ready for Quarto builds.'
            exit 1
          fi
          CONTAINER_SCRIPT
          echo "📋 Linux tool versions saved to linux-tool-versions.log"

      # Runs `quarto check` in the container. The step exits with quarto's
      # status, but continue-on-error keeps a failure from failing the job.
      - name: 🐧 Linux Container - Quarto Check
        if: matrix.platform == 'linux' && format('{0}', inputs.test_linux) != 'false'
        continue-on-error: true  # Don't fail workflow if quarto check fails
        run: |
          set -o pipefail
          echo "🐧 === LINUX QUARTO CHECK (COMPREHENSIVE) ==="
          echo "📋 Running quarto check to validate full installation:"
          echo "===================================================="
          # Record the status instead of aborting so the raw-output
          # extraction below still runs after a failed check.
          QUARTO_CHECK_STATUS=0
          docker run --rm -i "$CONTAINER_IMAGE" bash -s <<'CONTAINER_SCRIPT' | tee linux-quarto-check.log || QUARTO_CHECK_STATUS=$?
          # pipefail makes the pipeline below return quarto's exit status
          # rather than the status of the `while` loop that frames the output
          set -o pipefail
          echo '🔍 Running quarto check with enhanced raw output capture...'
          echo '📋 Capturing full output for debugging...'
          echo ''
          # Set maximum verbosity for raw output
          export QUARTO_LOG_LEVEL=DEBUG
          export QUARTO_PRINT_STACK=true
          echo '⏰ Quarto check started at:' $(date '+%Y-%m-%d %H:%M:%S')
          echo '--- RAW QUARTO CHECK OUTPUT START ---'
          echo '┌─────────────────────────────────────────────────────────────────────────────┐'
          if quarto check 2>&1 | while IFS= read -r line; do echo "│ $line"; done; then
            QUARTO_EXIT_CODE=0
          else
            QUARTO_EXIT_CODE=$?
          fi
          echo '└─────────────────────────────────────────────────────────────────────────────┘'
          echo '--- RAW QUARTO CHECK OUTPUT END ---'
          echo '⏰ Quarto check completed at:' $(date '+%Y-%m-%d %H:%M:%S')
          echo ''
          # Determine final status
          if [ "$QUARTO_EXIT_CODE" -eq 0 ]; then
            echo ''
            echo '✅ Quarto check: PASSED - All components verified!'
            QUARTO_STATUS='PASSED'
          else
            echo ''
            echo '❌ Quarto check: FAILED - Issues detected!'
            echo '🔍 This indicates potential container configuration issues.'
            echo '📋 Check the full output above for specific error details.'
            QUARTO_STATUS='FAILED'
          fi
          echo ''
          echo "📊 QUARTO CHECK SUMMARY: $QUARTO_STATUS"
          exit "$QUARTO_EXIT_CODE"
          CONTAINER_SCRIPT
          echo "📋 Linux quarto check results saved to linux-quarto-check.log"
          # Also extract just the raw quarto check output for easy viewing.
          # A sed range has no line limit; the second sed drops the two marker lines.
          if [ -f "linux-quarto-check.log" ]; then
            echo "📄 Extracting raw Quarto output..."
            sed -n '/RAW QUARTO CHECK OUTPUT START/,/RAW QUARTO CHECK OUTPUT END/p' linux-quarto-check.log | \
              sed '1d;$d' > linux-quarto-raw-output.txt || echo "Could not extract raw output"
            # -s: only report success when something was actually extracted
            if [ -s "linux-quarto-raw-output.txt" ]; then
              echo "📄 Raw Quarto output extracted to linux-quarto-raw-output.txt"
              echo "📊 Raw output preview (first 10 lines):"
              head -10 linux-quarto-raw-output.txt | sed 's/^/ /'
            fi
          fi
          exit "$QUARTO_CHECK_STATUS"

      # Static recap; only reached when no earlier step has failed the job.
      - name: 📊 Linux Container Analysis
        if: matrix.platform == 'linux' && format('{0}', inputs.test_linux) != 'false'
        run: |
          echo "LINUX CONTAINER TEST SUMMARY"
          echo "✅ Container pulled successfully"
          echo "✅ Essential tools validated with pass/fail status (Quarto, Python, R, LaTeX, Ghostscript, Inkscape)"
          echo "✅ Quarto check completed"
          echo "✅ Container size displayed"
          echo "✅ Tool validation enforced - will FAIL if any essential tool missing"
          echo "LINUX CONTAINER TESTS COMPLETE"

      # always(): keep the logs even when a check above failed
      - name: 📤 Upload Test Artifacts
        if: always() && matrix.platform == 'linux' && format('{0}', inputs.test_linux) != 'false'
        uses: actions/upload-artifact@v6
        with:
          name: ${{ matrix.platform }}-container-test-results
          path: |
            linux-tool-versions.log
            linux-quarto-check.log
            linux-quarto-raw-output.txt
          if-no-files-found: warn

  # Final summary job.
  # Runs even when the health check failed or was skipped, and reports the
  # real outcome of that job instead of an unconditional success message.
  final-summary:
    needs: container-health-check
    runs-on: ubuntu-latest
    if: always()  # Always run to provide summary
    env:
      HEALTH_RESULT: ${{ needs.container-health-check.result }}
      # Size is handed over as a job output; this runner never pulled the image
      LINUX_SIZE: ${{ needs.container-health-check.outputs.linux_size || 'Not available' }}
    steps:
      - name: 🎯 Final Container Health Summary
        run: |
          echo "🎯 === FINAL CONTAINER HEALTH SUMMARY ==="
          echo "=========================================="
          echo ""
          echo "📦 CONTAINER SIZES:"
          echo "------------------"
          echo "🐧 Linux: $LINUX_SIZE"
          echo ""
          echo "🔍 TEST RESULTS:"
          echo "---------------"
          echo "📋 container-health-check result: $HEALTH_RESULT"
          echo ""
          echo "🎯 FINAL STATUS:"
          echo "---------------"
          case "$HEALTH_RESULT" in
            success)
              echo "🟢 CONTAINER TESTING: COMPLETED SUCCESSFULLY ✅"
              echo "📋 Containers validated and ready for production use"
              echo "📊 Detailed logs available in artifacts"
              ;;
            skipped)
              echo "⚪ CONTAINER TESTING: SKIPPED"
              echo "📋 No container was validated in this run"
              ;;
            *)
              echo "🔴 CONTAINER TESTING: FAILED ❌"
              echo "📊 Detailed logs available in artifacts"
              echo "========================================"
              exit 1
              ;;
          esac
          echo "========================================"