Merge branch 'dev' into feature/manage-external-images

This commit is contained in:
Vijay Janapa Reddi
2025-08-11 17:47:57 -04:00
22 changed files with 123 additions and 2528 deletions

View File

@@ -74,14 +74,14 @@ on:
description: "Whether build used cache (true/false)"
value: ${{ jobs.build.outputs.cache-hit }}
# DISABLED: Automatic triggers while working on containers
# schedule:
# - cron: '0 0 * * 0' # Weekly rebuild (Sunday at midnight)
# push:
# paths:
# - 'tools/dependencies/**'
# - 'docker/build-quarto-linux/**'
# - '.github/workflows/build-linux-container.yml'
# Re-enable automatic triggers
schedule:
- cron: '0 0 * * 0' # Weekly rebuild (Sunday at midnight)
push:
paths:
- 'tools/dependencies/**'
- 'docker/linux/**'
- '.github/workflows/build-linux-container.yml'
env:
# Container Registry Configuration (configurable via inputs)
@@ -91,7 +91,7 @@ env:
# Container Build Configuration
PLATFORM: linux/amd64
DOCKERFILE_PATH: ./docker/build-quarto-linux/Dockerfile
DOCKERFILE_PATH: ./docker/linux/Dockerfile
CONTEXT_PATH: .
jobs:

View File

@@ -73,14 +73,14 @@ on:
description: "Whether build used cache (true/false)"
value: ${{ jobs.build.outputs.cache-hit }}
# DISABLED: Automatic triggers while working on containers
# schedule:
# - cron: '0 2 * * 0' # Weekly rebuild (Sunday at 2am - after Linux container)
# push:
# paths:
# - 'tools/dependencies/**'
# - 'docker/build-quarto-windows/**'
# - '.github/workflows/build-windows-container.yml'
# Re-enable automatic triggers
schedule:
- cron: '0 2 * * 0' # Weekly rebuild (Sunday at 2am - after Linux container)
push:
paths:
- 'tools/dependencies/**'
- 'docker/windows/**'
- '.github/workflows/build-windows-container.yml'
env:
# Container Registry Configuration (configurable via inputs)
@@ -89,7 +89,7 @@ env:
CONTAINER_TAG: ${{ inputs.container_tag || 'latest' }}
# Container Build Configuration
DOCKERFILE_PATH: ./docker/build-quarto-windows/Dockerfile
DOCKERFILE_PATH: ./docker/windows/Dockerfile
CONTEXT_PATH: .
jobs:
@@ -100,10 +100,19 @@ jobs:
contents: read
packages: write
outputs:
  # These values are written to $GITHUB_OUTPUT by the "Build Summary" step
  # (id: build-summary), not by the build-push step (id: build), so they must
  # be read from steps.build-summary.
  build-status: ${{ steps.build-summary.outputs.build-status }}
  image-name: ${{ steps.build-summary.outputs.image-name }}
  image-digest: ${{ steps.build-summary.outputs.image-digest }}
  cache-hit: ${{ steps.build-summary.outputs.cache-hit }}
steps:
- name: 📥 Checkout repository
uses: actions/checkout@v4
- name: 🛠️ Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: 🔐 Log in to Container Registry
uses: docker/login-action@v3
with:
@@ -111,27 +120,47 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: 🐳 Build Windows container
- name: 🏷️ Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=raw,value=${{ env.CONTAINER_TAG }}
- name: 🐳 Build and Push Windows container
id: build
uses: docker/build-push-action@v5
with:
context: ${{ env.CONTEXT_PATH }}
file: ${{ env.DOCKERFILE_PATH }}
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
no-cache: ${{ inputs.no_cache }}
- name: 📊 Build Summary
id: build-summary
if: always()
run: |
echo "🚀 Building Windows container..."
$useNoCache = "${{ inputs.no_cache }}" -eq "true"
if ($useNoCache) {
echo "📊 Cache mode: DISABLED (fresh build)"
docker build --no-cache -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }} -f ${{ env.DOCKERFILE_PATH }} ${{ env.CONTEXT_PATH }}
} else {
echo "📊 Cache mode: ENABLED (faster build)"
docker build -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }} -f ${{ env.DOCKERFILE_PATH }} ${{ env.CONTEXT_PATH }}
}
# Determine build status
if [ "${{ steps.build.outcome }}" = "success" ]; then
BUILD_STATUS="success"
else
BUILD_STATUS="failure"
fi
echo "✅ Local container build completed"
- name: 🐳 Push Windows container
run: |
echo "📦 Pushing container to registry..."
docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }}
echo "✅ Container push completed"
- name: Build Complete
run: |
echo "✅ Windows container build completed successfully!"
echo "📊 Container: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }}"
# Extract build information
IMAGE_NAME="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }}"
IMAGE_DIGEST="${{ steps.build.outputs.digest }}"
CACHE_HIT="${{ steps.build.outputs.cache-hit }}"
echo "build-status=$BUILD_STATUS" >> $GITHUB_OUTPUT
echo "image-name=$IMAGE_NAME" >> $GITHUB_OUTPUT
echo "image-digest=$IMAGE_DIGEST" >> $GITHUB_OUTPUT
echo "cache-hit=$CACHE_HIT" >> $GITHUB_OUTPUT
echo "📊 Build Status: $BUILD_STATUS"
echo "🐳 Image: $IMAGE_NAME"
echo "🔍 Digest: $IMAGE_DIGEST"
echo "💾 Cache Hit: $CACHE_HIT"

View File

@@ -84,62 +84,32 @@ jobs:
env:
CONTAINER_IMAGE: ${{ inputs.container_registry || 'ghcr.io' }}/${{ github.repository }}/${{ matrix.container_name }}:${{ inputs.container_tag || 'latest' }}
PLATFORM: ${{ matrix.platform }}
# File names are case-sensitive on Linux runners; the build workflows use
# ./docker/<platform>/Dockerfile (capital D).
DOCKERFILE_PATH: ./docker/${{ matrix.platform }}/Dockerfile
steps:
- name: 💾 Cache Docker Images
- name: 📥 Checkout repository
if: |
(matrix.platform == 'linux' && inputs.test_linux != false) ||
(matrix.platform == 'windows' && inputs.test_windows != false)
uses: actions/cache@v4
id: cache-docker-images
uses: actions/checkout@v4
with:
path: |
~/.docker
~/AppData/Local/Docker/wsl/data
key: docker-${{ matrix.platform }}-${{ env.CONTAINER_IMAGE }}-${{ hashFiles('docker/**') }}
restore-keys: |
docker-${{ matrix.platform }}-${{ env.CONTAINER_IMAGE }}-
docker-${{ matrix.platform }}-
fetch-depth: 0
- name: 🐳 Pull Linux Container
if: matrix.platform == 'linux' && inputs.test_linux != false
shell: bash
run: |
echo "🐳 Pulling Linux container..."
echo "📦 Image: ${{ env.CONTAINER_IMAGE }}"
echo "💾 Cache Status: ${{ steps.cache-docker-images.outputs.cache-hit == 'true' && '✅ HIT - faster pull expected' || '❌ MISS - full pull required' }}"
# Check if image already exists locally (from cache)
if docker image inspect ${{ env.CONTAINER_IMAGE }} >/dev/null 2>&1; then
echo "✅ Container image found locally - skipping pull"
else
echo "📥 Pulling container image..."
docker pull ${{ env.CONTAINER_IMAGE }}
echo "✅ Container pulled successfully"
fi
- name: 🔑 Log in to GitHub Container Registry
if: |
(matrix.platform == 'linux' && inputs.test_linux != false) ||
(matrix.platform == 'windows' && inputs.test_windows != false)
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: 🐳 Pull Windows Container
if: matrix.platform == 'windows' && inputs.test_windows != false
shell: pwsh
run: |
Write-Output "🐳 Pulling Windows container..."
Write-Output "📦 Image: ${{ env.CONTAINER_IMAGE }}"
$cacheHit = "${{ steps.cache-docker-images.outputs.cache-hit }}"
if ($cacheHit -eq 'true') {
Write-Output "💾 Cache Status: ✅ HIT - faster pull expected"
} else {
Write-Output "💾 Cache Status: ❌ MISS - full pull required"
}
# Check if image already exists locally (from cache)
$imageExists = docker image inspect ${{ env.CONTAINER_IMAGE }}
if ($LASTEXITCODE -eq 0) {
Write-Output "✅ Container image found locally - skipping pull"
} else {
Write-Output "📥 Pulling container image..."
docker pull ${{ env.CONTAINER_IMAGE }}
Write-Output "✅ Container pulled successfully"
}
- name: 🐳 Pull Docker Image
if: |
(matrix.platform == 'linux' && inputs.test_linux != false) ||
(matrix.platform == 'windows' && inputs.test_windows != false)
run: docker pull ${{ env.CONTAINER_IMAGE }}
- name: 📊 Container Information
if: |

View File

@@ -167,6 +167,7 @@ jobs:
env:
CONTAINER_IMAGE: ${{ format('{0}/{1}/quarto-{2}:{3}', inputs.container_registry || 'ghcr.io', github.repository, matrix.platform, inputs.container_tag || 'latest') }}
# File names are case-sensitive on Linux runners; the build workflows use
# ./docker/<platform>/Dockerfile (capital D).
DOCKERFILE_PATH: ./docker/${{ matrix.platform }}/Dockerfile
steps:
- name: 🛑 Skip build
@@ -179,41 +180,17 @@ jobs:
with:
fetch-depth: 0
- name: 💾 Cache Docker Images
- name: 🔑 Log in to GitHub Container Registry
if: matrix.enabled
uses: actions/cache@v4
id: cache-docker-images
uses: docker/login-action@v3
with:
path: |
${{ runner.os == 'Linux' && '~/.docker' || '' }}
${{ runner.os == 'Windows' && '~/AppData/Local/Docker/wsl/data' || '' }}
key: docker-${{ runner.os }}-${{ env.CONTAINER_IMAGE }}-${{ hashFiles('docker/**') }}
restore-keys: |
docker-${{ runner.os }}-${{ env.CONTAINER_IMAGE }}-
docker-${{ runner.os }}-
registry: ${{ env.CONTAINER_REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: 🐳 Pull Windows Container
if: matrix.platform == 'windows' && matrix.enabled
shell: pwsh
run: |
Write-Output "🐳 Pulling Windows container..."
Write-Output "📦 Image: ${{ env.CONTAINER_IMAGE }}"
$cacheHit = "${{ steps.cache-docker-images.outputs.cache-hit }}"
if ($cacheHit -eq 'true') {
Write-Output "💾 Cache Status: ✅ HIT - faster pull expected"
} else {
Write-Output "💾 Cache Status: ❌ MISS - full pull required"
}
# Check if image already exists locally (from cache)
$imageExists = docker image inspect ${{ env.CONTAINER_IMAGE }}
if ($LASTEXITCODE -eq 0) {
Write-Output "✅ Container image found locally - skipping pull"
} else {
Write-Output "📥 Pulling container image..."
docker pull ${{ env.CONTAINER_IMAGE }}
Write-Output "✅ Container pulled successfully"
}
- name: 🐳 Pull Docker Image
if: matrix.enabled
run: docker pull ${{ env.CONTAINER_IMAGE }}
- name: 🔨 Build ${{ matrix.format_name }} (Linux)
if: matrix.platform == 'linux' && matrix.enabled

View File

@@ -1,71 +0,0 @@
# Exclude unnecessary files from Docker build context
# This reduces build time and image size
# Build artifacts
build/
_book/
_site/
*.pdf
*.html
# Git and version control
.git/
.gitignore
# Documentation
docs/
*.md
!docker/quarto-linux-build/README.md
# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~
# OS files
.DS_Store
Thumbs.db
# Logs and temporary files
*.log
*.tmp
*.temp
# Node modules (if any)
node_modules/
# Python cache
__pycache__/
*.pyc
*.pyo
# R cache
.Rhistory
.RData
# Large media files (not needed for build)
assets/media/
*.mp4
*.avi
*.mov
# Test files
test-*
*.test.*
# Backup files
*.bak
*.backup
# Large data files
data/
*.csv
*.json
*.xml
# Keep only essential files for build
# - tools/dependencies/ (needed for package installation)
# - book/ (needed for build testing)
# - .github/workflows/ (needed for workflow files)

View File

@@ -1,652 +0,0 @@
# MLSysBook Quarto Build Container
# Based on Ubuntu 22.04 with all dependencies pre-installed
# This container eliminates the 30-45 minute setup time for Linux builds
FROM ubuntu:22.04
# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV R_LIBS_USER=/usr/local/lib/R/library
ENV QUARTO_LOG_LEVEL=INFO
ENV PYTHONIOENCODING=utf-8
ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
ENV PATH=/usr/local/texlive/bin/x86_64-linux:$PATH
# === PHASE 0: COPY DEPENDENCY FILES EARLY (for better cache efficiency) ===
# Files consumed by later phases:
#   /tmp/requirements.txt      - Python packages (pip3 install -r)
#   /tmp/install_packages.R    - R packages (sourced by R)
#   /tmp/tl_packages           - TeX Live collections (read line by line)
#   /tmp/verify_r_packages.R   - R package verification (Rscript)
# NOTE: there is deliberately no second FROM here. `build-base` is not a stage
# defined in this file, and starting a new stage would also discard the ENV
# settings declared above.
COPY tools/dependencies/requirements.txt /tmp/requirements.txt
COPY tools/dependencies/install_packages.R /tmp/install_packages.R
COPY tools/dependencies/tl_packages /tmp/tl_packages
COPY docker/build-quarto-linux/verify_r_packages.R /tmp/verify_r_packages.R
RUN echo "🚀 === DEPENDENCY FILE COPY ===" && \
    echo "📋 Files copied (early for cache efficiency):" && \
    echo " - tools/dependencies/requirements.txt → /tmp/requirements.txt" && \
    echo " - tools/dependencies/install_packages.R → /tmp/install_packages.R" && \
    echo " - tools/dependencies/tl_packages → /tmp/tl_packages" && \
    echo " - docker/build-quarto-linux/verify_r_packages.R → /tmp/verify_r_packages.R" && \
    ls -l /tmp/requirements.txt /tmp/install_packages.R /tmp/tl_packages /tmp/verify_r_packages.R && \
    echo "✅ Dependency file copy phase complete"
# Install and configure locales
RUN echo "🚀 === STARTING LOCALE CONFIGURATION ===" && \
echo "🔍 Checking system readiness..." && \
if [ -f /etc/os-release ]; then \
echo "✅ OS release file found"; \
cat /etc/os-release | grep PRETTY_NAME; \
else \
echo "❌ OS release file not found"; \
exit 1; \
fi && \
echo "📦 Installing locales package..." && \
apt-get update && apt-get install -y locales && \
echo "📦 Locales package installed" && \
echo "🔧 Generating en_US.UTF-8 locale..." && \
locale-gen en_US.UTF-8 && \
echo "📄 Locale generated" && \
echo "🔧 Updating system locale..." && \
update-locale LANG=en_US.UTF-8 && \
echo "🔧 System locale updated" && \
echo "🧹 Cleaning package cache..." && \
rm -rf /var/lib/apt/lists/* && \
echo "✅ Locale configuration complete"
# === PHASE 1: SYSTEM DEPENDENCIES ===
RUN echo "🚀 === STARTING SYSTEM DEPENDENCIES INSTALLATION ===" && \
echo "⏰ Estimated time: 2-3 minutes" && \
echo "📊 Free disk space: $(df -h / | tail -1 | awk '{print $4}')" && \
start_time=$(date +%s) && \
\
echo "🔄 Updating package lists..." && \
apt-get update && \
\
echo "📦 Installing core system packages (25 packages)..." && \
echo "📋 Package list:" && \
echo " - fonts-dejavu" && \
echo " - fonts-freefont-ttf" && \
echo " - gdk-pixbuf2.0-bin" && \
echo " - libcairo2" && \
echo " - libfontconfig1" && \
echo " - libfontconfig1-dev" && \
echo " - libfreetype6" && \
echo " - libfreetype6-dev" && \
echo " - libpango-1.0-0" && \
echo " - libpangocairo-1.0-0" && \
echo " - libpangoft2-1.0-0" && \
echo " - libxml2-dev" && \
echo " - libcurl4-openssl-dev" && \
echo " - libjpeg-dev" && \
echo " - libtiff5-dev" && \
echo " - libpng-dev" && \
echo " - libharfbuzz-dev" && \
echo " - libfribidi-dev" && \
echo " - librsvg2-dev" && \
echo " - libgdal-dev" && \
echo " - libudunits2-dev" && \
echo " - wget" && \
echo " - curl" && \
echo " - git" && \
apt-get install -y \
fonts-dejavu \
fonts-freefont-ttf \
gdk-pixbuf2.0-bin \
libcairo2 \
libfontconfig1 \
libfontconfig1-dev \
libfreetype6 \
libfreetype6-dev \
libpango-1.0-0 \
libpangocairo-1.0-0 \
libpangoft2-1.0-0 \
libxml2-dev \
libcurl4-openssl-dev \
libjpeg-dev \
libtiff5-dev \
libpng-dev \
libharfbuzz-dev \
libfribidi-dev \
librsvg2-dev \
libgdal-dev \
libudunits2-dev \
wget \
curl \
git && \
echo "📦 All system packages installed successfully" && \
\
echo "🔍 Verifying critical packages..." && \
if command -v wget >/dev/null 2>&1; then \
echo "📦 wget available"; \
else \
echo "❌ wget not found"; \
exit 1; \
fi && \
if command -v curl >/dev/null 2>&1; then \
echo "📦 curl available"; \
else \
echo "❌ curl not found"; \
exit 1; \
fi && \
if command -v git >/dev/null 2>&1; then \
echo "📦 git available"; \
else \
echo "❌ git not found"; \
exit 1; \
fi && \
\
echo "🧹 Cleaning package cache..." && \
rm -rf /var/lib/apt/lists/* && \
\
end_time=$(date +%s) && \
duration=$((end_time - start_time)) && \
echo "✅ === SYSTEM DEPENDENCIES COMPLETE === (${duration}s)" && \
echo "📊 Free disk space: $(df -h / | tail -1 | awk '{print $4}')"
# === PHASE 2: INKSCAPE INSTALLATION ===
RUN echo "🚀 === STARTING INKSCAPE INSTALLATION ===" && \
echo "⏰ Estimated time: 1-2 minutes" && \
start_time=$(date +%s) && \
\
echo "🔄 Adding Inkscape PPA repository..." && \
apt-get update && \
echo "📦 Installing software-properties-common..." && \
apt-get install -y software-properties-common && \
echo "📦 software-properties-common installed" && \
echo "🔧 Adding Inkscape PPA..." && \
add-apt-repository ppa:inkscape.dev/stable -y && \
echo "📦 Inkscape PPA added" && \
\
echo "📦 Installing Inkscape..." && \
apt-get update && \
apt-get install -y inkscape && \
echo "📦 Inkscape installed" && \
\
echo "🧹 Cleaning package cache..." && \
rm -rf /var/lib/apt/lists/* && \
\
end_time=$(date +%s) && \
duration=$((end_time - start_time)) && \
echo "✅ === INKSCAPE INSTALLATION COMPLETE === (${duration}s)"
# Install font dependencies (note: fonts-freefont-ttf already installed above)
RUN echo "🚀 === STARTING FONT INSTALLATION ===" && \
echo "📦 Installing additional fonts..." && \
apt-get update && apt-get install -y \
fonts-liberation \
fontconfig && \
echo "📦 Fonts installed" && \
echo "🔧 Updating font cache..." && \
fc-cache -fv && \
echo "📄 Font cache updated" && \
echo "🧹 Cleaning package cache..." && \
rm -rf /var/lib/apt/lists/* && \
echo "✅ Font installation complete"
# Test Inkscape SVG to PDF conversion (same as your workflow)
RUN echo "🚀 === STARTING INKSCAPE TEST ===" && \
echo "📋 Creating test SVG file..." && \
echo '<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100"><circle cx="50" cy="50" r="40" fill="red"/></svg>' > test.svg && \
echo "📄 Test SVG created" && \
echo "🔄 Converting SVG to PDF..." && \
inkscape --export-type=pdf --export-filename=test.pdf test.svg && \
echo "📦 Conversion completed" && \
if [ -f test.pdf ]; then \
echo "✅ Inkscape SVG to PDF conversion successful!"; \
echo "📊 PDF file details:"; \
ls -lh test.pdf; \
else \
echo "❌ Inkscape SVG to PDF conversion failed."; \
exit 1; \
fi && \
echo "🧹 Cleaning up test files..." && \
rm -f test.svg test.pdf && \
echo "✅ Inkscape test complete"
# === PHASE 3: GHOSTSCRIPT INSTALLATION ===
RUN echo "🚀 === STARTING GHOSTSCRIPT INSTALLATION ===" && \
echo "⏰ Estimated time: 30 seconds" && \
start_time=$(date +%s) && \
\
echo "📦 Installing Ghostscript..." && \
apt-get update && apt-get install -y ghostscript && \
echo "📦 Ghostscript installed" && \
echo "📊 Ghostscript version:" && \
gs --version && \
\
echo "🧹 Cleaning package cache..." && \
rm -rf /var/lib/apt/lists/* && \
\
end_time=$(date +%s) && \
duration=$((end_time - start_time)) && \
echo "✅ === GHOSTSCRIPT INSTALLATION COMPLETE === (${duration}s)"
# === PHASE 4: TEX LIVE INSTALLATION ===
RUN echo "🚀 === STARTING TEX LIVE INSTALLATION ===" && \
echo "⏰ Estimated time: 8-12 minutes (largest phase)" && \
echo "📊 Free disk space before: $(df -h / | tail -1 | awk '{print $4}')" && \
start_time=$(date +%s) && \
\
echo "📦 Installing TeX Live prerequisites..." && \
echo "📋 Prerequisites:" && \
echo " - perl" && \
echo " - wget" && \
echo " - xzdec" && \
apt-get update && apt-get install -y \
perl \
wget \
xzdec && \
echo "📦 Prerequisites installed" && \
\
echo "🔍 Verifying prerequisites..." && \
if command -v perl >/dev/null 2>&1; then \
echo "📦 perl available"; \
else \
echo "❌ perl not found"; \
exit 1; \
fi && \
if command -v wget >/dev/null 2>&1; then \
echo "📦 wget available"; \
else \
echo "❌ wget not found"; \
exit 1; \
fi && \
if command -v xzdec >/dev/null 2>&1; then \
echo "📦 xzdec available"; \
else \
echo "❌ xzdec not found"; \
exit 1; \
fi && \
\
rm -rf /var/lib/apt/lists/* && \
\
echo "🔄 Downloading TeX Live installer (~4MB)..." && \
wget -O /tmp/install-tl-unx.tar.gz "https://mirror.ctan.org/systems/texlive/tlnet/install-tl-unx.tar.gz" && \
echo "📥 Download completed" && \
echo "📊 Downloaded file size:" && \
ls -lh /tmp/install-tl-unx.tar.gz && \
\
echo "📦 Extracting TeX Live installer..." && \
cd /tmp && tar -xzf install-tl-unx.tar.gz && \
echo "📦 Extraction completed" && \
echo "📊 Extracted files:" && \
ls -la /tmp/install-tl-* && \
\
echo "🔧 Creating TeX Live installation profile..." && \
echo "selected_scheme scheme-medium" > /tmp/texlive.profile && \
echo "tlpdbopt_install_docfiles 0" >> /tmp/texlive.profile && \
echo "tlpdbopt_install_srcfiles 0" >> /tmp/texlive.profile && \
echo "TEXDIR /usr/local/texlive" >> /tmp/texlive.profile && \
echo "TEXMFCONFIG /usr/local/texlive/texmf-config" >> /tmp/texlive.profile && \
echo "TEXMFHOME /usr/local/texlive/texmf-home" >> /tmp/texlive.profile && \
echo "TEXMFLOCAL /usr/local/texlive/texmf-local" >> /tmp/texlive.profile && \
echo "TEXMFSYSCONFIG /usr/local/texlive/texmf-config" >> /tmp/texlive.profile && \
echo "TEXMFSYSVAR /usr/local/texlive/texmf-var" >> /tmp/texlive.profile && \
echo "TEXMFVAR /usr/local/texlive/texmf-var" >> /tmp/texlive.profile && \
echo "📄 Profile created" && \
echo "📊 Profile contents:" && \
cat /tmp/texlive.profile && \
\
echo "🔄 Installing TeX Live base system..." && \
/tmp/install-tl-*/install-tl --profile=/tmp/texlive.profile && \
echo "📦 TeX Live base system installed" && \
\
echo "🔧 Setting up TeX Live PATH..." && \
echo 'export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH' >> /etc/bash.bashrc && \
export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
echo "🔧 TeX Live PATH configured" && \
\
echo "📊 Analyzing tl_packages file..." && \
collection_count=$(grep -c '^collection-' /tmp/tl_packages) && \
echo "📦 Found $collection_count TeX Live collections to install" && \
\
echo "🔄 Installing TeX Live collections..." && \
export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
echo "📍 Checking if tlmgr is available..." && \
if command -v tlmgr >/dev/null 2>&1; then \
echo "📦 tlmgr available"; \
tlmgr --version | head -1; \
else \
echo "❌ tlmgr not found or not working"; \
exit 1; \
fi && \
i=1 && \
while IFS= read -r collection; do \
case "$collection" in \
collection-*) \
echo "📦 [$i/$collection_count] Installing $collection..."; \
if command -v tlmgr >/dev/null 2>&1; then \
tlmgr install "$collection" || echo "⚠️ Failed to install $collection, continuing..."; \
else \
echo "⚠️ tlmgr not available, skipping $collection"; \
fi; \
i=$((i+1)); \
;; \
esac; \
done < /tmp/tl_packages && \
\
echo "🧹 Cleaning up TeX Live installer..." && \
rm -rf /tmp/install-tl-* /tmp/texlive.profile /tmp/install-tl-unx.tar.gz && \
\
end_time=$(date +%s) && \
duration=$((end_time - start_time)) && \
echo "✅ === TEX LIVE INSTALLATION COMPLETE === (${duration}s)" && \
echo "📊 Free disk space after: $(df -h / | tail -1 | awk '{print $4}')" && \
echo "📊 TeX Live disk usage: $(du -sh /usr/local/texlive 2>/dev/null || echo 'N/A')"
# Verify TeX Live installation (with error handling)
# Informational TeX Live check: prints the install directories and the
# lualatex version if the binary exists.
# Every probe has a fallback (`|| echo ...`, and the else branch only prints),
# so a missing lualatex is reported here but does not fail this layer.
RUN echo "🔄 Verifying TeX Live installation..." && \
export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
echo "📍 PATH: $PATH" && \
echo "📍 Checking TeX Live directory:" && \
ls -la /usr/local/texlive/ || echo "❌ TeX Live directory not found" && \
ls -la /usr/local/texlive/bin/ || echo "❌ TeX Live bin directory not found" && \
if [ -f /usr/local/texlive/bin/x86_64-linux/lualatex ]; then \
echo "✅ lualatex found"; \
/usr/local/texlive/bin/x86_64-linux/lualatex --version | head -1; \
else \
echo "❌ lualatex not found, checking for alternative locations"; \
find /usr/local/texlive -name "lualatex" -type f 2>/dev/null || echo "No lualatex found anywhere"; \
fi && \
echo "✅ TeX Live verification complete (allowing partial failures)"
# === PHASE 6: R INSTALLATION ===
# Installs R from the Ubuntu package archive and verifies that it starts.
# All commands are chained with `&&`, so a failing `apt-get` or `R --version`
# aborts the layer by itself. The version is checked with a direct
# `R --version` call because an `echo "$(...)"` wrapper always exits 0.
RUN echo "🚀 === STARTING R INSTALLATION ===" && \
    echo "⏰ Estimated time: 1-2 minutes" && \
    start_time=$(date +%s) && \
    \
    echo "📦 Installing R and development packages..." && \
    echo "📋 R packages:" && \
    echo " - r-base" && \
    echo " - r-base-dev" && \
    echo " - r-recommended" && \
    apt-get update && apt-get install -y \
        r-base \
        r-base-dev \
        r-recommended && \
    echo "📦 R packages installed" && \
    \
    echo "🧹 Cleaning package cache..." && \
    rm -rf /var/lib/apt/lists/* && \
    \
    R --version >/dev/null && \
    echo "📊 R version: $(R --version | head -1)" && \
    \
    end_time=$(date +%s) && \
    duration=$((end_time - start_time)) && \
    echo "✅ === R INSTALLATION COMPLETE === (${duration}s)"
# === PHASE 7: PYTHON INSTALLATION ===
# Installs Python 3, pip and the Python headers from the Ubuntu archive and
# verifies that both interpreters start.
# All commands are chained with `&&`, so any failure aborts the layer. The
# versions are checked with direct calls because `echo "$(...)"` always
# exits 0 and would hide a broken installation.
RUN echo "🚀 === STARTING PYTHON INSTALLATION ===" && \
    echo "⏰ Estimated time: 1 minute" && \
    start_time=$(date +%s) && \
    \
    echo "📦 Installing Python 3 and development packages..." && \
    echo "📋 Python packages:" && \
    echo " - python3" && \
    echo " - python3-pip" && \
    echo " - python3-dev" && \
    apt-get update && apt-get install -y \
        python3 \
        python3-pip \
        python3-dev && \
    echo "📦 Python packages installed" && \
    \
    echo "🧹 Cleaning package cache..." && \
    rm -rf /var/lib/apt/lists/* && \
    \
    python3 --version >/dev/null && \
    echo "📊 Python version: $(python3 --version)" && \
    pip3 --version >/dev/null && \
    echo "📊 Pip version: $(pip3 --version)" && \
    \
    end_time=$(date +%s) && \
    duration=$((end_time - start_time)) && \
    echo "✅ === PYTHON INSTALLATION COMPLETE === (${duration}s)"
# === PHASE 5: QUARTO INSTALLATION ===
# Quarto release to install; override with
# `docker build --build-arg QUARTO_VERSION=<x.y.z>`. The default matches the
# previously hard-coded version.
ARG QUARTO_VERSION=1.7.31
# Downloads the Quarto .deb from the GitHub release page, installs it with
# dpkg and removes the installer. Commands are chained with `&&`, so a failed
# download or install aborts the layer without a separate `$?` test.
RUN echo "🚀 === STARTING QUARTO INSTALLATION ===" && \
    echo "⏰ Estimated time: 1 minute" && \
    start_time=$(date +%s) && \
    quarto_deb="quarto-${QUARTO_VERSION}-linux-amd64.deb" && \
    \
    echo "📦 Downloading Quarto ${QUARTO_VERSION}..." && \
    wget -q "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/${quarto_deb}" && \
    echo "📥 Download completed" && \
    echo "📊 Downloaded file size:" && \
    ls -lh "${quarto_deb}" && \
    \
    echo "📦 Installing Quarto..." && \
    dpkg -i "${quarto_deb}" && \
    echo "📦 Quarto installed" && \
    \
    echo "🧹 Cleaning up installer..." && \
    rm "${quarto_deb}" && \
    echo "🧹 Installer cleaned up" && \
    \
    end_time=$(date +%s) && \
    duration=$((end_time - start_time)) && \
    echo "✅ === QUARTO INSTALLATION COMPLETE === (${duration}s)"
# Create R library directory
# Ensure the directory named by R_LIBS_USER exists before any R package is
# installed into it. `set -e` stops at the first failing command.
RUN set -e; \
    echo "🚀 === STARTING R LIBRARY SETUP ==="; \
    echo "📁 Creating R library directory..."; \
    mkdir -p $R_LIBS_USER; \
    echo "✅ R library directory created: $R_LIBS_USER"; \
    echo "✅ R library setup complete"
# === PHASE 8: PYTHON PACKAGES ===
RUN echo "🚀 === STARTING PYTHON PACKAGE INSTALLATION ===" && \
echo "⏰ Estimated time: 1-2 minutes" && \
start_time=$(date +%s) && \
\
echo "🔄 Upgrading pip..." && \
pip3 install --upgrade pip && \
\
echo "📊 Analyzing requirements.txt..." && \
package_count=$(grep -v '^#' /tmp/requirements.txt | grep -v '^$' | wc -l) && \
echo "📦 Found $package_count Python packages to install" && \
\
echo "🔄 Installing Python packages with space optimization..." && \
pip3 install --no-cache-dir -r /tmp/requirements.txt && \
if [ $? -ne 0 ]; then \
echo "❌ Failed to install Python packages"; \
exit 1; \
fi && \
\
echo "🧹 Cleaning Python installation caches..." && \
pip3 cache purge && \
find /usr -name "*.pyc" -delete && \
find /usr -name "__pycache__" -type d -exec rm -rf {} + || true && \
\
echo "📊 Installed Python packages:" && \
pip3 list | head -10 && \
echo "📊 Total packages: $(pip3 list | wc -l)" && \
\
end_time=$(date +%s) && \
duration=$((end_time - start_time)) && \
echo "✅ === PYTHON PACKAGES COMPLETE === (${duration}s)"
# === PHASE 9: R PACKAGES ===
# Step 9.1: Set up R environment and install remotes
# Points R at the R_LIBS_USER library, installs `remotes` into it and fails
# the build if the package cannot be loaded afterwards.
# install.packages() only warns when an install fails, so the explicit
# requireNamespace() check is what turns a failed install into a non-zero exit.
RUN echo "🚀 === STEP 9.1: SETTING UP R ENVIRONMENT ===" && \
    R --slave -e " \
        options(repos = c(CRAN = 'https://cran.rstudio.com')); \
        cat('🔄 Setting up R environment...\n'); \
        cat(paste('R library path:', Sys.getenv('R_LIBS_USER'), '\n')); \
        lib_path <- Sys.getenv('R_LIBS_USER'); \
        dir.create(lib_path, showWarnings = FALSE, recursive = TRUE); \
        .libPaths(lib_path); \
        cat('📦 Installing remotes package...\n'); \
        install.packages('remotes'); \
        if (!requireNamespace('remotes', quietly = TRUE)) { \
            cat('❌ Failed to set up R environment\n'); \
            quit(status = 1); \
        }; \
        cat('✅ R environment setup complete\n'); \
    " && \
    echo "✅ R environment setup successful"
# Step 9.2: Install R packages from install_packages.R or fallback
# Installs the project's R packages into the R_LIBS_USER library.
# If /tmp/install_packages.R exists it is sourced; otherwise a fixed fallback
# list (rmarkdown, knitr, tidyverse, ggplot2, bookdown) is installed from CRAN.
# NOTE(review): the `if [ $? -ne 0 ]` test is only evaluated after R has exited
# successfully (it follows `&&`), so its error branch cannot be reached; a
# failing R process already aborts the chain.
RUN echo "🚀 === STEP 9.2: INSTALLING R PACKAGES ===" && \
R --slave -e " \
options(repos = c(CRAN = 'https://cran.rstudio.com')); \
lib_path <- Sys.getenv('R_LIBS_USER'); \
.libPaths(lib_path); \
if (file.exists('/tmp/install_packages.R')) { \
cat('📦 Installing packages from tools/dependencies/install_packages.R...\n'); \
source('/tmp/install_packages.R'); \
} else { \
cat('⚠️ No tools/dependencies/install_packages.R found, installing common packages\n'); \
pkgs <- c('rmarkdown', 'knitr', 'tidyverse', 'ggplot2', 'bookdown'); \
cat(paste('📦 Installing packages:', paste(pkgs, collapse=', '), '\n')); \
install.packages(pkgs); \
}; \
cat('✅ R package installation complete\n'); \
" && \
if [ $? -ne 0 ]; then \
echo "❌ Failed to install R packages"; \
exit 1; \
fi && \
echo "✅ R packages installed successfully"
# Step 9.3: Verify R package installation
# Prints the first ten installed R package names and the total count.
# `installed.packages()[, 'Package']` is a character vector, so the total is
# taken with length(); nrow() on a vector returns NULL and printed no number.
# A failing R process aborts the `&&` chain, so no separate `$?` test is needed.
RUN echo "🚀 === STEP 9.3: VERIFYING R PACKAGES ===" && \
    R --slave -e " \
        lib_path <- Sys.getenv('R_LIBS_USER'); \
        .libPaths(lib_path); \
        cat('📊 Installed packages:\n'); \
        ip <- installed.packages()[, 'Package']; \
        print(head(ip, 10)); \
        cat(paste('Total packages installed:', length(ip), '\n')); \
    " && \
    echo "✅ R package verification successful"
# === PHASE 10: R PACKAGE VERIFICATION ===
# Runs the R verification script; a non-zero Rscript exit fails the layer
# through the `&&` chain.
# NOTE(review): expects /tmp/verify_r_packages.R to already exist in the
# image; confirm an earlier instruction copies it there.
# NOTE(review): the `if [ $? -ne 0 ]` test is only evaluated after Rscript has
# succeeded (it follows `&&`), so its error branch cannot be reached.
RUN echo "🔍 Verifying R package installation..." && \
Rscript /tmp/verify_r_packages.R && \
if [ $? -ne 0 ]; then \
echo "❌ R package verification failed"; \
exit 1; \
fi && \
echo "✅ R package verification successful"
# === PHASE 11: COMPREHENSIVE CLEANUP ===
RUN echo "🚀 === STARTING COMPREHENSIVE CLEANUP ===" && \
echo "📊 Disk space before cleanup: $(df -h / | tail -1 | awk '{print $4}')" && \
start_time=$(date +%s) && \
\
echo "🧹 Removing temporary files..." && \
rm -rf /tmp/* && \
rm -rf /var/tmp/* && \
\
echo "🧹 Cleaning package caches..." && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /var/cache/apt/* && \
\
echo "🧹 Cleaning Python caches..." && \
find /usr -name "*.pyc" -delete && \
find /usr -name "__pycache__" -type d -exec rm -rf {} + || true && \
pip3 cache purge || true && \
\
echo "🧹 Cleaning R temporary files..." && \
rm -rf /tmp/Rtmp* || true && \
rm -rf /var/lib/R/site-library/*/help || true && \
\
echo "🧹 Cleaning TeX Live caches and docs..." && \
rm -rf /usr/local/texlive/*/texmf-var/luatex-cache/* || true && \
rm -rf /usr/local/texlive/*/texmf-var/web2c/* || true && \
\
echo "🧹 Removing unnecessary system files..." && \
rm -rf /usr/share/doc/* && \
rm -rf /usr/share/man/* && \
rm -rf /usr/share/info/* && \
rm -rf /var/log/* && \
\
end_time=$(date +%s) && \
duration=$((end_time - start_time)) && \
echo "✅ === COMPREHENSIVE CLEANUP COMPLETE === (${duration}s)" && \
echo "📊 Final disk space: $(df -h / | tail -1 | awk '{print $4}')"
# Set working directory
WORKDIR /workspace
# Verify installations
RUN echo "🚀 === STARTING FINAL VERIFICATION ===" && \
export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
echo "📊 Checking Quarto..." && \
if command -v quarto >/dev/null 2>&1; then \
quarto --version && \
echo "✅ Quarto verified"; \
else \
echo "❌ Quarto not found in PATH"; \
echo "📍 Checking for quarto in common locations:"; \
find /usr -name "quarto" -type f 2>/dev/null || echo "No quarto found"; \
exit 1; \
fi && \
echo "📊 Checking Python..." && \
if command -v python3 >/dev/null 2>&1; then \
python3 --version && \
echo "✅ Python verified"; \
else \
echo "❌ Python3 not found"; \
exit 1; \
fi && \
echo "📊 Checking R..." && \
if command -v R >/dev/null 2>&1; then \
R --version && \
echo "✅ R verified"; \
else \
echo "❌ R not found"; \
exit 1; \
fi && \
echo "📊 Checking LaTeX..." && \
if command -v lualatex >/dev/null 2>&1; then \
lualatex --version && \
echo "✅ LaTeX verified"; \
else \
echo "❌ lualatex not found"; \
echo "📍 Checking for lualatex in TeX Live:"; \
find /usr/local/texlive -name "lualatex" -type f 2>/dev/null || echo "No lualatex found"; \
exit 1; \
fi && \
echo "✅ Final verification complete"
# Build summary: print the installed tool versions (a build-time RUN, not a Docker HEALTHCHECK)
RUN export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
echo "✅ Container build completed successfully" && \
echo "📊 Quarto version: $(quarto --version)" && \
echo "📊 Python version: $(python3 --version)" && \
echo "📊 R version: $(R --version | head -1)" && \
echo "📊 TeX Live: $(lualatex --version | head -1)"

View File

@@ -1,109 +0,0 @@
# Quarto Build Container
This directory contains the Docker container configuration for the MLSysBook build system.
## Purpose
The container pre-installs all dependencies to eliminate the 30-45 minute setup time for Linux builds, reducing build times from 45 minutes to 5-10 minutes.
## Structure
```
docker/quarto-build/
├── Dockerfile # Container definition
├── README.md # This file
└── .dockerignore # Files to exclude from build
```
## Container Contents
- **Base**: Ubuntu 22.04
- **TeX Live**: Full distribution (texlive-full)
- **R**: R-base with all required packages
- **Python**: Python 3.13 with all requirements
- **Quarto**: Version 1.7.31
- **Tools**: Inkscape, Ghostscript, fonts
- **Dependencies**: All from `tools/dependencies/`
## Build Process
The container is built and tested via GitHub Actions:
```bash
# Trigger container build
gh workflow run build-container.yml
```
## Usage
The container is used in the containerized build workflow:
```yaml
container:
image: ghcr.io/harvard-edge/cs249r_book/quarto-build:latest
options: --user root
```
## Testing
The container build includes 17 comprehensive tests:
1. Quarto functionality
2. Python packages (all from requirements.txt)
3. R packages (all from install_packages.R)
4. TeX Live and LaTeX engines
5. Inkscape SVG to PDF conversion
6. Ghostscript PDF compression
7. Fonts and graphics libraries
8. Quarto render test
9. TikZ compilation test
10. System resources check
11. Network connectivity
12. Book structure compatibility
13. Quarto configuration files
14. Dependencies files accessibility
15. Quarto check (same as workflow)
16. Actual build process simulation
17. Memory and disk space verification
## Registry
- **Registry**: GitHub Container Registry (ghcr.io)
- **Image**: `ghcr.io/harvard-edge/cs249r_book/quarto-build`
- **Tags**: `latest`, `main`, `dev`, branch-specific tags
- **Size**: ~2-3GB (includes TeX Live, R, Python packages)
## Performance
The container reduces build times significantly:
- **Traditional Linux build**: 45 minutes (including dependency installation)
- **Containerized build**: 5-10 minutes (dependencies pre-installed)
- **Container size**: ~2-3GB (optimized with multi-layer cleanup)
- **Build phases**: 11 optimized phases with progress tracking
## Recent Improvements (2025)
- Fixed dependency path issues after repository restructuring
- Improved error handling and progress tracking
- Optimized TeX Live package installation
- Enhanced cleanup procedures for smaller image size
- Added comprehensive testing (17 test scenarios)
- Fixed PATH environment variables for all tools
## Build Phases
1. **System Dependencies** - Core Ubuntu packages and libraries
2. **Inkscape Installation** - SVG to PDF conversion capability
3. **Quarto Installation** - Quarto CLI, pinned to v1.7.31
4. **TeX Live Installation** - Complete LaTeX distribution
5. **Ghostscript Installation** - PDF processing capabilities
6. **R Installation** - R base and development packages
7. **Python Installation** - Python 3 with pip
8. **Python Packages** - All production requirements
9. **R Packages** - All required R libraries
10. **R Package Verification** - Validation of successful installation
11. **Comprehensive Cleanup** - Size optimization and cache clearing
**Overall build-time impact:**
- **Traditional build**: 45 minutes
- **Containerized build**: 5-10 minutes
- **Improvement**: 80-90% time reduction

View File

@@ -1,13 +0,0 @@
#!/usr/bin/env Rscript
# Post-install check for the R package set.
# The install script is sourced first; it is expected to define
# `required_packages` (sourcing also re-runs everything else in that script).
# Every listed package must be loadable as a namespace, otherwise the script
# reports the missing ones and exits with status 1.
source('/tmp/install_packages.R')

is_installed <- sapply(required_packages, requireNamespace, quietly = TRUE)
not_found <- required_packages[!is_installed]

if (length(not_found) == 0) {
  cat('✅ All required R packages installed successfully\n')
} else {
  cat('❌ Missing packages:', paste(not_found, collapse = ', '), '\n')
  quit(status = 1)
}

View File

@@ -1,71 +0,0 @@
# Exclude unnecessary files from Docker build context
# This reduces build time and image size
# Build artifacts
build/
_book/
_site/
*.pdf
*.html
# Git and version control
.git/
.gitignore
# Documentation
docs/
*.md
!docker/quarto-build-windows/README.md
# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~
# OS files
.DS_Store
Thumbs.db
# Logs and temporary files
*.log
*.tmp
*.temp
# Node modules (if any)
node_modules/
# Python cache
__pycache__/
*.pyc
*.pyo
# R cache
.Rhistory
.RData
# Large media files (not needed for build)
assets/media/
*.mp4
*.avi
*.mov
# Test files
test-*
*.test.*
# Backup files
*.bak
*.backup
# Large data files
data/
*.csv
*.json
*.xml
# Keep only essential files for build
# - tools/dependencies/ (needed for package installation)
# - book/ (needed for build testing)
# - .github/workflows/ (needed for workflow files)

View File

@@ -1,369 +0,0 @@
# escape=`
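# MLSysBook Windows Quarto Build Container (Windows Server 2022)
# - PowerShell 7.4.1 via ZIP (no MSI)
# - Chocolatey and Scoop as package managers
# - TeX Live via Chocolatey + collections listed in tl_packages
# - Quarto, Python, Ghostscript, Inkscape and R via Scoop
# - Visual C++ Redistributable via Chocolatey
# - Python packages from requirements.txt; R packages via install_packages.R
# - Verifications: tool version checks and `quarto check`
# NOTE: earlier revisions of this header listed Quarto 1.7.31, Python 3.13.1,
# R 4.3.2, a TeX Live 2025 snapshot, and kpsewhich/TikZ smoke tests. The
# install commands below do not specify versions and do not run those tests.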
FROM mcr.microsoft.com/windows/server:ltsc2022
# Use Windows PowerShell initially
# (PowerShell 7 is not installed yet; the shell is switched after PHASE 1.)
SHELL ["powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass", "-Command"]
# ------------------------------------------------------------
# PHASE 0: Base dirs and env (same as quarto-build workflow)
# ------------------------------------------------------------
# ENV values apply to every later instruction and to containers started from
# the image. The RUN step creates C:\temp (staging area for dependency files)
# and C:\r-lib (the directory R_LIBS_USER points at), then echoes the variables
# into the build log.
ENV R_LIBS_USER="C:/r-lib"
ENV QUARTO_LOG_LEVEL="INFO"
ENV PYTHONIOENCODING="utf-8"
ENV LANG="en_US.UTF-8"
ENV LC_ALL="en_US.UTF-8"
RUN Write-Host '=== STARTING BASE SETUP ===' ; `
Write-Host 'Creating base directories...' ; `
New-Item -ItemType Directory -Force -Path 'C:\temp' | Out-Null ; `
Write-Host '📁 Created C:\temp' ; `
New-Item -ItemType Directory -Force -Path 'C:\r-lib' | Out-Null ; `
Write-Host '📁 Created C:\r-lib' ; `
Write-Host 'Environment variables set:' ; `
Write-Host " R_LIBS_USER: $env:R_LIBS_USER" ; `
Write-Host " QUARTO_LOG_LEVEL: $env:QUARTO_LOG_LEVEL" ; `
Write-Host " PYTHONIOENCODING: $env:PYTHONIOENCODING" ; `
Write-Host " LANG: $env:LANG" ; `
Write-Host " LC_ALL: $env:LC_ALL" ; `
Write-Host '✅ Base setup complete'
# ------------------------------------------------------------
# PHASE 1: PowerShell 7 (ZIP install, container-safe)
# ------------------------------------------------------------
# Downloads the PowerShell 7.4.1 x64 ZIP, extracts it to
# C:\Program Files\PowerShell\7 and deletes the ZIP. The install directory is
# prepended to the Machine-level PATH only when it is not already present.
# The last command starts the new pwsh.exe by full path and prints its version.
RUN Write-Host '=== STARTING POWERSHELL 7 INSTALLATION ===' ; `
Write-Host 'Using ZIP install for container compatibility' ; `
Write-Host 'Download URL: https://github.com/PowerShell/PowerShell/releases/download/v7.4.1/PowerShell-7.4.1-win-x64.zip' ; `
$Url = 'https://github.com/PowerShell/PowerShell/releases/download/v7.4.1/PowerShell-7.4.1-win-x64.zip' ; `
$Zip = 'C:\PowerShell-7.4.1.zip' ; `
Write-Host "Downloading PowerShell 7 to: $Zip" ; `
Invoke-WebRequest -Uri $Url -OutFile $Zip -UseBasicParsing ; `
Write-Host '📥 Download completed' ; `
Write-Host 'Creating PowerShell directory...' ; `
New-Item -ItemType Directory -Force -Path 'C:\Program Files\PowerShell\7' | Out-Null ; `
Write-Host '📁 Directory created' ; `
Write-Host 'Extracting ZIP file...' ; `
Expand-Archive -Path $Zip -DestinationPath 'C:\Program Files\PowerShell\7' -Force ; `
Write-Host '📦 Extraction completed' ; `
Write-Host 'Cleaning up ZIP file...' ; `
Remove-Item $Zip -Force ; `
Write-Host '🧹 Cleanup completed' ; `
Write-Host 'Adding PowerShell to PATH...' ; `
$mach = [Environment]::GetEnvironmentVariable('PATH','Machine') ; `
Write-Host "Current PATH: $mach" ; `
if ($mach -notmatch [regex]::Escape('C:\Program Files\PowerShell\7')) { `
[Environment]::SetEnvironmentVariable('PATH', ('C:\Program Files\PowerShell\7;' + $mach), 'Machine') ; `
Write-Host '🔗 PowerShell added to PATH' ; `
} else { `
Write-Host '⚠️ PowerShell already in PATH' ; `
} ; `
Write-Host 'Verifying PowerShell installation...' ; `
& 'C:\Program Files\PowerShell\7\pwsh.exe' -NoLogo -Command '$PSVersionTable.PSVersion ; Write-Host ''PowerShell 7 installation verified ✅'''
# Switch to PowerShell 7 for subsequent layers
# (full path is used so the SHELL does not depend on PATH resolution)
SHELL ["C:\\Program Files\\PowerShell\\7\\pwsh.exe", "-NoLogo", "-ExecutionPolicy", "Bypass", "-Command"]
# ------------------------------------------------------------
# PHASE 2: Chocolatey (package manager for Windows)
# ------------------------------------------------------------
# Forces TLS 1.2, then downloads https://chocolatey.org/install.ps1 and runs it
# directly with `iex`. `choco --version` is printed as an install check.
# NOTE(review): the install script is executed as downloaded, with no checksum
# or version pin - confirm this is acceptable for this image.
RUN Write-Host '=== STARTING CHOCOLATEY INSTALLATION ===' ; `
Write-Host 'Installing Chocolatey package manager...' ; `
Write-Host 'Setting TLS 1.2 for download...' ; `
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 ; `
Write-Host '🔒 TLS 1.2 enabled' ; `
Write-Host 'Downloading and executing Chocolatey install script...' ; `
iex ((New-Object Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')) ; `
Write-Host '📦 Chocolatey install script executed' ; `
Write-Host 'Verifying Chocolatey installation...' ; `
choco --version ; `
Write-Host '✅ Chocolatey installation complete'
# ------------------------------------------------------------
# PHASE 3: Copy dependency files (same as quarto-build workflow)
# ------------------------------------------------------------
# Copies the Python, R and TeX Live dependency manifests from the build context
# into C:/temp for the install phases below. The two RUN instructions only
# print log markers.
# NOTE(review): PHASE 13 runs C:/temp/verify_r_packages.R and PHASE 14 deletes
# it, but no COPY in this Dockerfile places that file in the image - confirm
# whether a COPY is missing here.
RUN Write-Host '=== STARTING DEPENDENCY FILE COPY ==='
COPY tools/dependencies/requirements.txt C:/temp/requirements.txt
COPY tools/dependencies/install_packages.R C:/temp/install_packages.R
COPY tools/dependencies/tl_packages C:/temp/tl_packages
RUN Write-Host '✅ Dependency file copy complete'
# ------------------------------------------------------------
# PHASE 4: Install TeX Live FIRST (Most complex, fail fast)
# ------------------------------------------------------------
# Installs TeX Live with Chocolatey, picks the newest four-digit year directory
# under <SystemDrive>\texlive, and installs every entry listed in
# C:\temp\tl_packages (blank lines and lines starting with '#' are ignored).
#
# The bin directory is written to the Machine-level PATH, guarded against
# duplicates in the same way PHASE 1 does for PowerShell. Assigning $env:PATH
# alone only affects this RUN instruction, so later layers (for example the
# final `lualatex --version` check) would not see it.
#
# Fail-fast points: a missing year directory or a missing lualatex.exe throws,
# which makes this RUN (and the build) fail. A single collection that fails to
# install is still only reported, as before.
# New messages use single-quoted strings with '+' so they do not depend on how
# double quotes survive the SHELL command line.
RUN Write-Host '=== STARTING TEX LIVE INSTALLATION (2025) ===' ; `
    Write-Host '📦 Installing TeX Live via Chocolatey...' ; `
    choco install texlive -y ; `
    Write-Host '✅ TeX Live installed via Chocolatey' ; `
    `
    Write-Host '🔍 Finding TeX Live installation directory...' ; `
    $texRoot = Join-Path $env:SystemDrive 'texlive' ; `
    Write-Host "📁 TeX Live root: $texRoot" ; `
    `
    Write-Host '🔍 Looking for year-based directories...' ; `
    $texYearDir = Get-ChildItem $texRoot -Directory | `
        Where-Object { $_.Name -match '^\d{4}$' } | `
        Sort-Object Name -Descending | `
        Select-Object -First 1 ; `
    if (-not $texYearDir) { `
        throw ('No TeX Live year directory found under ' + $texRoot) ; `
    } ; `
    Write-Host "📁 Found year directory: $($texYearDir.FullName)" ; `
    `
    $texLiveBin = Join-Path $texYearDir.FullName 'bin\windows' ; `
    Write-Host "📁 TeX Live bin directory: $texLiveBin" ; `
    `
    Write-Host '🔧 Adding TeX Live to PATH...' ; `
    $env:PATH = "$texLiveBin;$env:PATH" ; `
    $mach = [Environment]::GetEnvironmentVariable('PATH','Machine') ; `
    if ($mach -notmatch [regex]::Escape($texLiveBin)) { `
        [Environment]::SetEnvironmentVariable('PATH', ($texLiveBin + ';' + $mach), 'Machine') ; `
        Write-Host '🔗 TeX Live bin directory added to Machine PATH' ; `
    } ; `
    Write-Host "✅ PATH updated with: $texLiveBin" ; `
    `
    Write-Host '📋 Reading collections from tl_packages...' ; `
    if (Test-Path 'C:\temp\tl_packages') { `
        $collections = Get-Content 'C:\temp\tl_packages' | `
            Where-Object { $_.Trim() -ne '' -and -not $_.Trim().StartsWith('#') } ; `
        Write-Host "📦 Found $($collections.Count) collections to install" ; `
        Write-Host '📋 Collections:' ; `
        $collections | ForEach-Object { Write-Host " - $_" } ; `
        `
        Write-Host '🔄 Installing collections...' ; `
        $i = 1 ; `
        foreach ($collection in $collections) { `
            Write-Host "📦 [$i/$($collections.Count)] Installing $collection..." ; `
            & "$texLiveBin\tlmgr.bat" install $collection ; `
            if ($LASTEXITCODE -eq 0) { `
                Write-Host "$collection installed successfully" ; `
            } else { `
                Write-Host "⚠️ Failed to install $collection, continuing..." ; `
            } ; `
            $i++ ; `
        } ; `
        Write-Host '✅ Collection installation complete' ; `
    } else { `
        Write-Host '⚠️ No tl_packages file found, skipping collection installation' ; `
    } ; `
    `
    Write-Host '🔄 Updating tlmgr...' ; `
    & "$texLiveBin\tlmgr.bat" update --self --all ; `
    Write-Host '✅ tlmgr updated' ; `
    `
    Write-Host '🔍 Verifying lualatex installation...' ; `
    if (-not (Test-Path (Join-Path $texLiveBin 'lualatex.exe'))) { `
        throw ('lualatex.exe not found in ' + $texLiveBin) ; `
    } ; `
    & "$texLiveBin\lualatex.exe" --version ; `
    Write-Host '✅ TeX Live installation verified'
# ------------------------------------------------------------
# PHASE 5: Install Scoop (Package manager setup)
# ------------------------------------------------------------
# Downloads the Scoop installer from get.scoop.sh into install.ps1 in the
# current directory and runs it with -RunAsAdmin. The <home>\scoop\shims
# directory is then prepended to the Machine-level PATH (no duplicate check),
# Git is installed because buckets are Git repositories, and the r-bucket and
# extras buckets are added.
# install.ps1 is not deleted in this step.
RUN Write-Host '=== STARTING SCOOP INSTALLATION ===' ; `
Write-Host 'Setting UTF-8 encoding...' ; `
[Console]::OutputEncoding = [System.Text.Encoding]::UTF8 ; `
$OutputEncoding = [System.Text.Encoding]::UTF8 ; `
Write-Host '🔤 UTF-8 encoding set' ; `
Write-Host 'Setting execution policy...' ; `
Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser -Force ; `
Write-Host '🔐 Execution policy set' ; `
Write-Host 'Installing Scoop package manager...' ; `
Invoke-WebRequest -useb get.scoop.sh -outfile 'install.ps1' ; `
Write-Host '📥 Scoop install script downloaded' ; `
& .\install.ps1 -RunAsAdmin ; `
Write-Host '📦 Scoop installed' ; `
Write-Host 'Adding Scoop shims to PATH...' ; `
$scoopShims = Join-Path (Resolve-Path ~).Path 'scoop\shims' ; `
Write-Host "Scoop shims path: $scoopShims" ; `
$mach = [Environment]::GetEnvironmentVariable('PATH','Machine') ; `
[Environment]::SetEnvironmentVariable('PATH', ($scoopShims + ';' + $mach), 'Machine') ; `
Write-Host '🔗 Added Scoop shims to PATH' ; `
Write-Host 'Installing Git (required for buckets)...' ; `
scoop install git ; `
Write-Host '📦 Git installed' ; `
Write-Host 'Adding r-bucket...' ; `
scoop bucket add r-bucket https://github.com/cderv/r-bucket.git ; `
Write-Host '📦 r-bucket added' ; `
Write-Host 'Adding extras bucket...' ; `
scoop bucket add extras ; `
Write-Host '📦 extras bucket added' ; `
Write-Host '✅ Scoop installation completed!'
# ------------------------------------------------------------
# PHASE 6: Install Quarto (Main tool)
# ------------------------------------------------------------
# Installs Quarto through Scoop and prints its version. No version is given to
# `scoop install`, so the installed release is whatever Scoop resolves at build
# time.
RUN Write-Host '=== STARTING QUARTO INSTALLATION ===' ; `
Write-Host 'Installing Quarto via Scoop...' ; `
scoop install quarto ; `
Write-Host '📦 Quarto installed' ; `
Write-Host 'Verifying Quarto installation...' ; `
quarto --version ; `
Write-Host '✅ Quarto installation completed!'
# ------------------------------------------------------------
# PHASE 7: Install Python (Medium complexity)
# ------------------------------------------------------------
# Installs the `python` package from Scoop's main bucket (no version specified)
# and prints the interpreter version.
RUN Write-Host '=== STARTING PYTHON INSTALLATION ===' ; `
Write-Host 'Installing Python via Scoop (same as quarto-build workflow)...' ; `
Write-Host 'Installing Python from main bucket...' ; `
scoop install main/python ; `
Write-Host '📦 Python installed' ; `
Write-Host 'Verifying Python installation...' ; `
python --version ; `
Write-Host '✅ Python installation complete'
# ------------------------------------------------------------
# PHASE 8: Install Python packages (Medium complexity)
# ------------------------------------------------------------
# Upgrades pip, echoes C:/temp/requirements.txt into the build log, then
# installs everything it lists with `python -m pip install -r`.
RUN Write-Host '=== STARTING PYTHON PACKAGE INSTALLATION ===' ; `
Write-Host 'Installing Python packages from requirements.txt (same as quarto-build workflow)...' ; `
Write-Host 'Upgrading pip...' ; `
python -m pip install --upgrade pip ; `
Write-Host '📦 pip upgraded' ; `
Write-Host 'Installing packages from requirements.txt...' ; `
Write-Host 'Requirements file contents:' ; `
Get-Content C:/temp/requirements.txt | Write-Host ; `
python -m pip install -r C:/temp/requirements.txt ; `
Write-Host '✅ Python package installation complete'
# ------------------------------------------------------------
# PHASE 9: Install Visual C++ Redistributable (Required for Quarto DLLs)
# ------------------------------------------------------------
# Installs the Chocolatey `vcredist-all` package. Note that this runs after
# PHASE 6 has already executed `quarto --version`.
RUN Write-Host '=== STARTING VISUAL C++ REDISTRIBUTABLE INSTALLATION ===' ; `
Write-Host 'Installing Microsoft Visual C++ Redistributable...' ; `
Write-Host 'This is required for Quarto DLL dependencies on Windows' ; `
choco install vcredist-all -y ; `
Write-Host '📦 Visual C++ Redistributable installed' ; `
Write-Host '✅ Visual C++ Redistributable installation complete'
# ------------------------------------------------------------
# PHASE 10: Install Ghostscript (required for PDF generation)
# ------------------------------------------------------------
# Installs the `ghostscript` package from Scoop's main bucket and prints the
# version reported by the `gs` command.
RUN Write-Host '=== STARTING GHOSTSCRIPT INSTALLATION ===' ; `
Write-Host 'Installing Ghostscript via Scoop...' ; `
scoop install main/ghostscript ; `
Write-Host '📦 Ghostscript installed' ; `
Write-Host 'Verifying Ghostscript installation...' ; `
gs --version ; `
Write-Host '✅ Ghostscript installation complete'
# ------------------------------------------------------------
# PHASE 11: Install Inkscape (required for SVG processing)
# ------------------------------------------------------------
# Installs Inkscape through Scoop (no bucket prefix is given; the extras bucket
# was added in PHASE 5) and prints its version.
RUN Write-Host '=== STARTING INKSCAPE INSTALLATION ===' ; `
Write-Host 'Installing Inkscape via Scoop...' ; `
scoop install inkscape ; `
Write-Host '📦 Inkscape installed' ; `
Write-Host 'Verifying Inkscape installation...' ; `
inkscape --version ; `
Write-Host '✅ Inkscape installation complete'
# ------------------------------------------------------------
# PHASE 12: Install R (Medium complexity)
# ------------------------------------------------------------
# Installs the `r` package from Scoop's main bucket (no version specified) and
# prints the R version.
RUN Write-Host '=== STARTING R INSTALLATION ===' ; `
Write-Host 'Installing R via Scoop (same as quarto-build workflow)...' ; `
Write-Host 'Installing R from main bucket...' ; `
scoop install main/r ; `
Write-Host '📦 R installed' ; `
Write-Host 'Verifying R installation...' ; `
R --version ; `
Write-Host '✅ R installation complete'
# ------------------------------------------------------------
# PHASE 13: Install R packages (Medium complexity)
# ------------------------------------------------------------
# Every `Rscript` call starts a fresh R process, so `options(repos=...)` and
# `.libPaths(...)` only affect code evaluated in the SAME call. The setup
# expressions are therefore passed as additional `-e` arguments in front of the
# `install.packages()` call that needs them, instead of as separate Rscript
# invocations whose settings were discarded.
#
# C:/temp/install_packages.R is still run as a standalone script, unchanged.
# The rmarkdown/knitr load check now fails the build when Rscript exits
# non-zero; previously the `stop()` result was ignored.
# This Dockerfile does not COPY verify_r_packages.R, so the extended
# verification is run only when the file is actually present.
# `\"` is used inside the R expressions exactly as in the original lines.
RUN Write-Host '=== INSTALLING R PACKAGES ===' ; `
    Write-Host 'Installing R packages from install_packages.R (same as quarto-build workflow)...' ; `
    Write-Host 'Setting up R environment...' ; `
    Write-Host "R_LIBS_USER: $env:R_LIBS_USER" ; `
    Write-Host 'Installing R packages...' ; `
    Rscript `
        -e 'options(repos=c(CRAN=\"https://cran.rstudio.com\"))' `
        -e 'dir.create(Sys.getenv(\"R_LIBS_USER\"), recursive=TRUE, showWarnings=FALSE)' `
        -e '.libPaths(Sys.getenv(\"R_LIBS_USER\"))' `
        -e 'install.packages(\"remotes\")' ; `
    if (Test-Path 'C:/temp/install_packages.R') { `
        Write-Host 'Found install_packages.R, sourcing it...' ; `
        Rscript 'C:/temp/install_packages.R' ; `
    } else { `
        Write-Host 'No install_packages.R found, installing basic packages...' ; `
        Rscript `
            -e 'options(repos=c(CRAN=\"https://cran.rstudio.com\"))' `
            -e 'install.packages(c(\"rmarkdown\",\"knitr\",\"ggplot2\"))' ; `
    } ; `
    Rscript -e 'for (p in c(\"rmarkdown\",\"knitr\")) if (!require(p, character.only=TRUE, quietly=TRUE)) stop(\"missing: \", p)' ; `
    if ($LASTEXITCODE -ne 0) { `
        throw 'Required R packages rmarkdown/knitr could not be loaded' ; `
    } ; `
    Write-Host '📦 R packages installed' ; `
    Write-Host 'Verifying R packages...' ; `
    if (Test-Path 'C:/temp/verify_r_packages.R') { `
        Rscript C:/temp/verify_r_packages.R ; `
    } else { `
        Write-Host '⚠️ C:/temp/verify_r_packages.R not found in image, skipping extended verification' ; `
    } ; `
    Write-Host '✅ R package installation complete'
# ------------------------------------------------------------
# PHASE 14: Cleanup and Environment Setup
# ------------------------------------------------------------
# Deletes the staged dependency files from C:/temp. Every Remove-Item uses
# -ErrorAction SilentlyContinue, so the "removed" messages are printed whether
# or not the path existed (verify_r_packages.R and requirements/ are not copied
# anywhere in this Dockerfile).
# QUARTO_LOG_LEVEL is then set to DEBUG for this RUN ($env:) and at Machine
# scope.
# NOTE(review): PHASE 0 declares ENV QUARTO_LOG_LEVEL="INFO"; confirm which of
# the two values is intended to be in effect when the container runs.
RUN Write-Host '=== STARTING CLEANUP AND ENVIRONMENT SETUP ===' ; `
Write-Host 'Cleaning temporary files and setting up environment...' ; `
Write-Host 'Removing temporary files...' ; `
Remove-Item C:/temp/requirements.txt -ErrorAction SilentlyContinue ; `
Write-Host '🗑️ requirements.txt removed' ; `
Remove-Item C:/temp/install_packages.R -ErrorAction SilentlyContinue ; `
Write-Host '🗑️ install_packages.R removed' ; `
Remove-Item C:/temp/verify_r_packages.R -ErrorAction SilentlyContinue ; `
Write-Host '🗑️ verify_r_packages.R removed' ; `
Remove-Item C:/temp/tl_packages -ErrorAction SilentlyContinue ; `
Write-Host '🗑️ tl_packages removed' ; `
Remove-Item C:/temp/requirements/ -Recurse -Force -ErrorAction SilentlyContinue ; `
Write-Host '🗑️ requirements/ directory removed' ; `
Write-Host 'Setting up environment variables for Quarto...' ; `
$env:QUARTO_LOG_LEVEL = 'DEBUG' ; `
[Environment]::SetEnvironmentVariable('QUARTO_LOG_LEVEL', 'DEBUG', 'Machine') ; `
Write-Host '🔧 QUARTO_LOG_LEVEL set to DEBUG' ; `
Write-Host '✅ Cleanup and environment setup complete'
# ------------------------------------------------------------
# FINAL CHECKS: Comprehensive verification with diagnostics
# ------------------------------------------------------------
# Sets the working directory, then prints diagnostics: the current PATH, the
# msvcp*.dll files in System32 (Visual C++ runtime), and the version of each
# tool. `quarto check` is run with its output piped to the log and its exit
# code reported.
# NOTE(review): no check here throws or exits - the Quarto problems are only
# logged (the catch block prints the message), and the other tools' "verified"
# lines follow the version command unconditionally. Confirm that this step is
# meant to be diagnostic only rather than a build gate.
WORKDIR C:/workspace
RUN Write-Host '=== FINAL VERIFICATION WITH ENHANCED DIAGNOSTICS ===' ; `
Write-Host 'Verifying all installations with comprehensive checks...' ; `
Write-Host '' ; `
Write-Host '🔍 SYSTEM DIAGNOSTICS:' ; `
Write-Host '----------------------' ; `
Write-Host 'PATH environment variable:' ; `
Write-Host $env:PATH ; `
Write-Host '' ; `
Write-Host 'Visual C++ Redistributable check:' ; `
Get-ChildItem 'C:\Windows\System32' -Filter 'msvcp*.dll' | Select-Object Name, Length, LastWriteTime ; `
Write-Host '' ; `
Write-Host '📊 TOOL VERIFICATION:' ; `
Write-Host '---------------------' ; `
Write-Host 'Checking Quarto...' ; `
try { `
quarto --version ; `
Write-Host '✅ Quarto version check: PASSED' ; `
Write-Host 'Running Quarto check for comprehensive validation...' ; `
& quarto check 2>&1 | Write-Host ; `
if ($LASTEXITCODE -eq 0) { `
Write-Host '✅ Quarto check: PASSED' ; `
} else { `
Write-Host '⚠️ Quarto check: ISSUES DETECTED' ; `
Write-Host "Exit code: $LASTEXITCODE" ; `
} ; `
} catch { `
Write-Host '❌ Quarto verification failed:' ; `
Write-Host $_.Exception.Message ; `
} ; `
Write-Host 'Checking Python...' ; `
python --version ; `
Write-Host '✅ Python verified' ; `
Write-Host 'Checking R...' ; `
R --version ; `
Write-Host '✅ R verified' ; `
Write-Host 'Checking LaTeX...' ; `
lualatex --version ; `
Write-Host '✅ LaTeX verified' ; `
Write-Host 'Checking Ghostscript...' ; `
gs --version ; `
Write-Host '✅ Ghostscript verified' ; `
Write-Host 'Checking Inkscape...' ; `
inkscape --version ; `
Write-Host '✅ Inkscape verified' ; `
Write-Host '' ; `
Write-Host '🎯 FINAL STATUS:' ; `
Write-Host '----------------' ; `
Write-Host '✅ Windows container build completed with enhanced diagnostics'

View File

@@ -1,227 +0,0 @@
# MINIMAL WINDOWS DOCKERFILE - GHOSTSCRIPT ONLY
# Focus on fixing Ghostscript download/install, then add other components back
# Try Windows Server with more services (closer to GitHub Actions environment)
FROM mcr.microsoft.com/windows/server:ltsc2022
# === PHASE 1: POWERSHELL 7 INSTALLATION (Required for our scripts) ===
# Docker joins the continued lines into ONE cmd.exe command line, and cmd
# expands %ERRORLEVEL% when that line is parsed - before curl or msiexec have
# run - so an `if %ERRORLEVEL% neq 0` test inside the chain never sees the
# installer's result. Each command therefore carries its own `||` failure
# handler instead.
# `curl -f` turns HTTP error responses into a non-zero exit status, so a failed
# download is detected without a separate file-existence check.
RUN (curl -fL -o PowerShell.msi https://github.com/PowerShell/PowerShell/releases/download/v7.4.1/PowerShell-7.4.1-win-x64.msi || (echo ❌ PowerShell download failed - aborting build & exit 1)) && \
    (msiexec /i PowerShell.msi /quiet /norestart || (echo ❌ PowerShell installation failed - aborting build & exit 1)) && \
    del PowerShell.msi
# Add PowerShell to PATH and test
# The Machine PATH is extended with setx, then pwsh.exe is started by its full
# path to confirm the install. Later RUN steps in this file also call pwsh.exe
# by full path, so they do not rely on the PATH change.
# NOTE(review): setx stores the expanded %PATH% of the build shell and is
# documented to truncate values longer than 1024 characters - confirm the
# resulting PATH is intact.
RUN setx PATH "%PATH%;C:\Program Files\PowerShell\7" /M && \
"C:\Program Files\PowerShell\7\pwsh.exe" -Command "Write-Host 'PowerShell 7 installation verified'"
# === MINIMAL SETUP ===
# Staging directory for downloaded installers.
RUN mkdir C:\temp
# === CHOCOLATEY INSTALLATION (for alternative Ghostscript method) ===
# Best-effort install: the remote install script is downloaded and run with
# `iex`, then `choco --version` is used as a smoke test. Any failure is caught
# and only logged, so this RUN does not abort the build; the Ghostscript step
# below checks for Chocolatey itself before using it.
RUN "C:\Program Files\PowerShell\7\pwsh.exe" -Command " \
Write-Host '🚀 === INSTALLING CHOCOLATEY ==='; \
try { \
Set-ExecutionPolicy Bypass -Scope Process -Force; \
[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; \
iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')); \
Write-Host '✅ Chocolatey installation complete'; \
# Test chocolatey installation \
$chocoVersion = & choco --version 2>&1; \
if ($LASTEXITCODE -ne 0) { throw 'Chocolatey test failed' }; \
Write-Host ('📊 Chocolatey version: ' + $chocoVersion); \
} catch { \
Write-Host ('❌ Chocolatey installation failed: ' + $_.Exception.Message); \
Write-Host '⚠️ Will skip Chocolatey method in Ghostscript testing'; \
} \
"
# === GHOSTSCRIPT TESTING - MULTIPLE STRATEGIES ===
# Probes the candidate download URLs, then tries four install strategies in
# order and stops at the first that works:
#   1. Chocolatey (`choco install ghostscript -y`)
#   2. System.Net.WebClient download
#   3. Invoke-WebRequest download
#   4. curl download
# A downloaded installer is accepted only if it is larger than 1MB and is then
# run silently (/S). If every strategy fails the script throws, failing the
# build.
#
# Fixes relative to the previous revision:
# - All continued lines are joined into a single PowerShell command line, so a
#   trailing `# ...` comment on a code line commented out everything after it.
#   The inline comment is gone and all remarks live up here, outside the RUN.
# - System.Net.WebClient has no Timeout property; assigning it threw and made
#   strategy 2 fail every time. The assignment and the "3 minute timeout" wording
#   in its log line are removed.
# - $downloadSuccess and $gsInstaller are initialised explicitly, and the
#   installer test checks for an empty $gsInstaller BEFORE calling Test-Path, so
#   a Chocolatey success (no installer file) takes the "skipping manual
#   install" branch instead of erroring on a null path.
RUN "C:\Program Files\PowerShell\7\pwsh.exe" -Command " \
    Write-Host '🚀 === GHOSTSCRIPT DOWNLOAD TESTING ==='; \
    Write-Host '⏰ Testing multiple download methods...'; \
    $startTime = Get-Date; \
    $downloadSuccess = $false; \
    $gsInstaller = $null; \
    $userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'; \
    \
    $gsUrls = @( \
        'https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs10051/gs10051w64.exe', \
        'https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/latest/download/gs10051w64.exe', \
        'https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs10051/gs10051w32.exe' \
    ); \
    \
    Write-Host '📊 Testing URL accessibility...'; \
    foreach ($url in $gsUrls) { \
        Write-Host ('🔍 Testing: ' + $url); \
        try { \
            $response = Invoke-WebRequest -Uri $url -Method Head -TimeoutSec 30 -ErrorAction Stop; \
            Write-Host ('✅ URL accessible - Status: ' + $response.StatusCode + ' Size: ' + $response.Headers['Content-Length']); \
        } catch { \
            Write-Host ('❌ URL test failed: ' + $_.Exception.Message); \
        } \
    } \
    \
    Write-Host '📦 === DOWNLOAD METHOD 1: Chocolatey (should work with full Windows image) ==='; \
    try { \
        Write-Host '🔄 Testing Chocolatey availability...'; \
        $chocoTest = & choco --version 2>&1; \
        if ($LASTEXITCODE -eq 0) { \
            Write-Host '✅ Chocolatey available, trying Ghostscript installation...'; \
            Write-Host '📦 Installing Ghostscript via simple Chocolatey command (matching quarto-build.yml)...'; \
            choco install ghostscript -y; \
            if ($LASTEXITCODE -eq 0) { \
                Write-Host '✅ Chocolatey Ghostscript installation SUCCESS!'; \
                $downloadSuccess = $true; \
            } else { \
                Write-Host ('❌ Chocolatey installation failed with exit code: ' + $LASTEXITCODE); \
            } \
        } else { \
            Write-Host '⚠️ Chocolatey not available, trying direct download methods...'; \
        } \
    } catch { \
        Write-Host ('❌ Chocolatey method failed: ' + $_.Exception.Message); \
        Write-Host '⚠️ Will try direct download methods as fallback...'; \
    } \
    \
    if (-not $downloadSuccess) { \
        Write-Host '📦 === DOWNLOAD METHOD 2: WebClient (fallback) ==='; \
        $gsInstaller = 'C:/temp/gs_installer_method2.exe'; \
        \
        foreach ($gsUrl in $gsUrls) { \
            Write-Host ('🔄 WebClient trying: ' + $gsUrl); \
            try { \
                [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; \
                $webClient = New-Object System.Net.WebClient; \
                $webClient.Headers.Add('User-Agent', $userAgent); \
                $webClient.DownloadFile($gsUrl, $gsInstaller); \
                $webClient.Dispose(); \
                \
                if ((Test-Path $gsInstaller) -and ((Get-Item $gsInstaller).Length -gt 1MB)) { \
                    Write-Host ('✅ WebClient SUCCESS - Size: ' + ((Get-Item $gsInstaller).Length / 1MB).ToString('F1') + ' MB'); \
                    $downloadSuccess = $true; \
                    break; \
                } else { \
                    Write-Host '⚠️ WebClient downloaded but file too small'; \
                    if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
                } \
            } catch { \
                Write-Host ('❌ WebClient failed: ' + $_.Exception.Message); \
                if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
            } \
        } \
    } \
    \
    if (-not $downloadSuccess) { \
        Write-Host '📦 === DOWNLOAD METHOD 3: Invoke-WebRequest ==='; \
        $gsInstaller = 'C:/temp/gs_installer_method3.exe'; \
        \
        foreach ($gsUrl in $gsUrls) { \
            Write-Host ('🔄 Invoke-WebRequest trying: ' + $gsUrl + ' (5 minute timeout)'); \
            try { \
                Invoke-WebRequest -Uri $gsUrl -OutFile $gsInstaller -TimeoutSec 300 -UserAgent $userAgent -UseBasicParsing; \
                \
                if ((Test-Path $gsInstaller) -and ((Get-Item $gsInstaller).Length -gt 1MB)) { \
                    Write-Host ('✅ Invoke-WebRequest SUCCESS - Size: ' + ((Get-Item $gsInstaller).Length / 1MB).ToString('F1') + ' MB'); \
                    $downloadSuccess = $true; \
                    break; \
                } else { \
                    Write-Host '⚠️ Invoke-WebRequest downloaded but file too small'; \
                    if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
                } \
            } catch { \
                Write-Host ('❌ Invoke-WebRequest failed: ' + $_.Exception.Message); \
                if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
            } \
        } \
    } \
    \
    if (-not $downloadSuccess) { \
        Write-Host '📦 === DOWNLOAD METHOD 4: curl (final fallback) ==='; \
        $gsInstaller = 'C:/temp/gs_installer_method4.exe'; \
        \
        foreach ($gsUrl in $gsUrls) { \
            Write-Host ('🔄 curl trying: ' + $gsUrl + ' (5 minute timeout)'); \
            try { \
                $curlResult = & curl -L -o $gsInstaller $gsUrl --user-agent $userAgent --max-time 300 2>&1; \
                \
                if ((Test-Path $gsInstaller) -and ((Get-Item $gsInstaller).Length -gt 1MB)) { \
                    Write-Host ('✅ curl SUCCESS - Size: ' + ((Get-Item $gsInstaller).Length / 1MB).ToString('F1') + ' MB'); \
                    $downloadSuccess = $true; \
                    break; \
                } else { \
                    Write-Host '⚠️ curl downloaded but file too small'; \
                    if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
                } \
            } catch { \
                Write-Host ('❌ curl failed: ' + $_.Exception.Message); \
                if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
            } \
        } \
    } \
    \
    if ($downloadSuccess) { \
        Write-Host '🎉 === DOWNLOAD/INSTALL SUCCESSFUL ==='; \
        \
        if ($gsInstaller -and (Test-Path $gsInstaller)) { \
            Write-Host '🔄 Testing manual installation...'; \
            $installProcess = Start-Process -FilePath $gsInstaller -ArgumentList '/S' -Wait -PassThru -NoNewWindow; \
            if ($installProcess.ExitCode -ne 0) { \
                Write-Host ('❌ Manual installation failed with exit code: ' + $installProcess.ExitCode); \
                throw 'Ghostscript manual installation failed'; \
            } \
            Write-Host '✅ Manual installation SUCCESS!'; \
        } else { \
            Write-Host '📦 Chocolatey installation already completed, skipping manual install'; \
        } \
        \
        Write-Host '🧪 Testing Ghostscript functionality...'; \
        $gsPath = Get-ChildItem 'C:/Program Files/gs' -ErrorAction SilentlyContinue | Sort-Object Name -Descending | Select-Object -First 1; \
        if ($gsPath) { \
            $binPath = Join-Path $gsPath.FullName 'bin'; \
            Write-Host ('📍 Found Ghostscript at: ' + $binPath); \
            try { \
                $gsVersion = & \"$binPath/gs.exe\" --version 2>&1; \
                Write-Host ('✅ Ghostscript version: ' + $gsVersion); \
                Write-Host '🎉 === GHOSTSCRIPT FULLY WORKING ==='; \
            } catch { \
                Write-Host '⚠️ Ghostscript installed but version test failed (may be normal in containers)'; \
            } \
        } else { \
            Write-Host '⚠️ Installation succeeded but Ghostscript directory not found'; \
            $altPaths = @('C:/Program Files (x86)/gs', 'C:/ProgramData/chocolatey/lib/ghostscript'); \
            foreach ($altPath in $altPaths) { \
                if (Test-Path $altPath) { \
                    Write-Host ('📍 Found Ghostscript at alternative location: ' + $altPath); \
                    break; \
                } \
            } \
        } \
    } else { \
        Write-Host '❌ === ALL DOWNLOAD METHODS FAILED ==='; \
        Write-Host 'This will help us debug the root cause of download failures'; \
        throw 'All Ghostscript download methods failed'; \
    } \
    \
    $endTime = Get-Date; \
    $duration = ($endTime - $startTime).TotalMinutes; \
    Write-Host ('✅ === GHOSTSCRIPT TEST COMPLETE === (' + $duration.ToString('F1') + ' minutes)'); \
    "
# === BASIC VERIFICATION ===
# Prints a final status report. The PowerShell and temp-directory lines are
# fixed "OK" messages (nothing is tested for them); only Ghostscript is probed,
# by checking whether a `gs` command resolves. A missing `gs` is reported as
# expected and does not fail the build.
RUN "C:\Program Files\PowerShell\7\pwsh.exe" -Command " \
Write-Host '🔍 === FINAL VERIFICATION ==='; \
Write-Host 'PowerShell 7: OK'; \
Write-Host 'Temp directory: OK'; \
if (Get-Command gs -ErrorAction SilentlyContinue) { \
Write-Host 'Ghostscript: AVAILABLE'; \
} else { \
Write-Host 'Ghostscript: Not in PATH (expected)'; \
} \
Write-Host '✅ Minimal container ready for testing'; \
"

View File

@@ -1,181 +0,0 @@
# Windows Dockerfile Fixes Summary
## 🔧 Critical Issues Fixed
### 1. PowerShell 7 Path Resolution
**Problem**: Using `pwsh` shorthand can fail in Windows containers
```dockerfile
# BEFORE (problematic)
SHELL ["pwsh", "-NoLogo", "-ExecutionPolicy", "Bypass", "-Command"]
# AFTER (fixed)
SHELL ["C:\\Program Files\\PowerShell\\7\\pwsh.exe", "-NoLogo", "-ExecutionPolicy", "Bypass", "-Command"]
```
**Why**: Windows containers may not have `pwsh` in PATH, requiring full path specification.
### 2. TeX Live Installation Process
**Problem**: `Start-Process` without `-NoNewWindow` can hang in containers
```dockerfile
# BEFORE (problematic)
Start-Process -FilePath $Installer -ArgumentList '-repository', $Repo, '-profile', 'C:\temp\texlive.profile' -Wait
# AFTER (fixed)
Start-Process -FilePath $Installer -ArgumentList '-repository', $Repo, '-profile', 'C:\temp\texlive.profile' -Wait -NoNewWindow
```
**Why**: Container environments need `-NoNewWindow` to prevent GUI-related hangs.
### 3. TeX Package Installation
**Problem**: Comments in `tl_packages` file causing installation failures
```dockerfile
# BEFORE (problematic)
$pkgs = Get-Content 'C:\temp\tl_packages' | Where-Object { $_.Trim() -ne '' }
# AFTER (fixed)
$pkgs = Get-Content 'C:\temp\tl_packages' | Where-Object { $_.Trim() -ne '' -and -not $_.Trim().StartsWith('#') }
```
**Why**: Comments starting with `#` were being passed to `tlmgr install`, causing errors.
### 4. TikZ Test Document
**Problem**: Complex here-string with backticks causing parsing issues
```dockerfile
# BEFORE (problematic)
Set-Content -Path C:\temp\test_tikz.tex -Value @'`n\documentclass{standalone}`n\usepackage{tikz}`n...
# AFTER (fixed)
Set-Content -Path C:\temp\test_tikz.tex -Value @'
\documentclass{standalone}
\usepackage{tikz}
...
'@ -Encoding ASCII
```
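**Why**: A here-string must start on a new line immediately after the opening `@'`, and a single-quoted here-string does not expand backtick escapes such as `` `n ``, so the one-line form fails to parse and would not produce real line breaks anyway.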
### 5. Package Installation Verbosity
**Problem**: Silent failures in package installation
```dockerfile
# BEFORE (problematic)
foreach ($p in $pkgs) { & $tlmgr install $p.Trim() }
# AFTER (fixed)
foreach ($p in $pkgs) { Write-Host "Installing TeX package: $p" ; & $tlmgr install $p.Trim() }
```
**Why**: Added verbose output to help debug installation issues.
## 🐛 Windows Container Quirks Addressed
### 1. PATH Environment Variable
- **Issue**: Windows PATH manipulation requires regex escaping
- **Solution**: Used `[regex]::Escape()` for proper path matching
### 2. File Path Handling
- **Issue**: Mixed forward/backward slashes
- **Solution**: Consistent use of Windows-style paths with proper escaping
### 3. PowerShell Execution Policy
- **Issue**: Default execution policy blocks scripts
- **Solution**: Used `-ExecutionPolicy Bypass` consistently
### 4. Chocolatey Installation
- **Issue**: TLS 1.2 requirement for downloads
- **Solution**: Added `[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12`
### 5. Container-Safe Installations
- **Issue**: MSI installers can hang in containers
- **Solution**: Used ZIP installations for PowerShell 7 and Quarto
## 📋 Validation Improvements
### 1. Comprehensive Testing
- Added version checks for all major components
- Included `kpsewhich` font verification
- Added TikZ smoke test with PDF generation
- Enhanced R package verification
### 2. Error Handling
- Added explicit error checking with `throw` statements
- Included progress indicators for long operations
- Added fallback mechanisms for critical components
### 3. File Existence Checks
- Verified all required files exist before copying
- Added validation for installation paths
- Included cleanup procedures
## 🚀 Performance Optimizations
### 1. Minimal TeX Live Installation
- Used `scheme-infraonly` for faster installation
- Disabled documentation and source files
- Targeted package installation instead of full distribution
### 2. Efficient Package Management
- Used Chocolatey for reliable Windows package installation
- Implemented proper PATH management
- Added cleanup procedures to reduce image size
### 3. Build Phase Optimization
- Organized into logical phases for better caching
- Separated dependency installation from verification
- Added progress indicators for long-running operations
## 🔍 Testing Strategy
### 1. Pre-Build Validation
- Created test scripts to validate Dockerfile syntax
- Checked for common Windows container issues
- Verified all required files exist
### 2. Component Verification
- PowerShell 7: Version and command availability
- Quarto: Version and functionality
- Python: Package installation and imports
- TeX Live: Package and font verification
- R: Package installation and library loading
### 3. Integration Testing
- TikZ smoke test with PDF generation
- Cross-component dependency verification
- End-to-end build process validation
## 📊 Expected Performance
- **Build Time**: 45-60 minutes (down from 90+ minutes)
- **Image Size**: 8-12GB (optimized for Windows)
- **Memory Usage**: 4-6GB during build, 2-3GB runtime
- **Success Rate**: >95% (with proper error handling)
## 🛠️ Maintenance Notes
### 1. Version Updates
- PowerShell 7: Update URL and version number
- Quarto: Update version and download URL
- Python: Update version in Chocolatey command
- TeX Live: Update repository URL and packages
### 2. Package Management
- Add new TeX packages to `tl_packages` file
- Update Python requirements in `requirements-build.txt`
- Add R packages to `install_packages.R`
### 3. Testing Procedures
- Run validation script before building
- Test all components after updates
- Verify cross-platform compatibility
## ✅ Verification Checklist
- [x] PowerShell 7 installation and PATH setup
- [x] Chocolatey installation and package management
- [x] Quarto installation and verification
- [x] Python installation and package management
- [x] TeX Live installation with package filtering
- [x] R installation and package verification
- [x] Graphics tools (Ghostscript, Inkscape)
- [x] Font verification and TikZ testing
- [x] Error handling and progress indicators
- [x] Cleanup procedures and optimization

View File

@@ -1,171 +0,0 @@
# Windows Quarto Build Container
This directory contains the Windows Server 2022 container configuration for building the MLSysBook with Quarto.
## 🐳 Container Features
- **Base Image**: Windows Server 2022 LTSC
- **PowerShell**: 7.4.1 (ZIP install, container-safe)
- **Quarto**: 1.7.31 (ZIP install)
- **Python**: 3.13.1 + production dependencies
- **TeX Live**: 2025 snapshot with required packages
- **R**: 4.3.2 + R Markdown packages
- **Graphics**: Ghostscript + Inkscape (via Chocolatey)
## 🔧 Key Fixes Applied
### 1. PowerShell 7 Path Issues
- **Problem**: Using `pwsh` shorthand can fail in containers
- **Fix**: Use full path `C:\Program Files\PowerShell\7\pwsh.exe`
### 2. TeX Live Installation
- **Problem**: `Start-Process` without `-NoNewWindow` can hang
- **Fix**: Added `-NoNewWindow` flag for container compatibility
- **Problem**: Comments in `tl_packages` file
- **Fix**: Filter out comment lines when installing packages
### 3. TikZ Test Document
- **Problem**: Complex here-string with backticks
- **Fix**: Simplified to standard multi-line string
### 4. Package Installation
- **Problem**: Silent failures in package installation
- **Fix**: Added verbose output and better error handling
## 🚀 Building the Container
### Prerequisites
- Windows Docker Desktop or Windows Server with Docker
- At least 8GB RAM available for Docker
- 20GB+ free disk space
### Build Command
```powershell
# From project root
docker build -f docker/build-quarto-windows/Dockerfile -t mlsysbook-windows .
```
### Test Before Building
```powershell
# Run validation script
.\docker\build-quarto-windows\test_dockerfile.ps1
```
## 📋 Build Phases
1. **Base Setup**: Directories, environment variables
2. **PowerShell 7**: ZIP installation (container-safe)
3. **Chocolatey**: Package manager installation
4. **Dependencies**: Copy required files
5. **Quarto**: ZIP installation with PATH setup
6. **Python**: 3.13.1 + production requirements
7. **Graphics**: Ghostscript + Inkscape
8. **TeX Live**: 2025 snapshot + packages
9. **R**: 4.3.2 + R Markdown packages
10. **Cleanup**: Remove temporary files
## 🔍 Verification Steps
The container includes comprehensive verification:
- **PowerShell 7**: Version check
- **Quarto**: Version and command availability
- **Python**: Version and pip functionality
- **TeX Live**: Package verification with `kpsewhich`
- **Fonts**: Helvetica font files verification
- **TikZ**: Smoke test with PDF generation
- **R**: Package installation verification
## ⚠️ Common Issues & Solutions
### 1. Build Timeouts
- **Cause**: Large downloads (TeX Live, Python packages)
- **Solution**: Increased timeout values in Dockerfile
### 2. PATH Issues
- **Cause**: Windows PATH not properly updated
- **Solution**: Explicit PATH manipulation with regex escaping
### 3. Package Installation Failures
- **Cause**: Network issues or missing dependencies
- **Solution**: Added verbose output and error checking
### 4. Memory Issues
- **Cause**: TeX Live installation requires significant memory
- **Solution**: Use `scheme-infraonly` for minimal installation
## 🧪 Testing
### Run Container
```powershell
docker run -it mlsysbook-windows pwsh
```
### Test Quarto
```powershell
quarto --version
quarto check
```
### Test Python
```powershell
python --version
python -c "import nltk; print('NLTK available')"
```
### Test R
```powershell
R --version
Rscript -e "library(rmarkdown); print('R Markdown available')"
```
### Test TeX Live
```powershell
lualatex --version
kpsewhich pgf.sty
```
## 📊 Performance Notes
- **Build Time**: ~45-60 minutes on typical hardware
- **Image Size**: ~8-12GB (includes TeX Live, R, Python)
- **Memory Usage**: 4-6GB during build, 2-3GB runtime
- **Disk Space**: 15-20GB for build cache
## 🔧 Troubleshooting
### Build Fails on TeX Live
```powershell
# Check Docker disk usage, then reclaim space from unused data
docker system df
docker system prune -f
```
### PowerShell Issues
```powershell
# Verify PowerShell 7 installation
docker run mlsysbook-windows pwsh -Command "Get-Host"
```
### Package Installation Issues
```powershell
# Check Chocolatey installation
docker run mlsysbook-windows choco --version
```
## 📝 Maintenance
### Updating Dependencies
1. Update version numbers in Dockerfile
2. Test with validation script
3. Rebuild and verify all components
### Adding New Packages
1. Add to appropriate phase in Dockerfile
2. Update verification steps
3. Test thoroughly
### Security Updates
- Regularly update base image
- Monitor for CVE reports
- Update package versions as needed

View File

@@ -1,96 +0,0 @@
#!/usr/bin/env pwsh
# Pre-build validation for the Windows Dockerfile.
# Run this before `docker build` so that common problems surface early.

# Script-wide settings ($dockerfile is read again by the syntax check below)
$headline = "🚀 Testing Dockerfile: Windows"
$dockerfile = "docker/build-quarto-windows/Dockerfile"
$image_name = "mlsysbook-windows-test"
$container_name = "mlsysbook-windows-test-container"

Write-Host $headline -ForegroundColor Green

# Relative paths that must exist (resolved against the current directory)
$mustExist = @(
    "tools/dependencies/requirements/",
    "tools/dependencies/requirements-build.txt",
    "tools/dependencies/install_packages.R",
    "tools/dependencies/tl_packages",
    "docker/build-quarto-windows/verify_r_packages.R"
)

Write-Host "📁 Checking required files..." -ForegroundColor Yellow
foreach ($path in $mustExist) {
    # Abort on the first missing path with a non-zero exit code
    if (-not (Test-Path $path)) {
        Write-Host "$path (MISSING)" -ForegroundColor Red
        exit 1
    }
    Write-Host "$path" -ForegroundColor Green
}
# Check Dockerfile syntax
# Scans the raw Dockerfile text for a few known Windows-container pitfalls.
# Findings are printed as warnings only; the script exits with code 1 solely
# when the Dockerfile itself cannot be found.
Write-Host "🐳 Validating Dockerfile syntax..." -ForegroundColor Yellow
if (Test-Path $dockerfile) {
    $content = Get-Content $dockerfile -Raw

    # Collected warning messages
    $issues = @()

    # The escape directive must be the first thing in the file.
    # The pattern is single-quoted on purpose: inside double quotes a backtick
    # escapes the following quote, which leaves the string unterminated.
    if ($content -notmatch '^# escape=`') {
        $issues += "Missing escape character at top"
    }

    # SHELL should reference the full pwsh.exe path, not the bare command name
    if ($content -match 'SHELL \["pwsh"') {
        $issues += "Using 'pwsh' instead of full path - should use 'C:\\Program Files\\PowerShell\\7\\pwsh.exe'"
    }

    # Without the multiline option `$` only matches at the end of the text, so
    # this flags a backtick left dangling at the very end of the file.
    if ($content -match '`\s*$') {
        $issues += "Trailing backticks found - should be removed"
    }

    # Flag a Start-Process ... -Wait on a line that has no -NoNewWindow.
    # The lookahead sits right after Start-Process so the two flags may appear
    # in either order on that line.
    if ($content -match 'Start-Process(?!.*-NoNewWindow).*-Wait') {
        $issues += "Start-Process should include -NoNewWindow for container builds"
    }

    if ($issues.Count -eq 0) {
        Write-Host " Dockerfile syntax looks good" -ForegroundColor Green
    } else {
        Write-Host " Potential issues found:" -ForegroundColor Yellow
        foreach ($issue in $issues) {
            Write-Host " - $issue" -ForegroundColor Yellow
        }
    }
} else {
    Write-Host " Dockerfile not found" -ForegroundColor Red
    exit 1
}
# Check tl_packages content
# Counts the entries that remain after dropping blank lines and lines whose
# trimmed text starts with '#'.
Write-Host "📦 Checking TeX Live packages..." -ForegroundColor Yellow
$tlPackageList = "tools/dependencies/tl_packages"
if (-not (Test-Path $tlPackageList)) {
    Write-Host " tl_packages file missing" -ForegroundColor Red
} else {
    $entries = Get-Content $tlPackageList | Where-Object {
        $trimmed = $_.Trim()
        $trimmed -ne '' -and -not $trimmed.StartsWith('#')
    }
    Write-Host " Found $($entries.Count) TeX packages to install" -ForegroundColor Green
}

# Check requirements
# Only the presence of the file is verified; its contents are not inspected.
Write-Host "🐍 Checking Python requirements..." -ForegroundColor Yellow
$requirementsFile = "tools/dependencies/requirements-build.txt"
if (-not (Test-Path $requirementsFile)) {
    Write-Host " Requirements file missing" -ForegroundColor Red
} else {
    Write-Host " Requirements file found" -ForegroundColor Green
}

# Closing summary and the build command to run next
Write-Host " Dockerfile validation complete!" -ForegroundColor Green
Write-Host ""
Write-Host "To build the container:" -ForegroundColor Cyan
Write-Host " docker build -f docker/build-quarto-windows/Dockerfile -t mlsysbook-windows ." -ForegroundColor White

View File

@@ -1,93 +0,0 @@
#!/bin/bash
# Pre-build validation for the Windows Dockerfile (bash version).
# Run this before `docker build` so that common problems surface early.

# Script-wide settings ($dockerfile is read again by the syntax check below)
headline="🚀 Testing Dockerfile: Windows"
dockerfile="docker/build-quarto-windows/Dockerfile"
image_name="mlsysbook-windows-test"
container_name="mlsysbook-windows-test-container"

echo "$headline"

# Relative paths that must exist (files or directories)
required_files=(
    "tools/dependencies/requirements/"
    "tools/dependencies/requirements-build.txt"
    "tools/dependencies/install_packages.R"
    "tools/dependencies/tl_packages"
    "docker/build-quarto-windows/verify_r_packages.R"
)

echo "📁 Checking required files..."
for path in "${required_files[@]}"; do
    # Abort on the first missing path with a non-zero exit code
    if [ ! -e "$path" ]; then
        echo "$path (MISSING)"
        exit 1
    fi
    echo "$path"
done
# Check Dockerfile syntax
# Greps the Dockerfile for a few known Windows-container pitfalls.
# Findings are printed as warnings only; the script exits with status 1 solely
# when the Dockerfile itself cannot be found.
echo "🐳 Validating Dockerfile syntax..."
if [ -f "$dockerfile" ]; then
    # Collected warning messages
    issues=()

    # The escape directive is expected at the start of a line
    if ! grep -q "^# escape=\`" "$dockerfile"; then
        issues+=("Missing escape character at top")
    fi

    # SHELL should reference the full pwsh.exe path, not the bare command name
    if grep -q 'SHELL \["pwsh"' "$dockerfile"; then
        issues+=("Using 'pwsh' instead of full path - should use 'C:\\\\Program Files\\\\PowerShell\\\\7\\\\pwsh.exe'")
    fi

    # Evaluate each Start-Process ... -Wait line on its own: warn as soon as
    # one of them lacks -NoNewWindow. A file-wide check would be satisfied by
    # a single compliant line and hide the others.
    if grep 'Start-Process.*-Wait' "$dockerfile" | grep -qv -- '-NoNewWindow'; then
        issues+=("Start-Process should include -NoNewWindow for container builds")
    fi

    # The Dockerfile is expected to skip '#' comment lines from tl_packages
    if ! grep -q "StartsWith('#')" "$dockerfile"; then
        issues+=("Missing comment filtering for tl_packages")
    fi

    if [ ${#issues[@]} -eq 0 ]; then
        echo " ✅ Dockerfile syntax looks good"
    else
        echo " ⚠️ Potential issues found:"
        for issue in "${issues[@]}"; do
            echo " - $issue"
        done
    fi
else
    echo " ❌ Dockerfile not found"
    exit 1
fi
# Check tl_packages content
# Counts the lines that are neither blank nor comments. Leading whitespace is
# tolerated before '#', and whitespace-only lines (including a stray carriage
# return from CRLF files) count as blank, matching the trim-then-filter logic
# of the PowerShell version of this script.
echo "📦 Checking TeX Live packages..."
tl_packages="tools/dependencies/tl_packages"
if [ -f "$tl_packages" ]; then
    # grep -c prints the count even when it is 0 (its exit status is then 1,
    # which is ignored here)
    package_count=$(grep -cvE '^[[:space:]]*(#|$)' "$tl_packages")
    echo " ✅ Found $package_count TeX packages to install"
else
    echo " ❌ tl_packages file missing"
fi

# Check requirements
# Only the presence of the file is verified; its contents are not inspected.
echo "🐍 Checking Python requirements..."
requirements="tools/dependencies/requirements-build.txt"
if [ -f "$requirements" ]; then
    echo " ✅ Requirements file found"
else
    echo " ❌ Requirements file missing"
fi

# Closing summary and the build command to run next
echo "✅ Dockerfile validation complete!"
echo ""
echo "To build the container:"
echo " docker build -f docker/build-quarto-windows/Dockerfile -t mlsysbook-windows ."

View File

@@ -1,13 +0,0 @@
#!/usr/bin/env Rscript
# Confirm that every package listed in `required_packages` can be loaded.
# NOTE(review): `required_packages` is not defined in this script; presumably
# the sourced install_packages.R creates it - confirm.
source('C:/temp/install_packages.R')

# TRUE for each package whose namespace loads, FALSE otherwise
is_available <- sapply(required_packages, requireNamespace, quietly = TRUE)
not_installed <- required_packages[!is_available]

if (length(not_installed) == 0) {
  cat('✅ All required R packages installed successfully\n')
} else {
  # Report the missing names and signal failure to the caller via exit status 1
  cat('❌ Missing packages:', paste(not_installed, collapse = ', '), '\n')
  quit(status = 1)
}

View File

@@ -30,13 +30,12 @@ Containerized Linux Build (5-10 minutes):
## Files
### Core Files
- `docker/build-quarto-linux/Dockerfile` - A single Dockerfile for Linux builds.
- `docker/build-quarto-linux/README.md` - Linux container documentation
- `docker/build-quarto-linux/.dockerignore` - Build exclusions
- `docker/build-quarto-windows/Dockerfile` - A single Dockerfile for Windows builds.
- `.github/workflows/build-linux-container.yml` - Builds and pushes Linux container
- `.github/workflows/build-windows-container.yml` - Builds and pushes Windows container
- `.github/workflows/quarto-build-container.yml` - Containerized build workflow
- `docker/linux/Dockerfile` - A single Dockerfile for Linux builds.
- `docker/linux/README.md` - Linux container documentation
- `docker/linux/.dockerignore` - Build exclusions
- `docker/windows/Dockerfile` - A single Dockerfile for Windows builds.
- `docker/windows/README.md` - Windows container documentation
- `docker/windows/.dockerignore` - Build exclusions
### Container Lifecycle
1. **Build**: Weekly automatic rebuilds + manual triggers
@@ -48,14 +47,20 @@ Containerized Linux Build (5-10 minutes):
## Usage
### Manual Container Build
```bash
# Trigger Linux container build manually
gh workflow run build-linux-container.yml
### Registry Paths
- **Linux Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-linux`
- **Windows Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-windows`
# Trigger Windows container build manually
gh workflow run build-windows-container.yml
```
### Manual Builds
You can build the containers locally using these commands:
- **Linux**:
```bash
docker build -f docker/linux/Dockerfile -t mlsysbook-linux .
```
- **Windows**:
```powershell
docker build -f docker/windows/Dockerfile -t mlsysbook-windows .
```
### Manual Build Test
```bash
@@ -64,8 +69,8 @@ gh workflow run quarto-build-container.yml --field os=ubuntu-latest --field form
```
### Container Information
- **Linux Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-build`
- **Windows Registry**: `ghcr.io/harvard-edge/cs249r_book/build-quarto-windows`
- **Linux Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-linux`
- **Windows Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-windows`
- **Tags**: `latest`, `main`, `dev`, branch-specific tags
- **Linux Size**: ~2-3GB (includes TeX Live, R, Python packages)
- **Windows Size**: ~4-5GB (includes Windows Server Core + dependencies)
@@ -120,7 +125,7 @@ LC_ALL=en_US.UTF-8
3. Test locally with `docker build -t test .`
### Build Issues
1. Check if container exists: `ghcr.io/harvard-edge/cs249r_book/quarto-build:latest`
1. Check if container exists: `ghcr.io/harvard-edge/cs249r_book/quarto-linux:latest`
2. Verify container has all dependencies
3. Compare with traditional build logs
@@ -170,8 +175,8 @@ To build the containers, use the standard `docker build` command:
```bash
# For Linux
docker build -f docker/build-quarto-linux/Dockerfile -t mlsysbook-linux .
docker build -f docker/linux/Dockerfile -t mlsysbook-linux .
# For Windows
docker build -f docker/build-quarto-windows/Dockerfile -t mlsysbook-windows .
docker build -f docker/windows/Dockerfile -t mlsysbook-windows .
```

View File

@@ -1,224 +0,0 @@
# Container Build Fixes - January 2025
## Overview
This document summarizes the comprehensive fixes applied to the Docker container build system for MLSysBook. These fixes address critical issues that were preventing successful container builds and deployments.
## Issues Fixed
### 1. Linux Container (docker/build-quarto-linux/Dockerfile)
**Problems Identified:**
- Incorrect dependency file paths after repository restructuring
- Missing progress indicators and error handling
- Suboptimal build phase organization
- Inefficient TeX Live package installation loop
- Missing proper PATH configuration for LaTeX tools
**Fixes Applied:**
- ✅ Fixed COPY commands to use correct paths for dependency files
- ✅ Added comprehensive progress tracking with emojis and timing
- ✅ Reorganized build phases (1-11) for better clarity and debugging
- ✅ Improved TeX Live installation with better error handling
- ✅ Enhanced cleanup procedures for smaller image size
- ✅ Fixed PATH environment variables for all tools
- ✅ Added proper error handling in shell loops
### 2. Windows Container (docker/build-quarto-windows/Dockerfile)
**Problems Identified:**
- Complex and error-prone PowerShell syntax
- Inconsistent use of PowerShell commands
- Missing progress indicators
- Poor error handling in installation phases
- **CRITICAL**: Ghostscript installation hanging due to complex direct download method
**Fixes Applied:**
- ✅ Simplified and standardized PowerShell command syntax
- ✅ Added comprehensive progress tracking with timing
- ✅ Reorganized build phases (1-12) for better organization
- ✅ Enhanced error handling and validation
- ✅ Improved cleanup procedures
- ✅ Fixed dependency file path references
- ✅ **CRITICAL FIX**: Replaced hanging Ghostscript direct download with reliable Chocolatey installation (most stable for containers)
### 3. Linux Container Workflow (.github/workflows/build-linux-container.yml)
**Problems Identified:**
- Outdated Python package list in tests
- Inefficient container image handling
- Missing platform specification
**Fixes Applied:**
- ✅ Updated Python package imports to match current requirements
- ✅ Optimized container testing to use local images
- ✅ Added platform specification (linux/amd64)
- ✅ Fixed LOCAL_IMAGE variable handling
### 4. Windows Container Workflow (.github/workflows/build-windows-container.yml)
**Problems Identified:**
- Using bash commands instead of PowerShell in Windows containers
- Incorrect volume mounting paths for Windows
- Inefficient container testing approach
**Fixes Applied:**
- ✅ Converted all test commands from bash to PowerShell
- ✅ Fixed volume mounting to use Windows paths (C:/workspace)
- ✅ Updated all docker run commands to use pwsh instead of bash
- ✅ Improved error handling in test scenarios
- ✅ Optimized to use local container instead of pulling
## Container Build Phases
### Linux Container (11 Phases)
1. **System Dependencies** - Core Ubuntu packages and libraries
2. **Inkscape Installation** - SVG to PDF conversion capability
3. **Quarto Installation** - Latest Quarto CLI (v1.7.31)
4. **TeX Live Installation** - Complete LaTeX distribution
5. **Ghostscript Installation** - PDF processing capabilities
6. **R Installation** - R base and development packages
7. **Python Installation** - Python 3 with pip
8. **Python Packages** - All production requirements
9. **R Packages** - All required R libraries
10. **R Package Verification** - Validation of successful installation
11. **Comprehensive Cleanup** - Size optimization and cache clearing
### Windows Container (12 Phases)
1. **PowerShell 7 Installation** - Modern PowerShell for better scripting
2. **Chocolatey Installation** - Package manager for Windows
3. **Quarto Installation** - Latest Quarto CLI (v1.7.31)
4. **Python 3.13 Installation** - Latest Python with full package support
5. **Python Package Installation** - All production requirements
6. **Ghostscript Installation** - PDF processing capabilities
7. **Inkscape Installation** - SVG to PDF conversion capability
8. **TeX Live Installation** - Complete LaTeX distribution for Windows
9. **R Installation** - R base with development packages
10. **R Package Installation** - All required R libraries
11. **R Package Verification** - Validation of successful installation
12. **Cleanup** - Temporary file removal and optimization
## Testing Improvements
### Linux Container Tests (17 scenarios)
All tests run successfully with proper error handling and validation:
- ✅ Quarto functionality
- ✅ Python packages (updated to match current requirements)
- ✅ R packages (all from install_packages.R)
- ✅ TeX Live and LaTeX engines
- ✅ Inkscape SVG to PDF conversion
- ✅ Ghostscript PDF compression
- ✅ Fonts and graphics libraries
- ✅ Quarto render test
- ✅ TikZ compilation test
- ✅ SVG to PDF conversion test
- ✅ System resources check
- ✅ Network connectivity
- ✅ Book structure compatibility
- ✅ Quarto configuration files
- ✅ Dependencies files accessibility
- ✅ Quarto check (same as workflow)
- ✅ Actual build process simulation
### Windows Container Tests (11 scenarios)
Converted from bash to PowerShell with proper Windows paths:
- ✅ Quarto functionality (using pwsh commands)
- ✅ Python packages (using Windows python command)
- ✅ R packages (using Windows Rscript)
- ✅ TeX Live and LaTeX engines
- ✅ Ghostscript PDF compression
- ✅ Quarto render test (with Windows file checking)
- ✅ TikZ compilation test (with Windows file checking)
- ✅ System resources (using Windows WMI commands)
- ✅ Network connectivity (using PowerShell web requests)
- ✅ Book structure compatibility (using Windows file system commands)
- ✅ Quarto check test
## Performance Impact
### Before Fixes:
- Build failures due to missing dependencies
- Path errors preventing tool execution
- Inefficient testing causing false positives
- Large container sizes due to poor cleanup
### After Fixes:
- **Linux Container**: ~2-3GB (optimized with multi-layer cleanup)
- **Windows Container**: ~4-5GB (optimized for Windows base requirements)
- **Build Time**: 5-10 minutes (Linux), 10-15 minutes (Windows)
- **Reliability**: Comprehensive testing with proper error handling
- **Maintainability**: Clear phase organization and progress tracking
## Files Modified
### Container Definitions:
- `docker/build-quarto-linux/Dockerfile` - A single, unified Dockerfile for Linux builds.
- `docker/build-quarto-windows/Dockerfile` - A single, unified Dockerfile for Windows builds.
### Workflow Files:
- `.github/workflows/build-linux-container.yml` - Updated tests and platform specification
- `.github/workflows/build-windows-container.yml` - Converted to PowerShell commands throughout
### Documentation:
- `docker/build-quarto-linux/README.md` - Updated with new phase information
- `docker/build-quarto-windows/README.md` - Enhanced with Windows-specific details
- `docs/CONTAINER_FIXES_2025.md` - This comprehensive summary
## Verification Steps
To verify the fixes work:
1. **Trigger Linux Container Build:**
```bash
gh workflow run build-linux-container.yml
```
2. **Trigger Windows Container Build:**
```bash
gh workflow run build-windows-container.yml
```
3. **Test Containerized Builds:**
```bash
gh workflow run quarto-build-container.yml --field os=ubuntu-latest --field format=html
```
## Future Improvements
1. **Multi-stage builds** for even smaller container sizes
2. **Parallel package installation** where possible
3. **Container image caching** optimization
4. **Health checks** for running containers
5. **Security scanning** integration
## Critical Fix: Ghostscript Installation
The most important fix addresses the **hanging Ghostscript installation** in the Windows container. The original approach used a complex direct download method that would hang during installation:
### Before (Problematic):
```powershell
# Complex direct download approach that hangs
$url = 'https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs10051/gs10051w64.exe'
Invoke-WebRequest -Uri $url -OutFile $installer -UseBasicParsing
Start-Process -FilePath $installer -ArgumentList '/S', '/D=C:/Program Files/gs/gs10.05.1' -Wait -NoNewWindow
```
### After (Working Solution):
```powershell
# Simplified chocolatey-only approach (most reliable for containers)
choco install ghostscript -y
Write-Host '✅ Ghostscript installed via chocolatey'
```
This change installs Ghostscript through Chocolatey, the most reliable package manager for Windows containers, and avoids the hang caused by the direct-download installer.
## Conclusion
These comprehensive fixes restore the container build system to full functionality, providing:
- Reliable, reproducible builds
- Significant time savings (from 45 minutes to 5-15 minutes)
- Better error handling and debugging
- Comprehensive testing coverage
- Clear documentation and progress tracking
The container build system is now ready for production use and will provide consistent, fast builds for the MLSysBook project.

View File

@@ -1 +1 @@
config/_quarto-pdf.yml
config/_quarto-html.yml

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.3 KiB

View File

@@ -87,8 +87,7 @@ We must note that we use large models beyond text, calling them *multi-modal mod
Open models are particularly relevant for running models on edge devices like Raspberry Pi as they can be more easily adapted, optimized, and deployed in resource-constrained environments. Still, it is crucial to verify their Licenses. Open models come with various open-source licenses that may affect their use in commercial applications, while closed models have clear, albeit restrictive, terms of service.
::: {.content-visible when-format="pdf"}
![Adapted from \
[arXiv](https://arxiv.org/pdf/2304.13712)](images/png/llms_slm.png)
![Adapted from [arXiv](https://arxiv.org/pdf/2304.13712)](images/png/llms-slm.png)
:::
::: {.content-visible when-format="html"}
@@ -125,7 +124,7 @@ For more information on SLMs, the paper, [LLM Pruning and Distillation in Practi
## Ollama {#sec-small-language-models-slm-ollama-bd3e}
![ollama logo](images/ollama.png)
![ollama logo](images/png/ollama.png)
[Ollama](https://ollama.com/) is an open-source framework that allows us to run language models (LMs), large or small, locally on our machines. Here are some critical points about Ollama:

View File

@@ -1,95 +0,0 @@
#!/usr/bin/env python3
import os
import re
import requests
import hashlib
from pathlib import Path
from urllib.parse import urlparse
import argparse
def find_qmd_files(directory):
    """Recursively collect every ``*.qmd`` file below ``directory``.

    Args:
        directory: Root folder to search (string or path-like).

    Returns:
        A list of ``Path`` objects, one per matching file, in the order the
        recursive glob yields them.
    """
    root = Path(directory)
    return [qmd_path for qmd_path in root.rglob("*.qmd")]
def _image_name_from_url(url):
    """Return the local file name to use for the image at ``url``."""
    image_name = Path(urlparse(url).path).name
    if not image_name:
        # The URL path has no final component (it is empty or just "/"), so
        # derive a stable name from a hash of the URL. The extension is a
        # guess: the real image type is not inspected.
        url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
        image_name = f"image_{url_hash}.png"
    return image_name


def process_file(qmd_file, dry_run=False):
    """Download externally hosted images referenced by one ``.qmd`` file.

    Every Markdown image whose target is an ``http``/``https`` URL is saved
    into an ``images`` folder next to the document, and the reference is
    rewritten to the relative path ``images/<name>``.

    Args:
        qmd_file: ``Path`` of the document to process.
        dry_run: When true, only report what would be downloaded and
            replaced; nothing is fetched and nothing is written.

    Returns:
        None. Read, download and write failures are printed and skipped
        rather than raised, so one bad URL does not stop the others.
    """
    try:
        with open(qmd_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"❌ Failed to read {qmd_file}: {e}")
        return

    # A simple regex to find any markdown image with an external URL
    pattern = r'!\[(.*?)\]\((https?://[^\)]+)\)'
    matches = list(re.finditer(pattern, content))
    if not matches:
        return

    print(f"📄 Processing {qmd_file}")
    images_dir = qmd_file.parent / "images"
    new_content = content

    for match in matches:
        caption = match.group(1)
        url = match.group(2)
        print(f" 🔍 Found external image: {url}")
        try:
            image_name = _image_name_from_url(url)
            local_path = images_dir / image_name
            # Markdown paths always use forward slashes; os.path.join would
            # emit a backslash on Windows and break the image reference.
            relative_path = f"images/{image_name}"

            if dry_run:
                print(f" 🧪 [DRY RUN] Would download to {local_path}")
                print(f" 🧪 [DRY RUN] Would replace with {relative_path}")
                continue

            images_dir.mkdir(parents=True, exist_ok=True)
            # The context manager releases the streamed connection even when
            # the download fails part-way through.
            with requests.get(url, stream=True, timeout=30) as response:
                response.raise_for_status()
                with open(local_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            print(f" ✅ Downloaded to {local_path}")

            # Replace the old URL with the new relative path
            original_md_image = f"![{caption}]({url})"
            replacement_md_image = f"![{caption}]({relative_path})"
            new_content = new_content.replace(original_md_image, replacement_md_image)
        except Exception as e:
            print(f" ❌ Failed to process {url}: {e}")

    # Only touch the document when at least one reference was rewritten
    if not dry_run and new_content != content:
        try:
            with open(qmd_file, 'w', encoding='utf-8') as f:
                f.write(new_content)
            print(f" ✅ Updated {qmd_file}")
        except Exception as e:
            print(f" ❌ Failed to write updated file {qmd_file}: {e}")
def main():
    """Command-line entry point.

    Parses ``--directory`` (required) and ``--dry-run``, finds every ``.qmd``
    file under the directory, and processes each one in turn.
    """
    cli = argparse.ArgumentParser(description="Download external images from Quarto markdown files")
    cli.add_argument("-d", "--directory", type=str, required=True, help="Directory to process")
    cli.add_argument("--dry-run", action="store_true", help="Show what would be downloaded without actually downloading")
    options = cli.parse_args()

    documents = find_qmd_files(options.directory)
    print(f"🔍 Found {len(documents)} .qmd files to process")

    # The dry-run flag is handed through unchanged to every document
    for document in documents:
        process_file(document, options.dry_run)


if __name__ == "__main__":
    main()