mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 09:08:54 -05:00
Merge branch 'dev' into feature/manage-external-images
This commit is contained in:
18
.github/workflows/build-linux-container.yml
vendored
18
.github/workflows/build-linux-container.yml
vendored
@@ -74,14 +74,14 @@ on:
|
||||
description: "Whether build used cache (true/false)"
|
||||
value: ${{ jobs.build.outputs.cache-hit }}
|
||||
|
||||
# DISABLED: Automatic triggers while working on containers
|
||||
# schedule:
|
||||
# - cron: '0 0 * * 0' # Weekly rebuild (Sunday at midnight)
|
||||
# push:
|
||||
# paths:
|
||||
# - 'tools/dependencies/**'
|
||||
# - 'docker/build-quarto-linux/**'
|
||||
# - '.github/workflows/build-linux-container.yml'
|
||||
# Re-enable automatic triggers
|
||||
schedule:
|
||||
- cron: '0 0 * * 0' # Weekly rebuild (Sunday at midnight)
|
||||
push:
|
||||
paths:
|
||||
- 'tools/dependencies/**'
|
||||
- 'docker/linux/**'
|
||||
- '.github/workflows/build-linux-container.yml'
|
||||
|
||||
env:
|
||||
# Container Registry Configuration (configurable via inputs)
|
||||
@@ -91,7 +91,7 @@ env:
|
||||
|
||||
# Container Build Configuration
|
||||
PLATFORM: linux/amd64
|
||||
DOCKERFILE_PATH: ./docker/build-quarto-linux/Dockerfile
|
||||
DOCKERFILE_PATH: ./docker/linux/Dockerfile
|
||||
CONTEXT_PATH: .
|
||||
|
||||
jobs:
|
||||
|
||||
91
.github/workflows/build-windows-container.yml
vendored
91
.github/workflows/build-windows-container.yml
vendored
@@ -73,14 +73,14 @@ on:
|
||||
description: "Whether build used cache (true/false)"
|
||||
value: ${{ jobs.build.outputs.cache-hit }}
|
||||
|
||||
# DISABLED: Automatic triggers while working on containers
|
||||
# schedule:
|
||||
# - cron: '0 2 * * 0' # Weekly rebuild (Sunday at 2am - after Linux container)
|
||||
# push:
|
||||
# paths:
|
||||
# - 'tools/dependencies/**'
|
||||
# - 'docker/build-quarto-windows/**'
|
||||
# - '.github/workflows/build-windows-container.yml'
|
||||
# Re-enable automatic triggers
|
||||
schedule:
|
||||
- cron: '0 2 * * 0' # Weekly rebuild (Sunday at 2am - after Linux container)
|
||||
push:
|
||||
paths:
|
||||
- 'tools/dependencies/**'
|
||||
- 'docker/windows/**'
|
||||
- '.github/workflows/build-windows-container.yml'
|
||||
|
||||
env:
|
||||
# Container Registry Configuration (configurable via inputs)
|
||||
@@ -89,7 +89,7 @@ env:
|
||||
CONTAINER_TAG: ${{ inputs.container_tag || 'latest' }}
|
||||
|
||||
# Container Build Configuration
|
||||
DOCKERFILE_PATH: ./docker/build-quarto-windows/Dockerfile
|
||||
DOCKERFILE_PATH: ./docker/windows/Dockerfile
|
||||
CONTEXT_PATH: .
|
||||
|
||||
jobs:
|
||||
@@ -100,10 +100,19 @@ jobs:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
outputs:
|
||||
build-status: ${{ steps.build.outputs.build-status }}
|
||||
image-name: ${{ steps.build.outputs.image-name }}
|
||||
image-digest: ${{ steps.build.outputs.image-digest }}
|
||||
cache-hit: ${{ steps.build.outputs.cache-hit }}
|
||||
|
||||
steps:
|
||||
- name: 📥 Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: 🛠️ Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: 🔐 Log in to Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
@@ -111,27 +120,47 @@ jobs:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: 🐳 Build Windows container
|
||||
- name: 🏷️ Extract metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=raw,value=${{ env.CONTAINER_TAG }}
|
||||
|
||||
- name: 🐳 Build and Push Windows container
|
||||
id: build
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ${{ env.CONTEXT_PATH }}
|
||||
file: ${{ env.DOCKERFILE_PATH }}
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
no-cache: ${{ inputs.no_cache }}
|
||||
|
||||
- name: 📊 Build Summary
|
||||
id: build-summary
|
||||
if: always()
|
||||
run: |
|
||||
echo "🚀 Building Windows container..."
|
||||
$useNoCache = "${{ inputs.no_cache }}" -eq "true"
|
||||
if ($useNoCache) {
|
||||
echo "📊 Cache mode: DISABLED (fresh build)"
|
||||
docker build --no-cache -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }} -f ${{ env.DOCKERFILE_PATH }} ${{ env.CONTEXT_PATH }}
|
||||
} else {
|
||||
echo "📊 Cache mode: ENABLED (faster build)"
|
||||
docker build -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }} -f ${{ env.DOCKERFILE_PATH }} ${{ env.CONTEXT_PATH }}
|
||||
}
|
||||
# Determine build status
|
||||
if [ "${{ steps.build.outcome }}" = "success" ]; then
|
||||
BUILD_STATUS="success"
|
||||
else
|
||||
BUILD_STATUS="failure"
|
||||
fi
|
||||
|
||||
echo "✅ Local container build completed"
|
||||
|
||||
- name: 🐳 Push Windows container
|
||||
run: |
|
||||
echo "📦 Pushing container to registry..."
|
||||
docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }}
|
||||
echo "✅ Container push completed"
|
||||
|
||||
- name: Build Complete
|
||||
run: |
|
||||
echo "✅ Windows container build completed successfully!"
|
||||
echo "📊 Container: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }}"
|
||||
# Extract build information
|
||||
IMAGE_NAME="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.CONTAINER_TAG }}"
|
||||
IMAGE_DIGEST="${{ steps.build.outputs.digest }}"
|
||||
CACHE_HIT="${{ steps.build.outputs.cache-hit }}"
|
||||
|
||||
echo "build-status=$BUILD_STATUS" >> $GITHUB_OUTPUT
|
||||
echo "image-name=$IMAGE_NAME" >> $GITHUB_OUTPUT
|
||||
echo "image-digest=$IMAGE_DIGEST" >> $GITHUB_OUTPUT
|
||||
echo "cache-hit=$CACHE_HIT" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "📊 Build Status: $BUILD_STATUS"
|
||||
echo "🐳 Image: $IMAGE_NAME"
|
||||
echo "🔍 Digest: $IMAGE_DIGEST"
|
||||
echo "💾 Cache Hit: $CACHE_HIT"
|
||||
|
||||
66
.github/workflows/container-health-check.yml
vendored
66
.github/workflows/container-health-check.yml
vendored
@@ -84,62 +84,32 @@ jobs:
|
||||
env:
|
||||
CONTAINER_IMAGE: ${{ inputs.container_registry || 'ghcr.io' }}/${{ github.repository }}/${{ matrix.container_name }}:${{ inputs.container_tag || 'latest' }}
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
DOCKERFILE_PATH: ./docker/${{ matrix.platform }}/dockerfile
|
||||
|
||||
steps:
|
||||
- name: 💾 Cache Docker Images
|
||||
- name: 📥 Checkout repository
|
||||
if: |
|
||||
(matrix.platform == 'linux' && inputs.test_linux != false) ||
|
||||
(matrix.platform == 'windows' && inputs.test_windows != false)
|
||||
uses: actions/cache@v4
|
||||
id: cache-docker-images
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: |
|
||||
~/.docker
|
||||
~/AppData/Local/Docker/wsl/data
|
||||
key: docker-${{ matrix.platform }}-${{ env.CONTAINER_IMAGE }}-${{ hashFiles('docker/**') }}
|
||||
restore-keys: |
|
||||
docker-${{ matrix.platform }}-${{ env.CONTAINER_IMAGE }}-
|
||||
docker-${{ matrix.platform }}-
|
||||
fetch-depth: 0
|
||||
|
||||
- name: 🐳 Pull Linux Container
|
||||
if: matrix.platform == 'linux' && inputs.test_linux != false
|
||||
shell: bash
|
||||
run: |
|
||||
echo "🐳 Pulling Linux container..."
|
||||
echo "📦 Image: ${{ env.CONTAINER_IMAGE }}"
|
||||
echo "💾 Cache Status: ${{ steps.cache-docker-images.outputs.cache-hit == 'true' && '✅ HIT - faster pull expected' || '❌ MISS - full pull required' }}"
|
||||
|
||||
# Check if image already exists locally (from cache)
|
||||
if docker image inspect ${{ env.CONTAINER_IMAGE }} >/dev/null 2>&1; then
|
||||
echo "✅ Container image found locally - skipping pull"
|
||||
else
|
||||
echo "📥 Pulling container image..."
|
||||
docker pull ${{ env.CONTAINER_IMAGE }}
|
||||
echo "✅ Container pulled successfully"
|
||||
fi
|
||||
- name: 🔑 Log in to GitHub Container Registry
|
||||
if: |
|
||||
(matrix.platform == 'linux' && inputs.test_linux != false) ||
|
||||
(matrix.platform == 'windows' && inputs.test_windows != false)
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: 🐳 Pull Windows Container
|
||||
if: matrix.platform == 'windows' && inputs.test_windows != false
|
||||
shell: pwsh
|
||||
run: |
|
||||
Write-Output "🐳 Pulling Windows container..."
|
||||
Write-Output "📦 Image: ${{ env.CONTAINER_IMAGE }}"
|
||||
$cacheHit = "${{ steps.cache-docker-images.outputs.cache-hit }}"
|
||||
if ($cacheHit -eq 'true') {
|
||||
Write-Output "💾 Cache Status: ✅ HIT - faster pull expected"
|
||||
} else {
|
||||
Write-Output "💾 Cache Status: ❌ MISS - full pull required"
|
||||
}
|
||||
|
||||
# Check if image already exists locally (from cache)
|
||||
$imageExists = docker image inspect ${{ env.CONTAINER_IMAGE }}
|
||||
if ($LASTEXITCODE -eq 0) {
|
||||
Write-Output "✅ Container image found locally - skipping pull"
|
||||
} else {
|
||||
Write-Output "📥 Pulling container image..."
|
||||
docker pull ${{ env.CONTAINER_IMAGE }}
|
||||
Write-Output "✅ Container pulled successfully"
|
||||
}
|
||||
- name: 🐳 Pull Docker Image
|
||||
if: |
|
||||
(matrix.platform == 'linux' && inputs.test_linux != false) ||
|
||||
(matrix.platform == 'windows' && inputs.test_windows != false)
|
||||
run: docker pull ${{ env.CONTAINER_IMAGE }}
|
||||
|
||||
- name: 📊 Container Information
|
||||
if: |
|
||||
|
||||
41
.github/workflows/quarto-build-container.yml
vendored
41
.github/workflows/quarto-build-container.yml
vendored
@@ -167,6 +167,7 @@ jobs:
|
||||
|
||||
env:
|
||||
CONTAINER_IMAGE: ${{ format('{0}/{1}/quarto-{2}:{3}', inputs.container_registry || 'ghcr.io', github.repository, matrix.platform, inputs.container_tag || 'latest') }}
|
||||
DOCKERFILE_PATH: ./docker/${{ matrix.platform }}/dockerfile
|
||||
|
||||
steps:
|
||||
- name: 🛑 Skip build
|
||||
@@ -179,41 +180,17 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: 💾 Cache Docker Images
|
||||
- name: 🔑 Log in to GitHub Container Registry
|
||||
if: matrix.enabled
|
||||
uses: actions/cache@v4
|
||||
id: cache-docker-images
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
path: |
|
||||
${{ runner.os == 'Linux' && '~/.docker' || '' }}
|
||||
${{ runner.os == 'Windows' && '~/AppData/Local/Docker/wsl/data' || '' }}
|
||||
key: docker-${{ runner.os }}-${{ env.CONTAINER_IMAGE }}-${{ hashFiles('docker/**') }}
|
||||
restore-keys: |
|
||||
docker-${{ runner.os }}-${{ env.CONTAINER_IMAGE }}-
|
||||
docker-${{ runner.os }}-
|
||||
registry: ${{ env.CONTAINER_REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: 🐳 Pull Windows Container
|
||||
if: matrix.platform == 'windows' && matrix.enabled
|
||||
shell: pwsh
|
||||
run: |
|
||||
Write-Output "🐳 Pulling Windows container..."
|
||||
Write-Output "📦 Image: ${{ env.CONTAINER_IMAGE }}"
|
||||
$cacheHit = "${{ steps.cache-docker-images.outputs.cache-hit }}"
|
||||
if ($cacheHit -eq 'true') {
|
||||
Write-Output "💾 Cache Status: ✅ HIT - faster pull expected"
|
||||
} else {
|
||||
Write-Output "💾 Cache Status: ❌ MISS - full pull required"
|
||||
}
|
||||
|
||||
# Check if image already exists locally (from cache)
|
||||
$imageExists = docker image inspect ${{ env.CONTAINER_IMAGE }}
|
||||
if ($LASTEXITCODE -eq 0) {
|
||||
Write-Output "✅ Container image found locally - skipping pull"
|
||||
} else {
|
||||
Write-Output "📥 Pulling container image..."
|
||||
docker pull ${{ env.CONTAINER_IMAGE }}
|
||||
Write-Output "✅ Container pulled successfully"
|
||||
}
|
||||
- name: 🐳 Pull Docker Image
|
||||
if: matrix.enabled
|
||||
run: docker pull ${{ env.CONTAINER_IMAGE }}
|
||||
|
||||
- name: 🔨 Build ${{ matrix.format_name }} (Linux)
|
||||
if: matrix.platform == 'linux' && matrix.enabled
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
# Exclude unnecessary files from Docker build context
|
||||
# This reduces build time and image size
|
||||
|
||||
# Build artifacts
|
||||
build/
|
||||
_book/
|
||||
_site/
|
||||
*.pdf
|
||||
*.html
|
||||
|
||||
# Git and version control
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# Documentation
|
||||
docs/
|
||||
*.md
|
||||
!docker/quarto-linux-build/README.md
|
||||
|
||||
# IDE and editor files
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Logs and temporary files
|
||||
*.log
|
||||
*.tmp
|
||||
*.temp
|
||||
|
||||
# Node modules (if any)
|
||||
node_modules/
|
||||
|
||||
# Python cache
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
|
||||
# R cache
|
||||
.Rhistory
|
||||
.RData
|
||||
|
||||
# Large media files (not needed for build)
|
||||
assets/media/
|
||||
*.mp4
|
||||
*.avi
|
||||
*.mov
|
||||
|
||||
# Test files
|
||||
test-*
|
||||
*.test.*
|
||||
|
||||
# Backup files
|
||||
*.bak
|
||||
*.backup
|
||||
|
||||
# Large data files
|
||||
data/
|
||||
*.csv
|
||||
*.json
|
||||
*.xml
|
||||
|
||||
# Keep only essential files for build
|
||||
# - tools/dependencies/ (needed for package installation)
|
||||
# - book/ (needed for build testing)
|
||||
# - .github/workflows/ (needed for workflow files)
|
||||
@@ -1,652 +0,0 @@
|
||||
# MLSysBook Quarto Build Container
|
||||
# Based on Ubuntu 22.04 with all dependencies pre-installed
|
||||
# This container eliminates the 30-45 minute setup time for Linux builds
|
||||
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# Set environment variables
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV R_LIBS_USER=/usr/local/lib/R/library
|
||||
ENV QUARTO_LOG_LEVEL=INFO
|
||||
ENV PYTHONIOENCODING=utf-8
|
||||
ENV LANG=en_US.UTF-8
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
ENV PATH=/usr/local/texlive/bin/x86_64-linux:$PATH
|
||||
|
||||
# === PHASE 0: ANNOUNCE DEPENDENCY FILES (log output only; the actual COPY instructions follow in the builder stage below) ===
|
||||
RUN echo "🚀 === STARTING DEPENDENCY FILE COPY ===" && \
|
||||
echo "📋 Files to copy (early for cache efficiency):" && \
|
||||
echo " - tools/dependencies/requirements/ → /tmp/requirements/" && \
|
||||
echo " - tools/dependencies/requirements-build.txt → /tmp/requirements.txt" && \
|
||||
echo " - tools/dependencies/install_packages.R → /tmp/install_packages.R" && \
|
||||
echo " - tools/dependencies/tl_packages → /tmp/tl_packages" && \
|
||||
echo " - docker/build-quarto-linux/verify_r_packages.R → /tmp/verify_r_packages.R" && \
|
||||
echo "✅ Dependency file copy phase complete"
|
||||
|
||||
FROM build-base AS builder
|
||||
|
||||
# Copy dependency files
|
||||
COPY tools/dependencies/requirements.txt /tmp/requirements.txt
|
||||
COPY tools/dependencies/install_packages.R /tmp/install_packages.R
|
||||
COPY tools/dependencies/tl_packages /tmp/tl_packages
|
||||
|
||||
# Install and configure locales
|
||||
RUN echo "🚀 === STARTING LOCALE CONFIGURATION ===" && \
|
||||
echo "🔍 Checking system readiness..." && \
|
||||
if [ -f /etc/os-release ]; then \
|
||||
echo "✅ OS release file found"; \
|
||||
cat /etc/os-release | grep PRETTY_NAME; \
|
||||
else \
|
||||
echo "❌ OS release file not found"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "📦 Installing locales package..." && \
|
||||
apt-get update && apt-get install -y locales && \
|
||||
echo "📦 Locales package installed" && \
|
||||
echo "🔧 Generating en_US.UTF-8 locale..." && \
|
||||
locale-gen en_US.UTF-8 && \
|
||||
echo "📄 Locale generated" && \
|
||||
echo "🔧 Updating system locale..." && \
|
||||
update-locale LANG=en_US.UTF-8 && \
|
||||
echo "🔧 System locale updated" && \
|
||||
echo "🧹 Cleaning package cache..." && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
echo "✅ Locale configuration complete"
|
||||
|
||||
# === PHASE 2: SYSTEM DEPENDENCIES (core packages; locale configuration is the step above) ===
|
||||
RUN echo "🚀 === STARTING SYSTEM DEPENDENCIES INSTALLATION ===" && \
|
||||
echo "⏰ Estimated time: 2-3 minutes" && \
|
||||
echo "📊 Free disk space: $(df -h / | tail -1 | awk '{print $4}')" && \
|
||||
start_time=$(date +%s) && \
|
||||
\
|
||||
echo "🔄 Updating package lists..." && \
|
||||
apt-get update && \
|
||||
\
|
||||
echo "📦 Installing core system packages (25 packages)..." && \
|
||||
echo "📋 Package list:" && \
|
||||
echo " - fonts-dejavu" && \
|
||||
echo " - fonts-freefont-ttf" && \
|
||||
echo " - gdk-pixbuf2.0-bin" && \
|
||||
echo " - libcairo2" && \
|
||||
echo " - libfontconfig1" && \
|
||||
echo " - libfontconfig1-dev" && \
|
||||
echo " - libfreetype6" && \
|
||||
echo " - libfreetype6-dev" && \
|
||||
echo " - libpango-1.0-0" && \
|
||||
echo " - libpangocairo-1.0-0" && \
|
||||
echo " - libpangoft2-1.0-0" && \
|
||||
echo " - libxml2-dev" && \
|
||||
echo " - libcurl4-openssl-dev" && \
|
||||
echo " - libjpeg-dev" && \
|
||||
echo " - libtiff5-dev" && \
|
||||
echo " - libpng-dev" && \
|
||||
echo " - libharfbuzz-dev" && \
|
||||
echo " - libfribidi-dev" && \
|
||||
echo " - librsvg2-dev" && \
|
||||
echo " - libgdal-dev" && \
|
||||
echo " - libudunits2-dev" && \
|
||||
echo " - wget" && \
|
||||
echo " - curl" && \
|
||||
echo " - git" && \
|
||||
apt-get install -y \
|
||||
fonts-dejavu \
|
||||
fonts-freefont-ttf \
|
||||
gdk-pixbuf2.0-bin \
|
||||
libcairo2 \
|
||||
libfontconfig1 \
|
||||
libfontconfig1-dev \
|
||||
libfreetype6 \
|
||||
libfreetype6-dev \
|
||||
libpango-1.0-0 \
|
||||
libpangocairo-1.0-0 \
|
||||
libpangoft2-1.0-0 \
|
||||
libxml2-dev \
|
||||
libcurl4-openssl-dev \
|
||||
libjpeg-dev \
|
||||
libtiff5-dev \
|
||||
libpng-dev \
|
||||
libharfbuzz-dev \
|
||||
libfribidi-dev \
|
||||
librsvg2-dev \
|
||||
libgdal-dev \
|
||||
libudunits2-dev \
|
||||
wget \
|
||||
curl \
|
||||
git && \
|
||||
echo "📦 All system packages installed successfully" && \
|
||||
\
|
||||
echo "🔍 Verifying critical packages..." && \
|
||||
if command -v wget >/dev/null 2>&1; then \
|
||||
echo "📦 wget available"; \
|
||||
else \
|
||||
echo "❌ wget not found"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
if command -v curl >/dev/null 2>&1; then \
|
||||
echo "📦 curl available"; \
|
||||
else \
|
||||
echo "❌ curl not found"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
if command -v git >/dev/null 2>&1; then \
|
||||
echo "📦 git available"; \
|
||||
else \
|
||||
echo "❌ git not found"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
\
|
||||
echo "🧹 Cleaning package cache..." && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
\
|
||||
end_time=$(date +%s) && \
|
||||
duration=$((end_time - start_time)) && \
|
||||
echo "✅ === SYSTEM DEPENDENCIES COMPLETE === (${duration}s)" && \
|
||||
echo "📊 Free disk space: $(df -h / | tail -1 | awk '{print $4}')"
|
||||
|
||||
# === PHASE 2 (continued): INKSCAPE INSTALLATION (from the inkscape.dev/stable PPA) ===
|
||||
RUN echo "🚀 === STARTING INKSCAPE INSTALLATION ===" && \
|
||||
echo "⏰ Estimated time: 1-2 minutes" && \
|
||||
start_time=$(date +%s) && \
|
||||
\
|
||||
echo "🔄 Adding Inkscape PPA repository..." && \
|
||||
apt-get update && \
|
||||
echo "📦 Installing software-properties-common..." && \
|
||||
apt-get install -y software-properties-common && \
|
||||
echo "📦 software-properties-common installed" && \
|
||||
echo "🔧 Adding Inkscape PPA..." && \
|
||||
add-apt-repository ppa:inkscape.dev/stable -y && \
|
||||
echo "📦 Inkscape PPA added" && \
|
||||
\
|
||||
echo "📦 Installing Inkscape..." && \
|
||||
apt-get update && \
|
||||
apt-get install -y inkscape && \
|
||||
echo "📦 Inkscape installed" && \
|
||||
\
|
||||
echo "🧹 Cleaning package cache..." && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
\
|
||||
end_time=$(date +%s) && \
|
||||
duration=$((end_time - start_time)) && \
|
||||
echo "✅ === INKSCAPE INSTALLATION COMPLETE === (${duration}s)"
|
||||
|
||||
# Install font dependencies (note: fonts-freefont-ttf already installed above)
|
||||
RUN echo "🚀 === STARTING FONT INSTALLATION ===" && \
|
||||
echo "📦 Installing additional fonts..." && \
|
||||
apt-get update && apt-get install -y \
|
||||
fonts-liberation \
|
||||
fontconfig && \
|
||||
echo "📦 Fonts installed" && \
|
||||
echo "🔧 Updating font cache..." && \
|
||||
fc-cache -fv && \
|
||||
echo "📄 Font cache updated" && \
|
||||
echo "🧹 Cleaning package cache..." && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
echo "✅ Font installation complete"
|
||||
|
||||
# Test Inkscape SVG to PDF conversion (same as your workflow)
|
||||
RUN echo "🚀 === STARTING INKSCAPE TEST ===" && \
|
||||
echo "📋 Creating test SVG file..." && \
|
||||
echo '<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100"><circle cx="50" cy="50" r="40" fill="red"/></svg>' > test.svg && \
|
||||
echo "📄 Test SVG created" && \
|
||||
echo "🔄 Converting SVG to PDF..." && \
|
||||
inkscape --export-type=pdf --export-filename=test.pdf test.svg && \
|
||||
echo "📦 Conversion completed" && \
|
||||
if [ -f test.pdf ]; then \
|
||||
echo "✅ Inkscape SVG to PDF conversion successful!"; \
|
||||
echo "📊 PDF file details:"; \
|
||||
ls -lh test.pdf; \
|
||||
else \
|
||||
echo "❌ Inkscape SVG to PDF conversion failed."; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "🧹 Cleaning up test files..." && \
|
||||
rm -f test.svg test.pdf && \
|
||||
echo "✅ Inkscape test complete"
|
||||
|
||||
# === PHASE 3: GHOSTSCRIPT INSTALLATION ===
|
||||
RUN echo "🚀 === STARTING GHOSTSCRIPT INSTALLATION ===" && \
|
||||
echo "⏰ Estimated time: 30 seconds" && \
|
||||
start_time=$(date +%s) && \
|
||||
\
|
||||
echo "📦 Installing Ghostscript..." && \
|
||||
apt-get update && apt-get install -y ghostscript && \
|
||||
echo "📦 Ghostscript installed" && \
|
||||
echo "📊 Ghostscript version:" && \
|
||||
gs --version && \
|
||||
\
|
||||
echo "🧹 Cleaning package cache..." && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
\
|
||||
end_time=$(date +%s) && \
|
||||
duration=$((end_time - start_time)) && \
|
||||
echo "✅ === GHOSTSCRIPT INSTALLATION COMPLETE === (${duration}s)"
|
||||
|
||||
# === PHASE 4: TEX LIVE INSTALLATION ===
|
||||
RUN echo "🚀 === STARTING TEX LIVE INSTALLATION ===" && \
|
||||
echo "⏰ Estimated time: 8-12 minutes (largest phase)" && \
|
||||
echo "📊 Free disk space before: $(df -h / | tail -1 | awk '{print $4}')" && \
|
||||
start_time=$(date +%s) && \
|
||||
\
|
||||
echo "📦 Installing TeX Live prerequisites..." && \
|
||||
echo "📋 Prerequisites:" && \
|
||||
echo " - perl" && \
|
||||
echo " - wget" && \
|
||||
echo " - xzdec" && \
|
||||
apt-get update && apt-get install -y \
|
||||
perl \
|
||||
wget \
|
||||
xzdec && \
|
||||
echo "📦 Prerequisites installed" && \
|
||||
\
|
||||
echo "🔍 Verifying prerequisites..." && \
|
||||
if command -v perl >/dev/null 2>&1; then \
|
||||
echo "📦 perl available"; \
|
||||
else \
|
||||
echo "❌ perl not found"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
if command -v wget >/dev/null 2>&1; then \
|
||||
echo "📦 wget available"; \
|
||||
else \
|
||||
echo "❌ wget not found"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
if command -v xzdec >/dev/null 2>&1; then \
|
||||
echo "📦 xzdec available"; \
|
||||
else \
|
||||
echo "❌ xzdec not found"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
\
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
\
|
||||
echo "🔄 Downloading TeX Live installer (~4MB)..." && \
|
||||
wget -O /tmp/install-tl-unx.tar.gz "https://mirror.ctan.org/systems/texlive/tlnet/install-tl-unx.tar.gz" && \
|
||||
echo "📥 Download completed" && \
|
||||
echo "📊 Downloaded file size:" && \
|
||||
ls -lh /tmp/install-tl-unx.tar.gz && \
|
||||
\
|
||||
echo "📦 Extracting TeX Live installer..." && \
|
||||
cd /tmp && tar -xzf install-tl-unx.tar.gz && \
|
||||
echo "📦 Extraction completed" && \
|
||||
echo "📊 Extracted files:" && \
|
||||
ls -la /tmp/install-tl-* && \
|
||||
\
|
||||
echo "🔧 Creating TeX Live installation profile..." && \
|
||||
echo "selected_scheme scheme-medium" > /tmp/texlive.profile && \
|
||||
echo "tlpdbopt_install_docfiles 0" >> /tmp/texlive.profile && \
|
||||
echo "tlpdbopt_install_srcfiles 0" >> /tmp/texlive.profile && \
|
||||
echo "TEXDIR /usr/local/texlive" >> /tmp/texlive.profile && \
|
||||
echo "TEXMFCONFIG /usr/local/texlive/texmf-config" >> /tmp/texlive.profile && \
|
||||
echo "TEXMFHOME /usr/local/texlive/texmf-home" >> /tmp/texlive.profile && \
|
||||
echo "TEXMFLOCAL /usr/local/texlive/texmf-local" >> /tmp/texlive.profile && \
|
||||
echo "TEXMFSYSCONFIG /usr/local/texlive/texmf-config" >> /tmp/texlive.profile && \
|
||||
echo "TEXMFSYSVAR /usr/local/texlive/texmf-var" >> /tmp/texlive.profile && \
|
||||
echo "TEXMFVAR /usr/local/texlive/texmf-var" >> /tmp/texlive.profile && \
|
||||
echo "📄 Profile created" && \
|
||||
echo "📊 Profile contents:" && \
|
||||
cat /tmp/texlive.profile && \
|
||||
\
|
||||
echo "🔄 Installing TeX Live base system..." && \
|
||||
/tmp/install-tl-*/install-tl --profile=/tmp/texlive.profile && \
|
||||
echo "📦 TeX Live base system installed" && \
|
||||
\
|
||||
echo "🔧 Setting up TeX Live PATH..." && \
|
||||
echo 'export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH' >> /etc/bash.bashrc && \
|
||||
export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
|
||||
echo "🔧 TeX Live PATH configured" && \
|
||||
\
|
||||
echo "📊 Analyzing tl_packages file..." && \
|
||||
collection_count=$(grep -c '^collection-' /tmp/tl_packages) && \
|
||||
echo "📦 Found $collection_count TeX Live collections to install" && \
|
||||
\
|
||||
echo "🔄 Installing TeX Live collections..." && \
|
||||
export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
|
||||
echo "📍 Checking if tlmgr is available..." && \
|
||||
if command -v tlmgr >/dev/null 2>&1; then \
|
||||
echo "📦 tlmgr available"; \
|
||||
tlmgr --version | head -1; \
|
||||
else \
|
||||
echo "❌ tlmgr not found or not working"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
i=1 && \
|
||||
while IFS= read -r collection; do \
|
||||
case "$collection" in \
|
||||
collection-*) \
|
||||
echo "📦 [$i/$collection_count] Installing $collection..."; \
|
||||
if command -v tlmgr >/dev/null 2>&1; then \
|
||||
tlmgr install "$collection" || echo "⚠️ Failed to install $collection, continuing..."; \
|
||||
else \
|
||||
echo "⚠️ tlmgr not available, skipping $collection"; \
|
||||
fi; \
|
||||
i=$((i+1)); \
|
||||
;; \
|
||||
esac; \
|
||||
done < /tmp/tl_packages && \
|
||||
\
|
||||
echo "🧹 Cleaning up TeX Live installer..." && \
|
||||
rm -rf /tmp/install-tl-* /tmp/texlive.profile /tmp/install-tl-unx.tar.gz && \
|
||||
\
|
||||
end_time=$(date +%s) && \
|
||||
duration=$((end_time - start_time)) && \
|
||||
echo "✅ === TEX LIVE INSTALLATION COMPLETE === (${duration}s)" && \
|
||||
echo "📊 Free disk space after: $(df -h / | tail -1 | awk '{print $4}')" && \
|
||||
echo "📊 TeX Live disk usage: $(du -sh /usr/local/texlive 2>/dev/null || echo 'N/A')"
|
||||
|
||||
# Verify TeX Live installation (best-effort: missing directories or a missing lualatex are reported but do not fail the build)
|
||||
RUN echo "🔄 Verifying TeX Live installation..." && \
|
||||
export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
|
||||
echo "📍 PATH: $PATH" && \
|
||||
echo "📍 Checking TeX Live directory:" && \
|
||||
ls -la /usr/local/texlive/ || echo "❌ TeX Live directory not found" && \
|
||||
ls -la /usr/local/texlive/bin/ || echo "❌ TeX Live bin directory not found" && \
|
||||
if [ -f /usr/local/texlive/bin/x86_64-linux/lualatex ]; then \
|
||||
echo "✅ lualatex found"; \
|
||||
/usr/local/texlive/bin/x86_64-linux/lualatex --version | head -1; \
|
||||
else \
|
||||
echo "❌ lualatex not found, checking for alternative locations"; \
|
||||
find /usr/local/texlive -name "lualatex" -type f 2>/dev/null || echo "No lualatex found anywhere"; \
|
||||
fi && \
|
||||
echo "✅ TeX Live verification complete (allowing partial failures)"
|
||||
|
||||
# === PHASE 6: R INSTALLATION ===
|
||||
RUN echo "🚀 === STARTING R INSTALLATION ===" && \
|
||||
echo "⏰ Estimated time: 1-2 minutes" && \
|
||||
start_time=$(date +%s) && \
|
||||
\
|
||||
echo "📦 Installing R and development packages..." && \
|
||||
echo "📋 R packages:" && \
|
||||
echo " - r-base" && \
|
||||
echo " - r-base-dev" && \
|
||||
echo " - r-recommended" && \
|
||||
apt-get update && apt-get install -y \
|
||||
r-base \
|
||||
r-base-dev \
|
||||
r-recommended && \
|
||||
if [ $? -ne 0 ]; then \
|
||||
echo "❌ Failed to install R packages"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "📦 R packages installed" && \
|
||||
\
|
||||
echo "🧹 Cleaning package cache..." && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
\
|
||||
echo "📊 R version: $(R --version | head -1)" && \
|
||||
if [ $? -ne 0 ]; then \
|
||||
echo "❌ Failed to verify R installation"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
\
|
||||
end_time=$(date +%s) && \
|
||||
duration=$((end_time - start_time)) && \
|
||||
echo "✅ === R INSTALLATION COMPLETE === (${duration}s)"
|
||||
|
||||
# === PHASE 7: PYTHON INSTALLATION ===
|
||||
RUN echo "🚀 === STARTING PYTHON INSTALLATION ===" && \
|
||||
echo "⏰ Estimated time: 1 minute" && \
|
||||
start_time=$(date +%s) && \
|
||||
\
|
||||
echo "📦 Installing Python 3 and development packages..." && \
|
||||
echo "📋 Python packages:" && \
|
||||
echo " - python3" && \
|
||||
echo " - python3-pip" && \
|
||||
echo " - python3-dev" && \
|
||||
apt-get update && apt-get install -y \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-dev && \
|
||||
if [ $? -ne 0 ]; then \
|
||||
echo "❌ Failed to install Python packages"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "📦 Python packages installed" && \
|
||||
\
|
||||
echo "🧹 Cleaning package cache..." && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
\
|
||||
echo "📊 Python version: $(python3 --version)" && \
|
||||
if [ $? -ne 0 ]; then \
|
||||
echo "❌ Failed to verify Python installation"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "📊 Pip version: $(pip3 --version)" && \
|
||||
if [ $? -ne 0 ]; then \
|
||||
echo "❌ Failed to verify pip installation"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
\
|
||||
end_time=$(date +%s) && \
|
||||
duration=$((end_time - start_time)) && \
|
||||
echo "✅ === PYTHON INSTALLATION COMPLETE === (${duration}s)"
|
||||
|
||||
# === PHASE 5: QUARTO INSTALLATION ===
|
||||
RUN echo "🚀 === STARTING QUARTO INSTALLATION ===" && \
|
||||
echo "⏰ Estimated time: 1 minute" && \
|
||||
start_time=$(date +%s) && \
|
||||
\
|
||||
echo "📦 Downloading Quarto 1.7.31..." && \
|
||||
wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.7.31/quarto-1.7.31-linux-amd64.deb && \
|
||||
if [ $? -ne 0 ]; then \
|
||||
echo "❌ Failed to download Quarto"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "📥 Download completed" && \
|
||||
echo "📊 Downloaded file size:" && \
|
||||
ls -lh quarto-1.7.31-linux-amd64.deb && \
|
||||
\
|
||||
echo "📦 Installing Quarto..." && \
|
||||
dpkg -i quarto-1.7.31-linux-amd64.deb && \
|
||||
if [ $? -ne 0 ]; then \
|
||||
echo "❌ Failed to install Quarto"; \
|
||||
exit 1; \
|
||||
fi && \
|
||||
echo "📦 Quarto installed" && \
|
||||
\
|
||||
echo "🧹 Cleaning up installer..." && \
|
||||
rm quarto-1.7.31-linux-amd64.deb && \
|
||||
echo "🧹 Installer cleaned up" && \
|
||||
\
|
||||
end_time=$(date +%s) && \
|
||||
duration=$((end_time - start_time)) && \
|
||||
echo "✅ === QUARTO INSTALLATION COMPLETE === (${duration}s)"
|
||||
|
||||
# Create R library directory
|
||||
RUN echo "🚀 === STARTING R LIBRARY SETUP ===" && \
|
||||
echo "📁 Creating R library directory..." && \
|
||||
mkdir -p $R_LIBS_USER && \
|
||||
echo "✅ R library directory created: $R_LIBS_USER" && \
|
||||
echo "✅ R library setup complete"
|
||||
|
||||
|
||||
|
||||
# === PHASE 8: PYTHON PACKAGES ===
# Upgrades pip, installs everything listed in /tmp/requirements.txt, then
# removes pip and bytecode caches to keep the layer small.
#
# Error handling notes:
# - `&&` and `||` have equal precedence and associate left to right, so in
#   `a && b || true && c` the `|| true` rescues a failure of `a` as well as `b`.
#   The best-effort cache cleanup is therefore wrapped in its own `{ ...; }`
#   group so it cannot hide a failed `pip3 install`.
# - The install failure message is attached with `||`; a `$? -ne 0` test placed
#   after `&&` only runs when the previous command has already succeeded.
RUN echo "🚀 === STARTING PYTHON PACKAGE INSTALLATION ===" && \
    echo "⏰ Estimated time: 1-2 minutes" && \
    start_time=$(date +%s) && \
    \
    echo "🔄 Upgrading pip..." && \
    pip3 install --upgrade pip && \
    \
    echo "📊 Analyzing requirements.txt..." && \
    package_count=$(grep -v '^#' /tmp/requirements.txt | grep -v '^$' | wc -l) && \
    echo "📦 Found $package_count Python packages to install" && \
    \
    echo "🔄 Installing Python packages with space optimization..." && \
    { pip3 install --no-cache-dir -r /tmp/requirements.txt || \
        { echo "❌ Failed to install Python packages"; exit 1; }; } && \
    \
    echo "🧹 Cleaning Python installation caches..." && \
    { pip3 cache purge && \
      find /usr -name "*.pyc" -delete && \
      find /usr -name "__pycache__" -type d -exec rm -rf {} + || true; } && \
    \
    echo "📊 Installed Python packages:" && \
    pip3 list | head -10 && \
    echo "📊 Total packages: $(pip3 list | wc -l)" && \
    \
    end_time=$(date +%s) && \
    duration=$((end_time - start_time)) && \
    echo "✅ === PYTHON PACKAGES COMPLETE === (${duration}s)"
|
||||
|
||||
# === PHASE 9: R PACKAGES ===
|
||||
# Step 9.1: Set up R environment and install remotes
# Creates the R_LIBS_USER library if needed, makes it the active library path,
# and installs the `remotes` package from CRAN.
#
# install.packages() only emits a warning when an install fails, so the R code
# checks for the package explicitly and exits with status 1 if it is missing.
# The shell-side message is attached with `||`; a `$? -ne 0` test placed after
# `&&` only runs when R has already succeeded and can never report a failure.
RUN echo "🚀 === STEP 9.1: SETTING UP R ENVIRONMENT ===" && \
    { R --slave -e " \
        options(repos = c(CRAN = 'https://cran.rstudio.com')); \
        cat('🔄 Setting up R environment...\n'); \
        cat(paste('R library path:', Sys.getenv('R_LIBS_USER'), '\n')); \
        lib_path <- Sys.getenv('R_LIBS_USER'); \
        dir.create(lib_path, showWarnings = FALSE, recursive = TRUE); \
        .libPaths(lib_path); \
        cat('📦 Installing remotes package...\n'); \
        install.packages('remotes'); \
        if (!requireNamespace('remotes', quietly = TRUE)) quit(status = 1); \
        cat('✅ R environment setup complete\n'); \
      " || { echo "❌ Failed to set up R environment"; exit 1; }; } && \
    echo "✅ R environment setup successful"
|
||||
|
||||
# Step 9.2: Install R packages from install_packages.R or fallback
# Sources /tmp/install_packages.R when it exists; otherwise installs a small
# default set of packages from CRAN into the R_LIBS_USER library.
#
# The failure message is attached with `||` so it actually prints when R exits
# non-zero (for example when source() raises an error). A `$? -ne 0` test
# chained after `&&` is unreachable on failure.
RUN echo "🚀 === STEP 9.2: INSTALLING R PACKAGES ===" && \
    { R --slave -e " \
        options(repos = c(CRAN = 'https://cran.rstudio.com')); \
        lib_path <- Sys.getenv('R_LIBS_USER'); \
        .libPaths(lib_path); \
        if (file.exists('/tmp/install_packages.R')) { \
          cat('📦 Installing packages from tools/dependencies/install_packages.R...\n'); \
          source('/tmp/install_packages.R'); \
        } else { \
          cat('⚠️ No tools/dependencies/install_packages.R found, installing common packages\n'); \
          pkgs <- c('rmarkdown', 'knitr', 'tidyverse', 'ggplot2', 'bookdown'); \
          cat(paste('📦 Installing packages:', paste(pkgs, collapse=', '), '\n')); \
          install.packages(pkgs); \
        }; \
        cat('✅ R package installation complete\n'); \
      " || { echo "❌ Failed to install R packages"; exit 1; }; } && \
    echo "✅ R packages installed successfully"
|
||||
|
||||
# Step 9.3: Verify R package installation
# Prints the first ten installed package names and the total number of packages
# visible on the R_LIBS_USER library path.
#
# installed.packages()[, 'Package'] is a character vector, so the total is taken
# with length(); nrow() on a vector returns NULL and leaves the count blank.
# The failure message is attached with `||` because a `$? -ne 0` test chained
# after `&&` never runs when R fails.
RUN echo "🚀 === STEP 9.3: VERIFYING R PACKAGES ===" && \
    { R --slave -e " \
        lib_path <- Sys.getenv('R_LIBS_USER'); \
        .libPaths(lib_path); \
        cat('📊 Installed packages:\n'); \
        ip <- installed.packages()[, 'Package']; \
        print(head(ip, 10)); \
        cat(paste('Total packages installed:', length(ip), '\n')); \
      " || { echo "❌ Failed to verify R packages"; exit 1; }; } && \
    echo "✅ R package verification successful"
|
||||
|
||||
# === PHASE 10: R PACKAGE VERIFICATION ===
# Runs /tmp/verify_r_packages.R and aborts the build when it exits non-zero.
# The failure message is attached with `||`; a `$? -ne 0` test chained after
# `&&` would only run once the script had already succeeded.
RUN echo "🔍 Verifying R package installation..." && \
    { Rscript /tmp/verify_r_packages.R || \
        { echo "❌ R package verification failed"; exit 1; }; } && \
    echo "✅ R package verification successful"
|
||||
|
||||
# === PHASE 11: COMPREHENSIVE CLEANUP ===
# Removes temporary files, package-manager caches, bytecode caches, R help
# files, TeX Live caches, and system docs/logs, then reports free disk space.
#
# Every best-effort step is wrapped as `{ cmd || true; }`. `&&` and `||` have
# equal precedence and associate left to right, so a bare `cmd || true` in the
# middle of the chain would also mask a failure of any strict step before it
# (for example `apt-get clean`).
#
# NOTE(review): texmf-var/web2c normally holds generated TeX format files;
# confirm that deleting its contents does not break or slow the first LaTeX run.
RUN echo "🚀 === STARTING COMPREHENSIVE CLEANUP ===" && \
    echo "📊 Disk space before cleanup: $(df -h / | tail -1 | awk '{print $4}')" && \
    start_time=$(date +%s) && \
    \
    echo "🧹 Removing temporary files..." && \
    rm -rf /tmp/* && \
    rm -rf /var/tmp/* && \
    \
    echo "🧹 Cleaning package caches..." && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf /var/cache/apt/* && \
    \
    echo "🧹 Cleaning Python caches..." && \
    { find /usr -name "*.pyc" -delete && \
      find /usr -name "__pycache__" -type d -exec rm -rf {} + || true; } && \
    { pip3 cache purge || true; } && \
    \
    echo "🧹 Cleaning R temporary files..." && \
    { rm -rf /tmp/Rtmp* || true; } && \
    { rm -rf /var/lib/R/site-library/*/help || true; } && \
    \
    echo "🧹 Cleaning TeX Live caches and docs..." && \
    { rm -rf /usr/local/texlive/*/texmf-var/luatex-cache/* || true; } && \
    { rm -rf /usr/local/texlive/*/texmf-var/web2c/* || true; } && \
    \
    echo "🧹 Removing unnecessary system files..." && \
    rm -rf /usr/share/doc/* && \
    rm -rf /usr/share/man/* && \
    rm -rf /usr/share/info/* && \
    rm -rf /var/log/* && \
    \
    end_time=$(date +%s) && \
    duration=$((end_time - start_time)) && \
    echo "✅ === COMPREHENSIVE CLEANUP COMPLETE === (${duration}s)" && \
    echo "📊 Final disk space: $(df -h / | tail -1 | awk '{print $4}')"
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /workspace
|
||||
|
||||
# Verify installations
# Confirms that quarto, python3, R and lualatex are all on PATH and can report
# their version. Each tool is handled with a guard clause: if the binary is
# missing, print diagnostics and abort the build; otherwise run its version
# command, whose failure also aborts the build through the `&&` chain.
RUN echo "🚀 === STARTING FINAL VERIFICATION ===" && \
    export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
    \
    echo "📊 Checking Quarto..." && \
    if ! command -v quarto >/dev/null 2>&1; then \
        echo "❌ Quarto not found in PATH"; \
        echo "📍 Checking for quarto in common locations:"; \
        find /usr -name "quarto" -type f 2>/dev/null || echo "No quarto found"; \
        exit 1; \
    fi && \
    quarto --version && \
    echo "✅ Quarto verified" && \
    \
    echo "📊 Checking Python..." && \
    if ! command -v python3 >/dev/null 2>&1; then \
        echo "❌ Python3 not found"; \
        exit 1; \
    fi && \
    python3 --version && \
    echo "✅ Python verified" && \
    \
    echo "📊 Checking R..." && \
    if ! command -v R >/dev/null 2>&1; then \
        echo "❌ R not found"; \
        exit 1; \
    fi && \
    R --version && \
    echo "✅ R verified" && \
    \
    echo "📊 Checking LaTeX..." && \
    if ! command -v lualatex >/dev/null 2>&1; then \
        echo "❌ lualatex not found"; \
        echo "📍 Checking for lualatex in TeX Live:"; \
        find /usr/local/texlive -name "lualatex" -type f 2>/dev/null || echo "No lualatex found"; \
        exit 1; \
    fi && \
    lualatex --version && \
    echo "✅ LaTeX verified" && \
    \
    echo "✅ Final verification complete"
|
||||
|
||||
# Build-time summary: print the installed tool versions (a RUN step, not a Docker HEALTHCHECK)
|
||||
RUN export PATH=/usr/local/texlive/bin/x86_64-linux:$PATH && \
|
||||
echo "✅ Container build completed successfully" && \
|
||||
echo "📊 Quarto version: $(quarto --version)" && \
|
||||
echo "📊 Python version: $(python3 --version)" && \
|
||||
echo "📊 R version: $(R --version | head -1)" && \
|
||||
echo "📊 TeX Live: $(lualatex --version | head -1)"
|
||||
@@ -1,109 +0,0 @@
|
||||
# Quarto Build Container
|
||||
|
||||
This directory contains the Docker container configuration for the MLSysBook build system.
|
||||
|
||||
## Purpose
|
||||
|
||||
The container pre-installs all dependencies to eliminate the 30-45 minute setup time for Linux builds, reducing build times from 45 minutes to 5-10 minutes.
|
||||
|
||||
## Structure
|
||||
|
||||
```
|
||||
docker/quarto-build/
|
||||
├── Dockerfile # Container definition
|
||||
├── README.md # This file
|
||||
└── .dockerignore # Files to exclude from build
|
||||
```
|
||||
|
||||
## Container Contents
|
||||
|
||||
- **Base**: Ubuntu 22.04
|
||||
- **TeX Live**: Full distribution (texlive-full)
|
||||
- **R**: R-base with all required packages
|
||||
- **Python**: Python 3.13 with all requirements
|
||||
- **Quarto**: Version 1.7.31
|
||||
- **Tools**: Inkscape, Ghostscript, fonts
|
||||
- **Dependencies**: All from `tools/dependencies/`
|
||||
|
||||
## Build Process
|
||||
|
||||
The container is built and tested via GitHub Actions:
|
||||
|
||||
```bash
|
||||
# Trigger container build
|
||||
gh workflow run build-container.yml
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
The container is used in the containerized build workflow:
|
||||
|
||||
```yaml
|
||||
container:
|
||||
image: ghcr.io/harvard-edge/cs249r_book/quarto-build:latest
|
||||
options: --user root
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
The container build includes 17 comprehensive tests:
|
||||
|
||||
1. Quarto functionality
|
||||
2. Python packages (all from requirements.txt)
|
||||
3. R packages (all from install_packages.R)
|
||||
4. TeX Live and LaTeX engines
|
||||
5. Inkscape SVG to PDF conversion
|
||||
6. Ghostscript PDF compression
|
||||
7. Fonts and graphics libraries
|
||||
8. Quarto render test
|
||||
9. TikZ compilation test
|
||||
10. System resources check
|
||||
11. Network connectivity
|
||||
12. Book structure compatibility
|
||||
13. Quarto configuration files
|
||||
14. Dependencies files accessibility
|
||||
15. Quarto check (same as workflow)
|
||||
16. Actual build process simulation
|
||||
17. Memory and disk space verification
|
||||
|
||||
## Registry
|
||||
|
||||
- **Registry**: GitHub Container Registry (ghcr.io)
|
||||
- **Image**: `ghcr.io/harvard-edge/cs249r_book/quarto-build`
|
||||
- **Tags**: `latest`, `main`, `dev`, branch-specific tags
|
||||
- **Size**: ~2-3GB (includes TeX Live, R, Python packages)
|
||||
|
||||
## Performance
|
||||
|
||||
The container reduces build times significantly:
|
||||
- **Traditional Linux build**: 45 minutes (including dependency installation)
|
||||
- **Containerized build**: 5-10 minutes (dependencies pre-installed)
|
||||
- **Container size**: ~2-3GB (optimized with multi-layer cleanup)
|
||||
- **Build phases**: 11 optimized phases with progress tracking
|
||||
|
||||
## Recent Improvements (2025)
|
||||
|
||||
- Fixed dependency path issues after repository restructuring
|
||||
- Improved error handling and progress tracking
|
||||
- Optimized TeX Live package installation
|
||||
- Enhanced cleanup procedures for smaller image size
|
||||
- Added comprehensive testing (17 test scenarios)
|
||||
- Fixed PATH environment variables for all tools
|
||||
|
||||
## Build Phases
|
||||
|
||||
1. **System Dependencies** - Core Ubuntu packages and libraries
|
||||
2. **Inkscape Installation** - SVG to PDF conversion capability
|
||||
3. **Quarto Installation** - Pinned Quarto CLI release (v1.7.31)
|
||||
4. **TeX Live Installation** - Complete LaTeX distribution
|
||||
5. **Ghostscript Installation** - PDF processing capabilities
|
||||
6. **R Installation** - R base and development packages
|
||||
7. **Python Installation** - Python 3 with pip
|
||||
8. **Python Packages** - All production requirements
|
||||
9. **R Packages** - All required R libraries
|
||||
10. **R Package Verification** - Validation of successful installation
|
||||
11. **Comprehensive Cleanup** - Size optimization and cache clearing
|
||||
|
||||
- **Traditional build**: 45 minutes
|
||||
- **Containerized build**: 5-10 minutes
|
||||
- **Improvement**: 80-90% time reduction
|
||||
@@ -1,13 +0,0 @@
|
||||
#!/usr/bin/env Rscript

# Verify R package installation.
#
# Sources /tmp/install_packages.R and then checks that every entry of
# `required_packages` can be loaded. Exits with status 1 and lists the missing
# packages when any are absent.
# NOTE(review): `required_packages` is not defined in this script; it is
# presumably a character vector created by install_packages.R - confirm.
source('/tmp/install_packages.R')

# vapply() always returns a logical vector, even for an empty package list;
# sapply() would return list() in that case and make the negation below fail.
is_installed <- vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
missing_packages <- required_packages[!is_installed]

if (length(missing_packages) > 0) {
  cat('❌ Missing packages:', paste(missing_packages, collapse = ', '), '\n')
  quit(status = 1)
} else {
  cat('✅ All required R packages installed successfully\n')
}
|
||||
@@ -1,71 +0,0 @@
|
||||
# Exclude unnecessary files from Docker build context
|
||||
# This reduces build time and image size
|
||||
|
||||
# Build artifacts
|
||||
build/
|
||||
_book/
|
||||
_site/
|
||||
*.pdf
|
||||
*.html
|
||||
|
||||
# Git and version control
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# Documentation
|
||||
docs/
|
||||
*.md
|
||||
!docker/quarto-build-windows/README.md
|
||||
|
||||
# IDE and editor files
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Logs and temporary files
|
||||
*.log
|
||||
*.tmp
|
||||
*.temp
|
||||
|
||||
# Node modules (if any)
|
||||
node_modules/
|
||||
|
||||
# Python cache
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
|
||||
# R cache
|
||||
.Rhistory
|
||||
.RData
|
||||
|
||||
# Large media files (not needed for build)
|
||||
assets/media/
|
||||
*.mp4
|
||||
*.avi
|
||||
*.mov
|
||||
|
||||
# Test files
|
||||
test-*
|
||||
*.test.*
|
||||
|
||||
# Backup files
|
||||
*.bak
|
||||
*.backup
|
||||
|
||||
# Large data files
|
||||
data/
|
||||
*.csv
|
||||
*.json
|
||||
*.xml
|
||||
|
||||
# Keep only essential files for build
|
||||
# - tools/dependencies/ (needed for package installation)
|
||||
# - book/ (needed for build testing)
|
||||
# - .github/workflows/ (needed for workflow files)
|
||||
@@ -1,369 +0,0 @@
|
||||
# escape=`
|
||||
# MLSysBook Windows Quarto Build Container (Windows Server 2022)
|
||||
# - PowerShell 7 via ZIP (no MSI)
|
||||
# - Quarto via Scoop (version not pinned)
|
||||
# - Python via Scoop (main bucket, version not pinned) + requirements.txt
|
||||
# - Ghostscript + Inkscape (Scoop)
|
||||
# - TeX Live via Chocolatey + collections listed in tl_packages
|
||||
# - R via Scoop (main bucket, version not pinned) + packages via install_packages.R
|
||||
# - Verifications: tool version checks and `quarto check`
|
||||
|
||||
FROM mcr.microsoft.com/windows/server:ltsc2022
|
||||
|
||||
# Use Windows PowerShell initially
|
||||
SHELL ["powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass", "-Command"]
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 0: Base dirs and env (same as quarto-build workflow)
|
||||
# ------------------------------------------------------------
|
||||
ENV R_LIBS_USER="C:/r-lib"
|
||||
ENV QUARTO_LOG_LEVEL="INFO"
|
||||
ENV PYTHONIOENCODING="utf-8"
|
||||
ENV LANG="en_US.UTF-8"
|
||||
ENV LC_ALL="en_US.UTF-8"
|
||||
|
||||
RUN Write-Host '=== STARTING BASE SETUP ===' ; `
|
||||
Write-Host 'Creating base directories...' ; `
|
||||
New-Item -ItemType Directory -Force -Path 'C:\temp' | Out-Null ; `
|
||||
Write-Host '📁 Created C:\temp' ; `
|
||||
New-Item -ItemType Directory -Force -Path 'C:\r-lib' | Out-Null ; `
|
||||
Write-Host '📁 Created C:\r-lib' ; `
|
||||
Write-Host 'Environment variables set:' ; `
|
||||
Write-Host " R_LIBS_USER: $env:R_LIBS_USER" ; `
|
||||
Write-Host " QUARTO_LOG_LEVEL: $env:QUARTO_LOG_LEVEL" ; `
|
||||
Write-Host " PYTHONIOENCODING: $env:PYTHONIOENCODING" ; `
|
||||
Write-Host " LANG: $env:LANG" ; `
|
||||
Write-Host " LC_ALL: $env:LC_ALL" ; `
|
||||
Write-Host '✅ Base setup complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 1: PowerShell 7 (ZIP install, container-safe)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING POWERSHELL 7 INSTALLATION ===' ; `
|
||||
Write-Host 'Using ZIP install for container compatibility' ; `
|
||||
Write-Host 'Download URL: https://github.com/PowerShell/PowerShell/releases/download/v7.4.1/PowerShell-7.4.1-win-x64.zip' ; `
|
||||
$Url = 'https://github.com/PowerShell/PowerShell/releases/download/v7.4.1/PowerShell-7.4.1-win-x64.zip' ; `
|
||||
$Zip = 'C:\PowerShell-7.4.1.zip' ; `
|
||||
Write-Host "Downloading PowerShell 7 to: $Zip" ; `
|
||||
Invoke-WebRequest -Uri $Url -OutFile $Zip -UseBasicParsing ; `
|
||||
Write-Host '📥 Download completed' ; `
|
||||
Write-Host 'Creating PowerShell directory...' ; `
|
||||
New-Item -ItemType Directory -Force -Path 'C:\Program Files\PowerShell\7' | Out-Null ; `
|
||||
Write-Host '📁 Directory created' ; `
|
||||
Write-Host 'Extracting ZIP file...' ; `
|
||||
Expand-Archive -Path $Zip -DestinationPath 'C:\Program Files\PowerShell\7' -Force ; `
|
||||
Write-Host '📦 Extraction completed' ; `
|
||||
Write-Host 'Cleaning up ZIP file...' ; `
|
||||
Remove-Item $Zip -Force ; `
|
||||
Write-Host '🧹 Cleanup completed' ; `
|
||||
Write-Host 'Adding PowerShell to PATH...' ; `
|
||||
$mach = [Environment]::GetEnvironmentVariable('PATH','Machine') ; `
|
||||
Write-Host "Current PATH: $mach" ; `
|
||||
if ($mach -notmatch [regex]::Escape('C:\Program Files\PowerShell\7')) { `
|
||||
[Environment]::SetEnvironmentVariable('PATH', ('C:\Program Files\PowerShell\7;' + $mach), 'Machine') ; `
|
||||
Write-Host '🔗 PowerShell added to PATH' ; `
|
||||
} else { `
|
||||
Write-Host '⚠️ PowerShell already in PATH' ; `
|
||||
} ; `
|
||||
Write-Host 'Verifying PowerShell installation...' ; `
|
||||
& 'C:\Program Files\PowerShell\7\pwsh.exe' -NoLogo -Command '$PSVersionTable.PSVersion ; Write-Host ''PowerShell 7 installation verified ✅'''
|
||||
|
||||
# Switch to PowerShell 7 for subsequent layers
|
||||
SHELL ["C:\\Program Files\\PowerShell\\7\\pwsh.exe", "-NoLogo", "-ExecutionPolicy", "Bypass", "-Command"]
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 2: Chocolatey (package manager for Windows)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING CHOCOLATEY INSTALLATION ===' ; `
|
||||
Write-Host 'Installing Chocolatey package manager...' ; `
|
||||
Write-Host 'Setting TLS 1.2 for download...' ; `
|
||||
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 ; `
|
||||
Write-Host '🔒 TLS 1.2 enabled' ; `
|
||||
Write-Host 'Downloading and executing Chocolatey install script...' ; `
|
||||
iex ((New-Object Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')) ; `
|
||||
Write-Host '📦 Chocolatey install script executed' ; `
|
||||
Write-Host 'Verifying Chocolatey installation...' ; `
|
||||
choco --version ; `
|
||||
Write-Host '✅ Chocolatey installation complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 3: Copy dependency files (same as quarto-build workflow)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING DEPENDENCY FILE COPY ==='
|
||||
COPY tools/dependencies/requirements.txt C:/temp/requirements.txt
|
||||
COPY tools/dependencies/install_packages.R C:/temp/install_packages.R
|
||||
COPY tools/dependencies/tl_packages C:/temp/tl_packages
|
||||
RUN Write-Host '✅ Dependency file copy complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 4: Install TeX Live FIRST (Most complex, fail fast)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING TEX LIVE INSTALLATION (2025) ===' ; `
|
||||
Write-Host '📦 Installing TeX Live via Chocolatey...' ; `
|
||||
choco install texlive -y ; `
|
||||
Write-Host '✅ TeX Live installed via Chocolatey' ; `
|
||||
`
|
||||
Write-Host '🔍 Finding TeX Live installation directory...' ; `
|
||||
$texRoot = Join-Path $env:SystemDrive 'texlive' ; `
|
||||
Write-Host "📁 TeX Live root: $texRoot" ; `
|
||||
`
|
||||
Write-Host '🔍 Looking for year-based directories...' ; `
|
||||
$texYearDir = Get-ChildItem $texRoot -Directory | `
|
||||
Where-Object { $_.Name -match '^\d{4}$' } | `
|
||||
Sort-Object Name -Descending | `
|
||||
Select-Object -First 1 ; `
|
||||
Write-Host "📁 Found year directory: $($texYearDir.FullName)" ; `
|
||||
`
|
||||
$texLiveBin = Join-Path $texYearDir.FullName 'bin\windows' ; `
|
||||
Write-Host "📁 TeX Live bin directory: $texLiveBin" ; `
|
||||
`
|
||||
Write-Host '🔧 Adding TeX Live to PATH...' ; `
|
||||
$env:PATH = "$texLiveBin;$env:PATH" ; `
|
||||
Write-Host "✅ PATH updated with: $texLiveBin" ; `
|
||||
`
|
||||
Write-Host '📋 Reading collections from tl_packages...' ; `
|
||||
if (Test-Path 'C:\temp\tl_packages') { `
|
||||
$collections = Get-Content 'C:\temp\tl_packages' | `
|
||||
Where-Object { $_.Trim() -ne '' -and -not $_.Trim().StartsWith('#') } ; `
|
||||
Write-Host "📦 Found $($collections.Count) collections to install" ; `
|
||||
Write-Host '📋 Collections:' ; `
|
||||
$collections | ForEach-Object { Write-Host " - $_" } ; `
|
||||
`
|
||||
Write-Host '🔄 Installing collections...' ; `
|
||||
$i = 1 ; `
|
||||
foreach ($collection in $collections) { `
|
||||
Write-Host "📦 [$i/$($collections.Count)] Installing $collection..." ; `
|
||||
& "$texLiveBin\tlmgr.bat" install $collection ; `
|
||||
if ($LASTEXITCODE -eq 0) { `
|
||||
Write-Host "✅ $collection installed successfully" ; `
|
||||
} else { `
|
||||
Write-Host "⚠️ Failed to install $collection, continuing..." ; `
|
||||
} ; `
|
||||
$i++ ; `
|
||||
} ; `
|
||||
Write-Host '✅ Collection installation complete' ; `
|
||||
} else { `
|
||||
Write-Host '⚠️ No tl_packages file found, skipping collection installation' ; `
|
||||
} ; `
|
||||
`
|
||||
Write-Host '🔄 Updating tlmgr...' ; `
|
||||
& "$texLiveBin\tlmgr.bat" update --self --all ; `
|
||||
Write-Host '✅ tlmgr updated' ; `
|
||||
`
|
||||
Write-Host '🔍 Verifying lualatex installation...' ; `
|
||||
& "$texLiveBin\lualatex.exe" --version ; `
|
||||
Write-Host '✅ TeX Live installation verified'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 5: Install Scoop (Package manager setup)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING SCOOP INSTALLATION ===' ; `
|
||||
Write-Host 'Setting UTF-8 encoding...' ; `
|
||||
[Console]::OutputEncoding = [System.Text.Encoding]::UTF8 ; `
|
||||
$OutputEncoding = [System.Text.Encoding]::UTF8 ; `
|
||||
Write-Host '🔤 UTF-8 encoding set' ; `
|
||||
Write-Host 'Setting execution policy...' ; `
|
||||
Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser -Force ; `
|
||||
Write-Host '🔐 Execution policy set' ; `
|
||||
Write-Host 'Installing Scoop package manager...' ; `
|
||||
Invoke-WebRequest -useb get.scoop.sh -outfile 'install.ps1' ; `
|
||||
Write-Host '📥 Scoop install script downloaded' ; `
|
||||
& .\install.ps1 -RunAsAdmin ; `
|
||||
Write-Host '📦 Scoop installed' ; `
|
||||
Write-Host 'Adding Scoop shims to PATH...' ; `
|
||||
$scoopShims = Join-Path (Resolve-Path ~).Path 'scoop\shims' ; `
|
||||
Write-Host "Scoop shims path: $scoopShims" ; `
|
||||
$mach = [Environment]::GetEnvironmentVariable('PATH','Machine') ; `
|
||||
[Environment]::SetEnvironmentVariable('PATH', ($scoopShims + ';' + $mach), 'Machine') ; `
|
||||
Write-Host '🔗 Added Scoop shims to PATH' ; `
|
||||
Write-Host 'Installing Git (required for buckets)...' ; `
|
||||
scoop install git ; `
|
||||
Write-Host '📦 Git installed' ; `
|
||||
Write-Host 'Adding r-bucket...' ; `
|
||||
scoop bucket add r-bucket https://github.com/cderv/r-bucket.git ; `
|
||||
Write-Host '📦 r-bucket added' ; `
|
||||
Write-Host 'Adding extras bucket...' ; `
|
||||
scoop bucket add extras ; `
|
||||
Write-Host '📦 extras bucket added' ; `
|
||||
Write-Host '✅ Scoop installation completed!'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 6: Install Quarto (Main tool)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING QUARTO INSTALLATION ===' ; `
|
||||
Write-Host 'Installing Quarto via Scoop...' ; `
|
||||
scoop install quarto ; `
|
||||
Write-Host '📦 Quarto installed' ; `
|
||||
Write-Host 'Verifying Quarto installation...' ; `
|
||||
quarto --version ; `
|
||||
Write-Host '✅ Quarto installation completed!'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 7: Install Python (Medium complexity)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING PYTHON INSTALLATION ===' ; `
|
||||
Write-Host 'Installing Python via Scoop (same as quarto-build workflow)...' ; `
|
||||
Write-Host 'Installing Python from main bucket...' ; `
|
||||
scoop install main/python ; `
|
||||
Write-Host '📦 Python installed' ; `
|
||||
Write-Host 'Verifying Python installation...' ; `
|
||||
python --version ; `
|
||||
Write-Host '✅ Python installation complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 8: Install Python packages (Medium complexity)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING PYTHON PACKAGE INSTALLATION ===' ; `
|
||||
Write-Host 'Installing Python packages from requirements.txt (same as quarto-build workflow)...' ; `
|
||||
Write-Host 'Upgrading pip...' ; `
|
||||
python -m pip install --upgrade pip ; `
|
||||
Write-Host '📦 pip upgraded' ; `
|
||||
Write-Host 'Installing packages from requirements.txt...' ; `
|
||||
Write-Host 'Requirements file contents:' ; `
|
||||
Get-Content C:/temp/requirements.txt | Write-Host ; `
|
||||
python -m pip install -r C:/temp/requirements.txt ; `
|
||||
Write-Host '✅ Python package installation complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 9: Install Visual C++ Redistributable (Required for Quarto DLLs)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING VISUAL C++ REDISTRIBUTABLE INSTALLATION ===' ; `
|
||||
Write-Host 'Installing Microsoft Visual C++ Redistributable...' ; `
|
||||
Write-Host 'This is required for Quarto DLL dependencies on Windows' ; `
|
||||
choco install vcredist-all -y ; `
|
||||
Write-Host '📦 Visual C++ Redistributable installed' ; `
|
||||
Write-Host '✅ Visual C++ Redistributable installation complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 10: Install Ghostscript (required for PDF generation)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING GHOSTSCRIPT INSTALLATION ===' ; `
|
||||
Write-Host 'Installing Ghostscript via Scoop...' ; `
|
||||
scoop install main/ghostscript ; `
|
||||
Write-Host '📦 Ghostscript installed' ; `
|
||||
Write-Host 'Verifying Ghostscript installation...' ; `
|
||||
gs --version ; `
|
||||
Write-Host '✅ Ghostscript installation complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 11: Install Inkscape (required for SVG processing)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING INKSCAPE INSTALLATION ===' ; `
|
||||
Write-Host 'Installing Inkscape via Scoop...' ; `
|
||||
scoop install inkscape ; `
|
||||
Write-Host '📦 Inkscape installed' ; `
|
||||
Write-Host 'Verifying Inkscape installation...' ; `
|
||||
inkscape --version ; `
|
||||
Write-Host '✅ Inkscape installation complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 12: Install R (Medium complexity)
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING R INSTALLATION ===' ; `
|
||||
Write-Host 'Installing R via Scoop (same as quarto-build workflow)...' ; `
|
||||
Write-Host 'Installing R from main bucket...' ; `
|
||||
scoop install main/r ; `
|
||||
Write-Host '📦 R installed' ; `
|
||||
Write-Host 'Verifying R installation...' ; `
|
||||
R --version ; `
|
||||
Write-Host '✅ R installation complete'
|
||||
|
||||
# ------------------------------------------------------------
# PHASE 13: Install R packages (Medium complexity)
# ------------------------------------------------------------
# Installs `remotes`, then the packages from C:/temp/install_packages.R (or a
# small fallback set), and finally checks that rmarkdown and knitr load.
#
# - Every `Rscript -e` call starts a fresh R process, so options(repos=...) and
#   .libPaths() set in one call are gone in the next. The CRAN mirror, the
#   library path and each install.packages() call therefore share one call.
# - PowerShell does not stop on a non-zero exit code from a native command, so
#   $LASTEXITCODE is checked explicitly after the steps that are meant to fail
#   the build.
# - NOTE(review): no COPY of verify_r_packages.R into C:/temp is visible in this
#   Dockerfile; the verification script is skipped with a warning when absent.
RUN Write-Host '=== INSTALLING R PACKAGES ===' ; `
    Write-Host 'Installing R packages from install_packages.R (same as quarto-build workflow)...' ; `
    Write-Host 'Setting up R environment...' ; `
    Write-Host "R_LIBS_USER: $env:R_LIBS_USER" ; `
    Write-Host 'Installing R packages...' ; `
    Rscript -e 'options(repos=c(CRAN=\"https://cran.rstudio.com\")); lib <- Sys.getenv(\"R_LIBS_USER\"); dir.create(lib, recursive=TRUE, showWarnings=FALSE); .libPaths(lib); install.packages(\"remotes\")' ; `
    if (Test-Path 'C:/temp/install_packages.R') { `
        Write-Host 'Found install_packages.R, sourcing it...' ; `
        Rscript 'C:/temp/install_packages.R' ; `
    } else { `
        Write-Host 'No install_packages.R found, installing basic packages...' ; `
        Rscript -e 'options(repos=c(CRAN=\"https://cran.rstudio.com\")); .libPaths(Sys.getenv(\"R_LIBS_USER\")); install.packages(c(\"rmarkdown\",\"knitr\",\"ggplot2\"))' ; `
    } ; `
    Rscript -e 'for (p in c(\"rmarkdown\",\"knitr\")) if (!require(p, character.only=TRUE, quietly=TRUE)) stop(\"missing: \", p)' ; `
    if ($LASTEXITCODE -ne 0) { `
        Write-Host '❌ Required R packages are missing' ; `
        exit 1 ; `
    } ; `
    Write-Host '📦 R packages installed' ; `
    Write-Host 'Verifying R packages...' ; `
    if (Test-Path 'C:/temp/verify_r_packages.R') { `
        Rscript C:/temp/verify_r_packages.R ; `
        if ($LASTEXITCODE -ne 0) { `
            Write-Host '❌ R package verification failed' ; `
            exit 1 ; `
        } ; `
    } else { `
        Write-Host '⚠️ C:/temp/verify_r_packages.R not found, skipping verification script' ; `
    } ; `
    Write-Host '✅ R package installation complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# PHASE 14: Cleanup and Environment Setup
|
||||
# ------------------------------------------------------------
|
||||
RUN Write-Host '=== STARTING CLEANUP AND ENVIRONMENT SETUP ===' ; `
|
||||
Write-Host 'Cleaning temporary files and setting up environment...' ; `
|
||||
Write-Host 'Removing temporary files...' ; `
|
||||
Remove-Item C:/temp/requirements.txt -ErrorAction SilentlyContinue ; `
|
||||
Write-Host '🗑️ requirements.txt removed' ; `
|
||||
Remove-Item C:/temp/install_packages.R -ErrorAction SilentlyContinue ; `
|
||||
Write-Host '🗑️ install_packages.R removed' ; `
|
||||
Remove-Item C:/temp/verify_r_packages.R -ErrorAction SilentlyContinue ; `
|
||||
Write-Host '🗑️ verify_r_packages.R removed' ; `
|
||||
Remove-Item C:/temp/tl_packages -ErrorAction SilentlyContinue ; `
|
||||
Write-Host '🗑️ tl_packages removed' ; `
|
||||
Remove-Item C:/temp/requirements/ -Recurse -Force -ErrorAction SilentlyContinue ; `
|
||||
Write-Host '🗑️ requirements/ directory removed' ; `
|
||||
Write-Host 'Setting up environment variables for Quarto...' ; `
|
||||
$env:QUARTO_LOG_LEVEL = 'DEBUG' ; `
|
||||
[Environment]::SetEnvironmentVariable('QUARTO_LOG_LEVEL', 'DEBUG', 'Machine') ; `
|
||||
Write-Host '🔧 QUARTO_LOG_LEVEL set to DEBUG' ; `
|
||||
Write-Host '✅ Cleanup and environment setup complete'
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# FINAL CHECKS: Comprehensive verification with diagnostics
|
||||
# ------------------------------------------------------------
|
||||
WORKDIR C:/workspace
|
||||
RUN Write-Host '=== FINAL VERIFICATION WITH ENHANCED DIAGNOSTICS ===' ; `
|
||||
Write-Host 'Verifying all installations with comprehensive checks...' ; `
|
||||
Write-Host '' ; `
|
||||
Write-Host '🔍 SYSTEM DIAGNOSTICS:' ; `
|
||||
Write-Host '----------------------' ; `
|
||||
Write-Host 'PATH environment variable:' ; `
|
||||
Write-Host $env:PATH ; `
|
||||
Write-Host '' ; `
|
||||
Write-Host 'Visual C++ Redistributable check:' ; `
|
||||
Get-ChildItem 'C:\Windows\System32' -Filter 'msvcp*.dll' | Select-Object Name, Length, LastWriteTime ; `
|
||||
Write-Host '' ; `
|
||||
Write-Host '📊 TOOL VERIFICATION:' ; `
|
||||
Write-Host '---------------------' ; `
|
||||
Write-Host 'Checking Quarto...' ; `
|
||||
try { `
|
||||
quarto --version ; `
|
||||
Write-Host '✅ Quarto version check: PASSED' ; `
|
||||
Write-Host 'Running Quarto check for comprehensive validation...' ; `
|
||||
& quarto check 2>&1 | Write-Host ; `
|
||||
if ($LASTEXITCODE -eq 0) { `
|
||||
Write-Host '✅ Quarto check: PASSED' ; `
|
||||
} else { `
|
||||
Write-Host '⚠️ Quarto check: ISSUES DETECTED' ; `
|
||||
Write-Host "Exit code: $LASTEXITCODE" ; `
|
||||
} ; `
|
||||
} catch { `
|
||||
Write-Host '❌ Quarto verification failed:' ; `
|
||||
Write-Host $_.Exception.Message ; `
|
||||
} ; `
|
||||
Write-Host 'Checking Python...' ; `
|
||||
python --version ; `
|
||||
Write-Host '✅ Python verified' ; `
|
||||
Write-Host 'Checking R...' ; `
|
||||
R --version ; `
|
||||
Write-Host '✅ R verified' ; `
|
||||
Write-Host 'Checking LaTeX...' ; `
|
||||
lualatex --version ; `
|
||||
Write-Host '✅ LaTeX verified' ; `
|
||||
Write-Host 'Checking Ghostscript...' ; `
|
||||
gs --version ; `
|
||||
Write-Host '✅ Ghostscript verified' ; `
|
||||
Write-Host 'Checking Inkscape...' ; `
|
||||
inkscape --version ; `
|
||||
Write-Host '✅ Inkscape verified' ; `
|
||||
Write-Host '' ; `
|
||||
Write-Host '🎯 FINAL STATUS:' ; `
|
||||
Write-Host '----------------' ; `
|
||||
Write-Host '✅ Windows container build completed with enhanced diagnostics'
|
||||
@@ -1,227 +0,0 @@
|
||||
# MINIMAL WINDOWS DOCKERFILE - GHOSTSCRIPT ONLY
# Focus on fixing Ghostscript download/install, then add other components back

# Try Windows Server with more services (closer to GitHub Actions environment)
FROM mcr.microsoft.com/windows/server:ltsc2022

# === PHASE 1: POWERSHELL 7 INSTALLATION (Required for our scripts) ===
# Downloads the PowerShell 7.4.1 MSI, installs it silently and removes the
# installer. The whole instruction is one cmd.exe command line, so:
#   * failures are handled with `||`. A `%ERRORLEVEL%` test would be expanded
#     when the line is parsed (before anything runs), and a check placed
#     behind `&&` is never reached once the previous command has failed;
#   * curl runs with --fail (-f) so an HTTP error is reported as a failure
#     instead of being saved to disk as PowerShell.msi.
RUN (curl -fL -o PowerShell.msi https://github.com/PowerShell/PowerShell/releases/download/v7.4.1/PowerShell-7.4.1-win-x64.msi || (echo ❌ PowerShell download failed - aborting build && exit 1)) && \
    (msiexec /i PowerShell.msi /quiet /norestart || (echo ❌ PowerShell installation failed - aborting build && exit 1)) && \
    del PowerShell.msi
|
||||
|
||||
# Add PowerShell to PATH and test
# Appends the PowerShell 7 install directory to the machine-level PATH
# (setx ... /M) and then starts pwsh.exe through its absolute path as a
# smoke test of the installation.
# NOTE(review): %PATH% is expanded by cmd before setx runs, so the stored
# value is whatever PATH this build step sees; setx is also documented to
# truncate long values - confirm the resulting PATH is complete.
|
||||
RUN setx PATH "%PATH%;C:\Program Files\PowerShell\7" /M && \
|
||||
"C:\Program Files\PowerShell\7\pwsh.exe" -Command "Write-Host 'PowerShell 7 installation verified'"
|
||||
|
||||
# === MINIMAL SETUP ===
# Scratch directory used by the later steps for downloaded installers
# (the Ghostscript step writes C:/temp/gs_installer_method*.exe).
|
||||
RUN mkdir C:\temp
|
||||
|
||||
# === CHOCOLATEY INSTALLATION (for alternative Ghostscript method) ===
# Runs the Chocolatey install script downloaded from chocolatey.org, then
# calls `choco --version` as a smoke test. The step is best-effort: every
# failure (including the smoke test's `throw`) lands in the catch block,
# which only logs, so this layer still succeeds without Chocolatey. The
# Ghostscript step below re-checks `choco --version` before relying on it.
# `-bor 3072` adds the Tls12 flag to the enabled security protocols.
|
||||
RUN "C:\Program Files\PowerShell\7\pwsh.exe" -Command " \
|
||||
Write-Host '🚀 === INSTALLING CHOCOLATEY ==='; \
|
||||
try { \
|
||||
Set-ExecutionPolicy Bypass -Scope Process -Force; \
|
||||
[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; \
|
||||
iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1')); \
|
||||
Write-Host '✅ Chocolatey installation complete'; \
|
||||
# Test chocolatey installation \
|
||||
$chocoVersion = & choco --version 2>&1; \
|
||||
if ($LASTEXITCODE -ne 0) { throw 'Chocolatey test failed' }; \
|
||||
Write-Host ('📊 Chocolatey version: ' + $chocoVersion); \
|
||||
} catch { \
|
||||
Write-Host ('❌ Chocolatey installation failed: ' + $_.Exception.Message); \
|
||||
Write-Host '⚠️ Will skip Chocolatey method in Ghostscript testing'; \
|
||||
} \
|
||||
|
||||
"
|
||||
|
||||
# === GHOSTSCRIPT TESTING - MULTIPLE STRATEGIES ===
# Installs Ghostscript, trying in order:
#   1. Chocolatey (`choco install ghostscript`)
#   2. System.Net.WebClient download
#   3. Invoke-WebRequest download
#   4. curl download
# Methods 2-4 iterate over the same URL list and accept a download only if
# the file is larger than 1 MB; the downloaded installer is then run with /S.
# The build fails (throw) when every method fails or the installer returns a
# non-zero exit code.
#
# Authoring rules for this instruction - the backslash continuations join the
# whole script into ONE PowerShell command line:
#   * comments must be on their own line (Docker strips those); an inline
#     `# ...` after a statement would comment out the rest of the script;
#   * $downloadSuccess / $gsInstaller are initialised explicitly, and
#     $gsInstaller stays $null when Chocolatey did the installation;
#   * System.Net.WebClient exposes no Timeout property, so method 2 relies on
#     the class defaults and does not announce a timeout.
RUN "C:\Program Files\PowerShell\7\pwsh.exe" -Command " \
    Write-Host '🚀 === GHOSTSCRIPT DOWNLOAD TESTING ==='; \
    Write-Host '⏰ Testing multiple download methods...'; \
    $startTime = Get-Date; \
    $downloadSuccess = $false; \
    $gsInstaller = $null; \
    $userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'; \
    \
    # Test URLs first \
    $gsUrls = @( \
        'https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs10051/gs10051w64.exe', \
        'https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/latest/download/gs10051w64.exe', \
        'https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs10051/gs10051w32.exe' \
    ); \
    \
    Write-Host '📊 Testing URL accessibility...'; \
    foreach ($url in $gsUrls) { \
        Write-Host ('🔍 Testing: ' + $url); \
        try { \
            $response = Invoke-WebRequest -Uri $url -Method Head -TimeoutSec 30 -ErrorAction Stop; \
            Write-Host ('✅ URL accessible - Status: ' + $response.StatusCode + ' Size: ' + $response.Headers['Content-Length']); \
        } catch { \
            Write-Host ('❌ URL test failed: ' + $_.Exception.Message); \
        } \
    }; \
    \
    Write-Host '📦 === DOWNLOAD METHOD 1: Chocolatey (should work with full Windows image) ==='; \
    try { \
        Write-Host '🔄 Testing Chocolatey availability...'; \
        $chocoTest = & choco --version 2>&1; \
        if ($LASTEXITCODE -eq 0) { \
            Write-Host '✅ Chocolatey available, trying Ghostscript installation...'; \
            Write-Host '📦 Installing Ghostscript via simple Chocolatey command (matching quarto-build.yml)...'; \
            # Use simple approach like working quarto-build.yml \
            choco install ghostscript -y; \
            if ($LASTEXITCODE -eq 0) { \
                Write-Host '✅ Chocolatey Ghostscript installation SUCCESS!'; \
                $downloadSuccess = $true; \
            } else { \
                Write-Host ('❌ Chocolatey installation failed with exit code: ' + $LASTEXITCODE); \
            } \
        } else { \
            Write-Host '⚠️ Chocolatey not available, trying direct download methods...'; \
        } \
    } catch { \
        Write-Host ('❌ Chocolatey method failed: ' + $_.Exception.Message); \
        Write-Host '⚠️ Will try direct download methods as fallback...'; \
    }; \
    \
    if (-not $downloadSuccess) { \
        Write-Host '📦 === DOWNLOAD METHOD 2: WebClient (fallback) ==='; \
        $gsInstaller = 'C:/temp/gs_installer_method2.exe'; \
        foreach ($gsUrl in $gsUrls) { \
            Write-Host ('🔄 WebClient trying: ' + $gsUrl); \
            $webClient = $null; \
            try { \
                [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; \
                $webClient = New-Object System.Net.WebClient; \
                $webClient.Headers.Add('User-Agent', $userAgent); \
                $webClient.DownloadFile($gsUrl, $gsInstaller); \
                if ((Test-Path $gsInstaller) -and ((Get-Item $gsInstaller).Length -gt 1MB)) { \
                    Write-Host ('✅ WebClient SUCCESS - Size: ' + ((Get-Item $gsInstaller).Length / 1MB).ToString('F1') + ' MB'); \
                    $downloadSuccess = $true; \
                    break; \
                } else { \
                    Write-Host '⚠️ WebClient downloaded but file too small'; \
                    if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
                } \
            } catch { \
                Write-Host ('❌ WebClient failed: ' + $_.Exception.Message); \
                if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
            } finally { \
                if ($webClient) { $webClient.Dispose() } \
            } \
        } \
    }; \
    \
    if (-not $downloadSuccess) { \
        Write-Host '📦 === DOWNLOAD METHOD 3: Invoke-WebRequest ==='; \
        $gsInstaller = 'C:/temp/gs_installer_method3.exe'; \
        foreach ($gsUrl in $gsUrls) { \
            Write-Host ('🔄 Invoke-WebRequest trying: ' + $gsUrl + ' (5 minute timeout)'); \
            try { \
                Invoke-WebRequest -Uri $gsUrl -OutFile $gsInstaller -TimeoutSec 300 -UserAgent $userAgent -UseBasicParsing; \
                if ((Test-Path $gsInstaller) -and ((Get-Item $gsInstaller).Length -gt 1MB)) { \
                    Write-Host ('✅ Invoke-WebRequest SUCCESS - Size: ' + ((Get-Item $gsInstaller).Length / 1MB).ToString('F1') + ' MB'); \
                    $downloadSuccess = $true; \
                    break; \
                } else { \
                    Write-Host '⚠️ Invoke-WebRequest downloaded but file too small'; \
                    if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
                } \
            } catch { \
                Write-Host ('❌ Invoke-WebRequest failed: ' + $_.Exception.Message); \
                if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
            } \
        } \
    }; \
    \
    if (-not $downloadSuccess) { \
        Write-Host '📦 === DOWNLOAD METHOD 4: curl (final fallback) ==='; \
        $gsInstaller = 'C:/temp/gs_installer_method4.exe'; \
        foreach ($gsUrl in $gsUrls) { \
            Write-Host ('🔄 curl trying: ' + $gsUrl + ' (5 minute timeout)'); \
            try { \
                $curlResult = & curl -L -o $gsInstaller $gsUrl --user-agent $userAgent --max-time 300 2>&1; \
                if ((Test-Path $gsInstaller) -and ((Get-Item $gsInstaller).Length -gt 1MB)) { \
                    Write-Host ('✅ curl SUCCESS - Size: ' + ((Get-Item $gsInstaller).Length / 1MB).ToString('F1') + ' MB'); \
                    $downloadSuccess = $true; \
                    break; \
                } else { \
                    Write-Host '⚠️ curl downloaded but file too small'; \
                    if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
                } \
            } catch { \
                Write-Host ('❌ curl failed: ' + $_.Exception.Message); \
                if (Test-Path $gsInstaller) { Remove-Item $gsInstaller -ErrorAction SilentlyContinue }; \
            } \
        } \
    }; \
    \
    if ($downloadSuccess) { \
        Write-Host '🎉 === DOWNLOAD/INSTALL SUCCESSFUL ==='; \
        \
        # If we have an installer file, run it ($gsInstaller is $null after a Chocolatey install) \
        if ($gsInstaller -and (Test-Path $gsInstaller)) { \
            Write-Host '🔄 Testing manual installation...'; \
            $installProcess = Start-Process -FilePath $gsInstaller -ArgumentList '/S' -Wait -PassThru -NoNewWindow; \
            if ($installProcess.ExitCode -ne 0) { \
                Write-Host ('❌ Manual installation failed with exit code: ' + $installProcess.ExitCode); \
                throw 'Ghostscript manual installation failed'; \
            } \
            Write-Host '✅ Manual installation SUCCESS!'; \
        } else { \
            Write-Host '📦 Chocolatey installation already completed, skipping manual install'; \
        } \
        \
        # Test final installation \
        Write-Host '🧪 Testing Ghostscript functionality...'; \
        $gsPath = Get-ChildItem 'C:/Program Files/gs' -ErrorAction SilentlyContinue | Sort-Object Name -Descending | Select-Object -First 1; \
        if ($gsPath) { \
            $binPath = Join-Path $gsPath.FullName 'bin'; \
            Write-Host ('📍 Found Ghostscript at: ' + $binPath); \
            try { \
                $gsVersion = & \"$binPath/gs.exe\" --version 2>&1; \
                Write-Host ('✅ Ghostscript version: ' + $gsVersion); \
                Write-Host '🎉 === GHOSTSCRIPT FULLY WORKING ==='; \
            } catch { \
                Write-Host '⚠️ Ghostscript installed but version test failed (may be normal in containers)'; \
            } \
        } else { \
            Write-Host '⚠️ Installation succeeded but Ghostscript directory not found'; \
            # Check alternative locations \
            $altPaths = @('C:/Program Files (x86)/gs', 'C:/ProgramData/chocolatey/lib/ghostscript'); \
            foreach ($altPath in $altPaths) { \
                if (Test-Path $altPath) { \
                    Write-Host ('📍 Found Ghostscript at alternative location: ' + $altPath); \
                    break; \
                } \
            } \
        } \
    } else { \
        Write-Host '❌ === ALL DOWNLOAD METHODS FAILED ==='; \
        Write-Host 'This will help us debug the root cause of download failures'; \
        throw 'All Ghostscript download methods failed'; \
    }; \
    \
    $endTime = Get-Date; \
    $duration = ($endTime - $startTime).TotalMinutes; \
    Write-Host ('✅ === GHOSTSCRIPT TEST COMPLETE === (' + $duration.ToString('F1') + ' minutes)'); \
"
|
||||
|
||||
# === BASIC VERIFICATION ===
# Prints a short status summary at the end of the minimal image build.
# The 'PowerShell 7: OK' and 'Temp directory: OK' lines are unconditional
# messages, not checks. Only Ghostscript is probed (Get-Command gs), and a
# missing `gs` command is reported as expected rather than failing the build.
|
||||
RUN "C:\Program Files\PowerShell\7\pwsh.exe" -Command " \
|
||||
Write-Host '🔍 === FINAL VERIFICATION ==='; \
|
||||
Write-Host 'PowerShell 7: OK'; \
|
||||
Write-Host 'Temp directory: OK'; \
|
||||
if (Get-Command gs -ErrorAction SilentlyContinue) { \
|
||||
Write-Host 'Ghostscript: AVAILABLE'; \
|
||||
} else { \
|
||||
Write-Host 'Ghostscript: Not in PATH (expected)'; \
|
||||
} \
|
||||
Write-Host '✅ Minimal container ready for testing'; \
|
||||
"
|
||||
@@ -1,181 +0,0 @@
|
||||
# Windows Dockerfile Fixes Summary
|
||||
|
||||
## 🔧 Critical Issues Fixed
|
||||
|
||||
### 1. PowerShell 7 Path Resolution
|
||||
**Problem**: Using `pwsh` shorthand can fail in Windows containers
|
||||
```dockerfile
|
||||
# BEFORE (problematic)
|
||||
SHELL ["pwsh", "-NoLogo", "-ExecutionPolicy", "Bypass", "-Command"]
|
||||
|
||||
# AFTER (fixed)
|
||||
SHELL ["C:\\Program Files\\PowerShell\\7\\pwsh.exe", "-NoLogo", "-ExecutionPolicy", "Bypass", "-Command"]
|
||||
```
|
||||
|
||||
**Why**: Windows containers may not have `pwsh` in PATH, requiring full path specification.
|
||||
|
||||
### 2. TeX Live Installation Process
|
||||
**Problem**: `Start-Process` without `-NoNewWindow` can hang in containers
|
||||
```dockerfile
|
||||
# BEFORE (problematic)
|
||||
Start-Process -FilePath $Installer -ArgumentList '-repository', $Repo, '-profile', 'C:\temp\texlive.profile' -Wait
|
||||
|
||||
# AFTER (fixed)
|
||||
Start-Process -FilePath $Installer -ArgumentList '-repository', $Repo, '-profile', 'C:\temp\texlive.profile' -Wait -NoNewWindow
|
||||
```
|
||||
|
||||
**Why**: Container environments need `-NoNewWindow` to prevent GUI-related hangs.
|
||||
|
||||
### 3. TeX Package Installation
|
||||
**Problem**: Comments in `tl_packages` file causing installation failures
|
||||
```dockerfile
|
||||
# BEFORE (problematic)
|
||||
$pkgs = Get-Content 'C:\temp\tl_packages' | Where-Object { $_.Trim() -ne '' }
|
||||
|
||||
# AFTER (fixed)
|
||||
$pkgs = Get-Content 'C:\temp\tl_packages' | Where-Object { $_.Trim() -ne '' -and -not $_.Trim().StartsWith('#') }
|
||||
```
|
||||
|
||||
**Why**: Comments starting with `#` were being passed to `tlmgr install`, causing errors.
|
||||
|
||||
### 4. TikZ Test Document
|
||||
**Problem**: Complex here-string with backticks causing parsing issues
|
||||
```dockerfile
|
||||
# BEFORE (problematic)
|
||||
Set-Content -Path C:\temp\test_tikz.tex -Value @'`n\documentclass{standalone}`n\usepackage{tikz}`n...
|
||||
|
||||
# AFTER (fixed)
|
||||
Set-Content -Path C:\temp\test_tikz.tex -Value @'
|
||||
\documentclass{standalone}
|
||||
\usepackage{tikz}
|
||||
...
|
||||
'@ -Encoding ASCII
|
||||
```
|
||||
|
||||
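**Why**: The opening `@'` of a here-string must be the last thing on its line, and inside a single-quoted here-string `` `n `` is literal text rather than a newline, so the one-line form neither parses nor produces a multi-line document.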
|
||||
|
||||
### 5. Package Installation Verbosity
|
||||
**Problem**: Silent failures in package installation
|
||||
```dockerfile
|
||||
# BEFORE (problematic)
|
||||
foreach ($p in $pkgs) { & $tlmgr install $p.Trim() }
|
||||
|
||||
# AFTER (fixed)
|
||||
foreach ($p in $pkgs) { Write-Host "Installing TeX package: $p" ; & $tlmgr install $p.Trim() }
|
||||
```
|
||||
|
||||
**Why**: Added verbose output to help debug installation issues.
|
||||
|
||||
## 🐛 Windows Container Quirks Addressed
|
||||
|
||||
### 1. PATH Environment Variable
|
||||
- **Issue**: Windows PATH manipulation requires regex escaping
|
||||
- **Solution**: Used `[regex]::Escape()` for proper path matching
|
||||
|
||||
### 2. File Path Handling
|
||||
- **Issue**: Mixed forward/backward slashes
|
||||
- **Solution**: Consistent use of Windows-style paths with proper escaping
|
||||
|
||||
### 3. PowerShell Execution Policy
|
||||
- **Issue**: Default execution policy blocks scripts
|
||||
- **Solution**: Used `-ExecutionPolicy Bypass` consistently
|
||||
|
||||
### 4. Chocolatey Installation
|
||||
- **Issue**: TLS 1.2 requirement for downloads
|
||||
- **Solution**: Added `[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12`
|
||||
|
||||
### 5. Container-Safe Installations
|
||||
- **Issue**: MSI installers can hang in containers
|
||||
- **Solution**: Used ZIP installations for PowerShell 7 and Quarto
|
||||
|
||||
## 📋 Validation Improvements
|
||||
|
||||
### 1. Comprehensive Testing
|
||||
- Added version checks for all major components
|
||||
- Included `kpsewhich` font verification
|
||||
- Added TikZ smoke test with PDF generation
|
||||
- Enhanced R package verification
|
||||
|
||||
### 2. Error Handling
|
||||
- Added explicit error checking with `throw` statements
|
||||
- Included progress indicators for long operations
|
||||
- Added fallback mechanisms for critical components
|
||||
|
||||
### 3. File Existence Checks
|
||||
- Verified all required files exist before copying
|
||||
- Added validation for installation paths
|
||||
- Included cleanup procedures
|
||||
|
||||
## 🚀 Performance Optimizations
|
||||
|
||||
### 1. Minimal TeX Live Installation
|
||||
- Used `scheme-infraonly` for faster installation
|
||||
- Disabled documentation and source files
|
||||
- Targeted package installation instead of full distribution
|
||||
|
||||
### 2. Efficient Package Management
|
||||
- Used Chocolatey for reliable Windows package installation
|
||||
- Implemented proper PATH management
|
||||
- Added cleanup procedures to reduce image size
|
||||
|
||||
### 3. Build Phase Optimization
|
||||
- Organized into logical phases for better caching
|
||||
- Separated dependency installation from verification
|
||||
- Added progress indicators for long-running operations
|
||||
|
||||
## 🔍 Testing Strategy
|
||||
|
||||
### 1. Pre-Build Validation
|
||||
- Created test scripts to validate Dockerfile syntax
|
||||
- Checked for common Windows container issues
|
||||
- Verified all required files exist
|
||||
|
||||
### 2. Component Verification
|
||||
- PowerShell 7: Version and command availability
|
||||
- Quarto: Version and functionality
|
||||
- Python: Package installation and imports
|
||||
- TeX Live: Package and font verification
|
||||
- R: Package installation and library loading
|
||||
|
||||
### 3. Integration Testing
|
||||
- TikZ smoke test with PDF generation
|
||||
- Cross-component dependency verification
|
||||
- End-to-end build process validation
|
||||
|
||||
## 📊 Expected Performance
|
||||
|
||||
- **Build Time**: 45-60 minutes (down from 90+ minutes)
|
||||
- **Image Size**: 8-12GB (optimized for Windows)
|
||||
- **Memory Usage**: 4-6GB during build, 2-3GB runtime
|
||||
- **Success Rate**: >95% (with proper error handling)
|
||||
|
||||
## 🛠️ Maintenance Notes
|
||||
|
||||
### 1. Version Updates
|
||||
- PowerShell 7: Update URL and version number
|
||||
- Quarto: Update version and download URL
|
||||
- Python: Update version in Chocolatey command
|
||||
- TeX Live: Update repository URL and packages
|
||||
|
||||
### 2. Package Management
|
||||
- Add new TeX packages to `tl_packages` file
|
||||
- Update Python requirements in `requirements-build.txt`
|
||||
- Add R packages to `install_packages.R`
|
||||
|
||||
### 3. Testing Procedures
|
||||
- Run validation script before building
|
||||
- Test all components after updates
|
||||
- Verify cross-platform compatibility
|
||||
|
||||
## ✅ Verification Checklist
|
||||
|
||||
- [x] PowerShell 7 installation and PATH setup
|
||||
- [x] Chocolatey installation and package management
|
||||
- [x] Quarto installation and verification
|
||||
- [x] Python installation and package management
|
||||
- [x] TeX Live installation with package filtering
|
||||
- [x] R installation and package verification
|
||||
- [x] Graphics tools (Ghostscript, Inkscape)
|
||||
- [x] Font verification and TikZ testing
|
||||
- [x] Error handling and progress indicators
|
||||
- [x] Cleanup procedures and optimization
|
||||
@@ -1,171 +0,0 @@
|
||||
# Windows Quarto Build Container
|
||||
|
||||
This directory contains the Windows Server 2022 container configuration for building the MLSysBook with Quarto.
|
||||
|
||||
## 🐳 Container Features
|
||||
|
||||
- **Base Image**: Windows Server 2022 LTSC
|
||||
- **PowerShell**: 7.4.1 (ZIP install, container-safe)
|
||||
- **Quarto**: 1.7.31 (ZIP install)
|
||||
- **Python**: 3.13.1 + production dependencies
|
||||
- **TeX Live**: 2025 snapshot with required packages
|
||||
- **R**: 4.3.2 + R Markdown packages
|
||||
- **Graphics**: Ghostscript + Inkscape (via Chocolatey)
|
||||
|
||||
## 🔧 Key Fixes Applied
|
||||
|
||||
### 1. PowerShell 7 Path Issues
|
||||
- **Problem**: Using `pwsh` shorthand can fail in containers
|
||||
- **Fix**: Use full path `C:\Program Files\PowerShell\7\pwsh.exe`
|
||||
|
||||
### 2. TeX Live Installation
|
||||
- **Problem**: `Start-Process` without `-NoNewWindow` can hang
|
||||
- **Fix**: Added `-NoNewWindow` flag for container compatibility
|
||||
- **Problem**: Comments in `tl_packages` file
|
||||
- **Fix**: Filter out comment lines when installing packages
|
||||
|
||||
### 3. TikZ Test Document
|
||||
- **Problem**: Complex here-string with backticks
|
||||
- **Fix**: Simplified to standard multi-line string
|
||||
|
||||
### 4. Package Installation
|
||||
- **Problem**: Silent failures in package installation
|
||||
- **Fix**: Added verbose output and better error handling
|
||||
|
||||
## 🚀 Building the Container
|
||||
|
||||
### Prerequisites
|
||||
- Windows Docker Desktop or Windows Server with Docker
|
||||
- At least 8GB RAM available for Docker
|
||||
- 20GB+ free disk space
|
||||
|
||||
### Build Command
|
||||
```powershell
|
||||
# From project root
|
||||
docker build -f docker/build-quarto-windows/Dockerfile -t mlsysbook-windows .
|
||||
```
|
||||
|
||||
### Test Before Building
|
||||
```powershell
|
||||
# Run validation script
|
||||
.\docker\build-quarto-windows\test_dockerfile.ps1
|
||||
```
|
||||
|
||||
## 📋 Build Phases
|
||||
|
||||
1. **Base Setup**: Directories, environment variables
|
||||
2. **PowerShell 7**: ZIP installation (container-safe)
|
||||
3. **Chocolatey**: Package manager installation
|
||||
4. **Dependencies**: Copy required files
|
||||
5. **Quarto**: ZIP installation with PATH setup
|
||||
6. **Python**: 3.13.1 + production requirements
|
||||
7. **Graphics**: Ghostscript + Inkscape
|
||||
8. **TeX Live**: 2025 snapshot + packages
|
||||
9. **R**: 4.3.2 + R Markdown packages
|
||||
10. **Cleanup**: Remove temporary files
|
||||
|
||||
## 🔍 Verification Steps
|
||||
|
||||
The container includes comprehensive verification:
|
||||
|
||||
- **PowerShell 7**: Version check
|
||||
- **Quarto**: Version and command availability
|
||||
- **Python**: Version and pip functionality
|
||||
- **TeX Live**: Package verification with `kpsewhich`
|
||||
- **Fonts**: Helvetica font files verification
|
||||
- **TikZ**: Smoke test with PDF generation
|
||||
- **R**: Package installation verification
|
||||
|
||||
## ⚠️ Common Issues & Solutions
|
||||
|
||||
### 1. Build Timeouts
|
||||
- **Cause**: Large downloads (TeX Live, Python packages)
|
||||
- **Solution**: Increased timeout values in Dockerfile
|
||||
|
||||
### 2. PATH Issues
|
||||
- **Cause**: Windows PATH not properly updated
|
||||
- **Solution**: Explicit PATH manipulation with regex escaping
|
||||
|
||||
### 3. Package Installation Failures
|
||||
- **Cause**: Network issues or missing dependencies
|
||||
- **Solution**: Added verbose output and error checking
|
||||
|
||||
### 4. Memory Issues
|
||||
- **Cause**: TeX Live installation requires significant memory
|
||||
- **Solution**: Use `scheme-infraonly` for minimal installation
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
### Run Container
|
||||
```powershell
|
||||
docker run -it mlsysbook-windows pwsh
|
||||
```
|
||||
|
||||
### Test Quarto
|
||||
```powershell
|
||||
quarto --version
|
||||
quarto check
|
||||
```
|
||||
|
||||
### Test Python
|
||||
```powershell
|
||||
python --version
|
||||
python -c "import nltk; print('NLTK available')"
|
||||
```
|
||||
|
||||
### Test R
|
||||
```powershell
|
||||
R --version
|
||||
Rscript -e "library(rmarkdown); print('R Markdown available')"
|
||||
```
|
||||
|
||||
### Test TeX Live
|
||||
```powershell
|
||||
lualatex --version
|
||||
kpsewhich pgf.sty
|
||||
```
|
||||
|
||||
## 📊 Performance Notes
|
||||
|
||||
- **Build Time**: ~45-60 minutes on typical hardware
|
||||
- **Image Size**: ~8-12GB (includes TeX Live, R, Python)
|
||||
- **Memory Usage**: 4-6GB during build, 2-3GB runtime
|
||||
- **Disk Space**: 15-20GB for build cache
|
||||
|
||||
## 🔧 Troubleshooting
|
||||
|
||||
### Build Fails on TeX Live
|
||||
```powershell
|
||||
# Check Docker disk usage, then prune unused data to free space
|
||||
docker system df
|
||||
docker system prune -f
|
||||
```
|
||||
|
||||
### PowerShell Issues
|
||||
```powershell
|
||||
# Verify PowerShell 7 installation
|
||||
docker run mlsysbook-windows pwsh -Command "Get-Host"
|
||||
```
|
||||
|
||||
### Package Installation Issues
|
||||
```powershell
|
||||
# Check Chocolatey installation
|
||||
docker run mlsysbook-windows choco --version
|
||||
```
|
||||
|
||||
## 📝 Maintenance
|
||||
|
||||
### Updating Dependencies
|
||||
1. Update version numbers in Dockerfile
|
||||
2. Test with validation script
|
||||
3. Rebuild and verify all components
|
||||
|
||||
### Adding New Packages
|
||||
1. Add to appropriate phase in Dockerfile
|
||||
2. Update verification steps
|
||||
3. Test thoroughly
|
||||
|
||||
### Security Updates
|
||||
- Regularly update base image
|
||||
- Monitor for CVE reports
|
||||
- Update package versions as needed
|
||||
@@ -1,96 +0,0 @@
|
||||
#!/usr/bin/env pwsh

# Test script for Windows Dockerfile validation.
# Run this from the repository root before building to catch common issues.
#
# Steps:
#   1. Verify that every file the Dockerfile copies exists (exit 1 if not).
#   2. Scan the Dockerfile text for known Windows-container pitfalls
#      (reported as warnings only).
#   3. Report how many TeX Live packages would be installed.
#   4. Confirm that the Python requirements file exists.

# Variables
$headline = "🚀 Testing Dockerfile: Windows"
$dockerfile = "docker/build-quarto-windows/Dockerfile"
$image_name = "mlsysbook-windows-test"
$container_name = "mlsysbook-windows-test-container"

Write-Host $headline -ForegroundColor Green

# Check if required files exist
$requiredFiles = @(
    "tools/dependencies/requirements/",
    "tools/dependencies/requirements-build.txt",
    "tools/dependencies/install_packages.R",
    "tools/dependencies/tl_packages",
    "docker/build-quarto-windows/verify_r_packages.R"
)

Write-Host "📁 Checking required files..." -ForegroundColor Yellow
foreach ($file in $requiredFiles) {
    if (Test-Path $file) {
        Write-Host " ✅ $file" -ForegroundColor Green
    } else {
        Write-Host " ❌ $file (MISSING)" -ForegroundColor Red
        exit 1
    }
}

# Check Dockerfile syntax
Write-Host "🐳 Validating Dockerfile syntax..." -ForegroundColor Yellow
if (Test-Path $dockerfile) {
    $content = Get-Content $dockerfile -Raw

    # Check for common Windows container issues
    $issues = @()

    # Check for proper escape character.
    # The pattern is single-quoted on purpose: inside double quotes the
    # backtick is PowerShell's escape character and would swallow the
    # closing quote, leaving the string unterminated.
    if ($content -notmatch '# escape=`') {
        $issues += "Missing escape character at top"
    }

    # Check for proper SHELL commands
    if ($content -match 'SHELL \["pwsh"') {
        $issues += "Using 'pwsh' instead of full path - should use 'C:\\Program Files\\PowerShell\\7\\pwsh.exe'"
    }

    # Check for proper line continuation.
    # Without the multiline option `$` only matches at the end of the file,
    # so this flags a backtick left dangling on the last line.
    if ($content -match '`\s*$') {
        $issues += "Trailing backticks found - should be removed"
    }

    # Check for proper PowerShell commands: a Start-Process ... -Wait that is
    # not followed by -NoNewWindow on the same line.
    if ($content -match 'Start-Process.*-Wait(?!.*-NoNewWindow)') {
        $issues += "Start-Process should include -NoNewWindow for container builds"
    }

    if ($issues.Count -eq 0) {
        Write-Host " ✅ Dockerfile syntax looks good" -ForegroundColor Green
    } else {
        Write-Host " ⚠️ Potential issues found:" -ForegroundColor Yellow
        foreach ($issue in $issues) {
            Write-Host " - $issue" -ForegroundColor Yellow
        }
    }
} else {
    Write-Host " ❌ Dockerfile not found" -ForegroundColor Red
    exit 1
}

# Check tl_packages content
Write-Host "📦 Checking TeX Live packages..." -ForegroundColor Yellow
$tlPackages = "tools/dependencies/tl_packages"
if (Test-Path $tlPackages) {
    # @(...) forces an array so .Count is correct for zero or one package.
    $packages = @(Get-Content $tlPackages | Where-Object { $_.Trim() -ne '' -and -not $_.Trim().StartsWith('#') })
    Write-Host " ✅ Found $($packages.Count) TeX packages to install" -ForegroundColor Green
} else {
    Write-Host " ❌ tl_packages file missing" -ForegroundColor Red
}

# Check requirements
Write-Host "🐍 Checking Python requirements..." -ForegroundColor Yellow
$requirements = "tools/dependencies/requirements-build.txt"
if (Test-Path $requirements) {
    Write-Host " ✅ Requirements file found" -ForegroundColor Green
} else {
    Write-Host " ❌ Requirements file missing" -ForegroundColor Red
}

Write-Host "✅ Dockerfile validation complete!" -ForegroundColor Green
Write-Host ""
Write-Host "To build the container:" -ForegroundColor Cyan
Write-Host " docker build -f docker/build-quarto-windows/Dockerfile -t mlsysbook-windows ." -ForegroundColor White
||||
@@ -1,93 +0,0 @@
|
||||
#!/bin/bash

# Test script for Windows Dockerfile validation (bash version).
# Run this from the repository root before building to catch common issues.
#
# Steps:
#   1. Verify that every file the Dockerfile copies exists (exit 1 if not).
#   2. Scan the Dockerfile for known Windows-container pitfalls
#      (reported as warnings only).
#   3. Report how many TeX Live packages would be installed.
#   4. Confirm that the Python requirements file exists.

# Variables
headline="🚀 Testing Dockerfile: Windows"
dockerfile="docker/build-quarto-windows/Dockerfile"
image_name="mlsysbook-windows-test"
container_name="mlsysbook-windows-test-container"

echo "$headline"

# Check if required files exist
required_files=(
    "tools/dependencies/requirements/"
    "tools/dependencies/requirements-build.txt"
    "tools/dependencies/install_packages.R"
    "tools/dependencies/tl_packages"
    "docker/build-quarto-windows/verify_r_packages.R"
)

echo "📁 Checking required files..."
for file in "${required_files[@]}"; do
    if [ -e "$file" ]; then
        echo " ✅ $file"
    else
        echo " ❌ $file (MISSING)"
        exit 1
    fi
done

# Check Dockerfile syntax
echo "🐳 Validating Dockerfile syntax..."
if [ -f "$dockerfile" ]; then
    issues=()

    # Check for proper escape character
    if ! grep -q "^# escape=\`" "$dockerfile"; then
        issues+=("Missing escape character at top")
    fi

    # Check for proper SHELL commands (should use full path)
    if grep -q 'SHELL \["pwsh"' "$dockerfile"; then
        issues+=("Using 'pwsh' instead of full path - should use 'C:\\\\Program Files\\\\PowerShell\\\\7\\\\pwsh.exe'")
    fi

    # Check for proper PowerShell commands.
    # Evaluated per line: flag the file if ANY "Start-Process ... -Wait" line
    # lacks -NoNewWindow, so one compliant call cannot mask another.
    if grep 'Start-Process.*-Wait' "$dockerfile" | grep -qv 'Start-Process.*-Wait.*-NoNewWindow'; then
        issues+=("Start-Process should include -NoNewWindow for container builds")
    fi

    # Check for comment filtering in tl_packages
    if ! grep -q "StartsWith('#')" "$dockerfile"; then
        issues+=("Missing comment filtering for tl_packages")
    fi

    if [ ${#issues[@]} -eq 0 ]; then
        echo " ✅ Dockerfile syntax looks good"
    else
        echo " ⚠️ Potential issues found:"
        for issue in "${issues[@]}"; do
            echo " - $issue"
        done
    fi
else
    echo " ❌ Dockerfile not found"
    exit 1
fi

# Check tl_packages content
echo "📦 Checking TeX Live packages..."
tl_packages="tools/dependencies/tl_packages"
if [ -f "$tl_packages" ]; then
    # Count lines that are neither blank/whitespace-only (including a lone CR
    # from CRLF files) nor comments, ignoring leading indentation. This is the
    # same filter the PowerShell script applies with Trim()/StartsWith('#').
    package_count=$(grep -Evc '^[[:space:]]*(#|$)' "$tl_packages")
    echo " ✅ Found $package_count TeX packages to install"
else
    echo " ❌ tl_packages file missing"
fi

# Check requirements
echo "🐍 Checking Python requirements..."
requirements="tools/dependencies/requirements-build.txt"
if [ -f "$requirements" ]; then
    echo " ✅ Requirements file found"
else
    echo " ❌ Requirements file missing"
fi

echo "✅ Dockerfile validation complete!"
echo ""
echo "To build the container:"
echo " docker build -f docker/build-quarto-windows/Dockerfile -t mlsysbook-windows ."
|
||||
@@ -1,13 +0,0 @@
|
||||
#!/usr/bin/env Rscript

# Verify R package installation.
#
# Sources the install script and then checks that every entry of
# `required_packages` can be loaded as a namespace. Exits with status 1 and
# lists the missing packages when any check fails.
# NOTE(review): `required_packages` is not defined in this file; it is
# presumably created by the sourced install_packages.R - confirm.
source('C:/temp/install_packages.R')

# vapply() always returns a logical vector (logical(0) for an empty input),
# whereas sapply() would return list() there and make the negation fail.
is_installed <- vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
missing_packages <- required_packages[!is_installed]

if (length(missing_packages) > 0) {
  cat('❌ Missing packages:', paste(missing_packages, collapse = ', '), '\n')
  quit(status = 1)
} else {
  cat('✅ All required R packages installed successfully\n')
}
|
||||
@@ -30,13 +30,12 @@ Containerized Linux Build (5-10 minutes):
|
||||
## Files
|
||||
|
||||
### Core Files
|
||||
- `docker/build-quarto-linux/Dockerfile` - A single Dockerfile for Linux builds.
|
||||
- `docker/build-quarto-linux/README.md` - Linux container documentation
|
||||
- `docker/build-quarto-linux/.dockerignore` - Build exclusions
|
||||
- `docker/build-quarto-windows/Dockerfile` - A single Dockerfile for Windows builds.
|
||||
- `.github/workflows/build-linux-container.yml` - Builds and pushes Linux container
|
||||
- `.github/workflows/build-windows-container.yml` - Builds and pushes Windows container
|
||||
- `.github/workflows/quarto-build-container.yml` - Containerized build workflow
|
||||
- `docker/linux/Dockerfile` - A single Dockerfile for Linux builds.
|
||||
- `docker/linux/README.md` - Linux container documentation
|
||||
- `docker/linux/.dockerignore` - Build exclusions
|
||||
- `docker/windows/Dockerfile` - A single Dockerfile for Windows builds.
|
||||
- `docker/windows/README.md` - Windows container documentation
|
||||
- `docker/windows/.dockerignore` - Build exclusions
|
||||
|
||||
### Container Lifecycle
|
||||
1. **Build**: Weekly automatic rebuilds + manual triggers
|
||||
@@ -48,14 +47,20 @@ Containerized Linux Build (5-10 minutes):
|
||||
|
||||
## Usage
|
||||
|
||||
### Manual Container Build
|
||||
```bash
|
||||
# Trigger Linux container build manually
|
||||
gh workflow run build-linux-container.yml
|
||||
### Registry Paths
|
||||
- **Linux Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-linux`
|
||||
- **Windows Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-windows`
|
||||
|
||||
# Trigger Windows container build manually
|
||||
gh workflow run build-windows-container.yml
|
||||
```
|
||||
### Manual Builds
|
||||
You can build the containers locally using these commands:
|
||||
- **Linux**:
|
||||
```bash
|
||||
docker build -f docker/linux/Dockerfile -t mlsysbook-linux .
|
||||
```
|
||||
- **Windows**:
|
||||
```powershell
|
||||
docker build -f docker/windows/Dockerfile -t mlsysbook-windows .
|
||||
```
|
||||
|
||||
### Manual Build Test
|
||||
```bash
|
||||
@@ -64,8 +69,8 @@ gh workflow run quarto-build-container.yml --field os=ubuntu-latest --field form
|
||||
```
|
||||
|
||||
### Container Information
|
||||
- **Linux Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-build`
|
||||
- **Windows Registry**: `ghcr.io/harvard-edge/cs249r_book/build-quarto-windows`
|
||||
- **Linux Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-linux`
|
||||
- **Windows Registry**: `ghcr.io/harvard-edge/cs249r_book/quarto-windows`
|
||||
- **Tags**: `latest`, `main`, `dev`, branch-specific tags
|
||||
- **Linux Size**: ~2-3GB (includes TeX Live, R, Python packages)
|
||||
- **Windows Size**: ~4-5GB (includes Windows Server Core + dependencies)
|
||||
@@ -120,7 +125,7 @@ LC_ALL=en_US.UTF-8
|
||||
3. Test locally with `docker build -t test .`
|
||||
|
||||
### Build Issues
|
||||
1. Check if container exists: `ghcr.io/harvard-edge/cs249r_book/quarto-build:latest`
|
||||
1. Check if container exists: `ghcr.io/harvard-edge/cs249r_book/quarto-linux:latest`
|
||||
2. Verify container has all dependencies
|
||||
3. Compare with traditional build logs
|
||||
|
||||
@@ -170,8 +175,8 @@ To build the containers, use the standard `docker build` command:
|
||||
|
||||
```bash
|
||||
# For Linux
|
||||
docker build -f docker/build-quarto-linux/Dockerfile -t mlsysbook-linux .
|
||||
docker build -f docker/linux/Dockerfile -t mlsysbook-linux .
|
||||
|
||||
# For Windows
|
||||
docker build -f docker/build-quarto-windows/Dockerfile -t mlsysbook-windows .
|
||||
docker build -f docker/windows/Dockerfile -t mlsysbook-windows .
|
||||
```
|
||||
@@ -1,224 +0,0 @@
|
||||
# Container Build Fixes - January 2025
|
||||
|
||||
## Overview
|
||||
|
||||
This document summarizes the comprehensive fixes applied to the Docker container build system for MLSysBook. These fixes address critical issues that were preventing successful container builds and deployments.
|
||||
|
||||
## Issues Fixed
|
||||
|
||||
### 1. Linux Container (docker/build-quarto-linux/Dockerfile)
|
||||
|
||||
**Problems Identified:**
|
||||
- Incorrect dependency file paths after repository restructuring
|
||||
- Missing progress indicators and error handling
|
||||
- Suboptimal build phase organization
|
||||
- Inefficient TeX Live package installation loop
|
||||
- Missing proper PATH configuration for LaTeX tools
|
||||
|
||||
**Fixes Applied:**
|
||||
- ✅ Fixed COPY commands to use correct paths for dependency files
|
||||
- ✅ Added comprehensive progress tracking with emojis and timing
|
||||
- ✅ Reorganized build phases (1-11) for better clarity and debugging
|
||||
- ✅ Improved TeX Live installation with better error handling
|
||||
- ✅ Enhanced cleanup procedures for smaller image size
|
||||
- ✅ Fixed PATH environment variables for all tools
|
||||
- ✅ Added proper error handling in shell loops
|
||||
|
||||
### 2. Windows Container (docker/build-quarto-windows/Dockerfile)
|
||||
|
||||
**Problems Identified:**
|
||||
- Complex and error-prone PowerShell syntax
|
||||
- Inconsistent use of PowerShell commands
|
||||
- Missing progress indicators
|
||||
- Poor error handling in installation phases
|
||||
- **CRITICAL**: Ghostscript installation hanging due to complex direct download method
|
||||
|
||||
**Fixes Applied:**
|
||||
- ✅ Simplified and standardized PowerShell command syntax
|
||||
- ✅ Added comprehensive progress tracking with timing
|
||||
- ✅ Reorganized build phases (1-12) for better organization
|
||||
- ✅ Enhanced error handling and validation
|
||||
- ✅ Improved cleanup procedures
|
||||
- ✅ Fixed dependency file path references
|
||||
- ✅ **CRITICAL FIX**: Replaced hanging Ghostscript direct download with reliable chocolatey installation (most stable for containers)
|
||||
|
||||
### 3. Linux Container Workflow (.github/workflows/build-linux-container.yml)
|
||||
|
||||
**Problems Identified:**
|
||||
- Outdated Python package list in tests
|
||||
- Inefficient container image handling
|
||||
- Missing platform specification
|
||||
|
||||
**Fixes Applied:**
|
||||
- ✅ Updated Python package imports to match current requirements
|
||||
- ✅ Optimized container testing to use local images
|
||||
- ✅ Added platform specification (linux/amd64)
|
||||
- ✅ Fixed LOCAL_IMAGE variable handling
|
||||
|
||||
### 4. Windows Container Workflow (.github/workflows/build-windows-container.yml)
|
||||
|
||||
**Problems Identified:**
|
||||
- Using bash commands instead of PowerShell in Windows containers
|
||||
- Incorrect volume mounting paths for Windows
|
||||
- Inefficient container testing approach
|
||||
|
||||
**Fixes Applied:**
|
||||
- ✅ Converted all test commands from bash to PowerShell
|
||||
- ✅ Fixed volume mounting to use Windows paths (C:/workspace)
|
||||
- ✅ Updated all docker run commands to use pwsh instead of bash
|
||||
- ✅ Improved error handling in test scenarios
|
||||
- ✅ Optimized to use local container instead of pulling
|
||||
|
||||
## Container Build Phases
|
||||
|
||||
### Linux Container (11 Phases)
|
||||
1. **System Dependencies** - Core Ubuntu packages and libraries
|
||||
2. **Inkscape Installation** - SVG to PDF conversion capability
|
||||
3. **Quarto Installation** - Latest Quarto CLI (v1.7.31)
|
||||
4. **TeX Live Installation** - Complete LaTeX distribution
|
||||
5. **Ghostscript Installation** - PDF processing capabilities
|
||||
6. **R Installation** - R base and development packages
|
||||
7. **Python Installation** - Python 3 with pip
|
||||
8. **Python Packages** - All production requirements
|
||||
9. **R Packages** - All required R libraries
|
||||
10. **R Package Verification** - Validation of successful installation
|
||||
11. **Comprehensive Cleanup** - Size optimization and cache clearing
|
||||
|
||||
### Windows Container (12 Phases)
|
||||
1. **PowerShell 7 Installation** - Modern PowerShell for better scripting
|
||||
2. **Chocolatey Installation** - Package manager for Windows
|
||||
3. **Quarto Installation** - Latest Quarto CLI (v1.7.31)
|
||||
4. **Python 3.13 Installation** - Latest Python with full package support
|
||||
5. **Python Package Installation** - All production requirements
|
||||
6. **Ghostscript Installation** - PDF processing capabilities
|
||||
7. **Inkscape Installation** - SVG to PDF conversion capability
|
||||
8. **TeX Live Installation** - Complete LaTeX distribution for Windows
|
||||
9. **R Installation** - R base with development packages
|
||||
10. **R Package Installation** - All required R libraries
|
||||
11. **R Package Verification** - Validation of successful installation
|
||||
12. **Cleanup** - Temporary file removal and optimization
|
||||
|
||||
## Testing Improvements
|
||||
|
||||
### Linux Container Tests (17 scenarios)
|
||||
All tests run successfully with proper error handling and validation:
|
||||
- ✅ Quarto functionality
|
||||
- ✅ Python packages (updated to match current requirements)
|
||||
- ✅ R packages (all from install_packages.R)
|
||||
- ✅ TeX Live and LaTeX engines
|
||||
- ✅ Inkscape SVG to PDF conversion
|
||||
- ✅ Ghostscript PDF compression
|
||||
- ✅ Fonts and graphics libraries
|
||||
- ✅ Quarto render test
|
||||
- ✅ TikZ compilation test
|
||||
- ✅ SVG to PDF conversion test
|
||||
- ✅ System resources check
|
||||
- ✅ Network connectivity
|
||||
- ✅ Book structure compatibility
|
||||
- ✅ Quarto configuration files
|
||||
- ✅ Dependencies files accessibility
|
||||
- ✅ Quarto check (same as workflow)
|
||||
- ✅ Actual build process simulation
|
||||
|
||||
### Windows Container Tests (11 scenarios)
|
||||
Converted from bash to PowerShell with proper Windows paths:
|
||||
- ✅ Quarto functionality (using pwsh commands)
|
||||
- ✅ Python packages (using Windows python command)
|
||||
- ✅ R packages (using Windows Rscript)
|
||||
- ✅ TeX Live and LaTeX engines
|
||||
- ✅ Ghostscript PDF compression
|
||||
- ✅ Quarto render test (with Windows file checking)
|
||||
- ✅ TikZ compilation test (with Windows file checking)
|
||||
- ✅ System resources (using Windows WMI commands)
|
||||
- ✅ Network connectivity (using PowerShell web requests)
|
||||
- ✅ Book structure compatibility (using Windows file system commands)
|
||||
- ✅ Quarto check test
|
||||
|
||||
## Performance Impact
|
||||
|
||||
### Before Fixes:
|
||||
- Build failures due to missing dependencies
|
||||
- Path errors preventing tool execution
|
||||
- Inefficient testing causing false positives
|
||||
- Large container sizes due to poor cleanup
|
||||
|
||||
### After Fixes:
|
||||
- **Linux Container**: ~2-3GB (optimized with multi-layer cleanup)
|
||||
- **Windows Container**: ~4-5GB (optimized for Windows base requirements)
|
||||
- **Build Time**: 5-10 minutes (Linux), 10-15 minutes (Windows)
|
||||
- **Reliability**: Comprehensive testing with proper error handling
|
||||
- **Maintainability**: Clear phase organization and progress tracking
|
||||
|
||||
## Files Modified
|
||||
|
||||
### Container Definitions:
|
||||
- `docker/build-quarto-linux/Dockerfile` - A single, unified Dockerfile for Linux builds.
|
||||
- `docker/build-quarto-windows/Dockerfile` - A single, unified Dockerfile for Windows builds.
|
||||
|
||||
### Workflow Files:
|
||||
- `.github/workflows/build-linux-container.yml` - Updated tests and platform specification
|
||||
- `.github/workflows/build-windows-container.yml` - Converted to PowerShell commands throughout
|
||||
|
||||
### Documentation:
|
||||
- `docker/build-quarto-linux/README.md` - Updated with new phase information
|
||||
- `docker/build-quarto-windows/README.md` - Enhanced with Windows-specific details
|
||||
- `docs/CONTAINER_FIXES_2025.md` - This comprehensive summary
|
||||
|
||||
## Verification Steps
|
||||
|
||||
To verify the fixes work:
|
||||
|
||||
1. **Trigger Linux Container Build:**
|
||||
```bash
|
||||
gh workflow run build-linux-container.yml
|
||||
```
|
||||
|
||||
2. **Trigger Windows Container Build:**
|
||||
```bash
|
||||
gh workflow run build-windows-container.yml
|
||||
```
|
||||
|
||||
3. **Test Containerized Builds:**
|
||||
```bash
|
||||
gh workflow run quarto-build-container.yml --field os=ubuntu-latest --field format=html
|
||||
```
|
||||
|
||||
## Future Improvements
|
||||
|
||||
1. **Multi-stage builds** for even smaller container sizes
|
||||
2. **Parallel package installation** where possible
|
||||
3. **Container image caching** optimization
|
||||
4. **Health checks** for running containers
|
||||
5. **Security scanning** integration
|
||||
|
||||
## Critical Fix: Ghostscript Installation
|
||||
|
||||
The most important fix addresses the **hanging Ghostscript installation** in the Windows container. The original approach used a complex direct download method that would hang during installation:
|
||||
|
||||
### Before (Problematic):
|
||||
```powershell
|
||||
# Complex direct download approach that hangs
|
||||
$url = 'https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs10051/gs10051w64.exe'
|
||||
Invoke-WebRequest -Uri $url -OutFile $installer -UseBasicParsing
|
||||
Start-Process -FilePath $installer -ArgumentList '/S', '/D=C:/Program Files/gs/gs10.05.1' -Wait -NoNewWindow
|
||||
```
|
||||
|
||||
### After (Working Solution):
|
||||
```powershell
|
||||
# Simplified chocolatey-only approach (most reliable for containers)
|
||||
choco install ghostscript -y
|
||||
Write-Host '✅ Ghostscript installed via chocolatey'
|
||||
```
|
||||
|
||||
This change ensures that Ghostscript installs reliably, without hanging, by using Chocolatey, the most dependable package manager for Windows containers.
|
||||
|
||||
## Conclusion
|
||||
|
||||
These comprehensive fixes restore the container build system to full functionality, providing:
|
||||
- Reliable, reproducible builds
|
||||
- Significant time savings (from 45 minutes to 5-15 minutes)
|
||||
- Better error handling and debugging
|
||||
- Comprehensive testing coverage
|
||||
- Clear documentation and progress tracking
|
||||
|
||||
The container build system is now ready for production use and will provide consistent, fast builds for the MLSysBook project.
|
||||
@@ -1 +1 @@
|
||||
config/_quarto-pdf.yml
|
||||
config/_quarto-html.yml
|
||||
BIN
quarto/contents/labs/raspi/llm/images/png/ollama.png
Normal file
BIN
quarto/contents/labs/raspi/llm/images/png/ollama.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 7.3 KiB |
@@ -87,8 +87,7 @@ We must note that we use large models beyond text, calling them *multi-modal mod
|
||||
Open models are particularly relevant for running models on edge devices like the Raspberry Pi, as they can be more easily adapted, optimized, and deployed in resource-constrained environments. Still, it is crucial to verify their licenses. Open models come with various open-source licenses that may affect their use in commercial applications, while closed models have clear, albeit restrictive, terms of service.
|
||||
|
||||
::: {.content-visible when-format="pdf"}
|
||||
](images/png/llms_slm.png)
|
||||
](images/png/llms-slm.png)
|
||||
:::
|
||||
|
||||
::: {.content-visible when-format="html"}
|
||||
@@ -125,7 +124,7 @@ For more information on SLMs, the paper, [LLM Pruning and Distillation in Practi
|
||||
|
||||
## Ollama {#sec-small-language-models-slm-ollama-bd3e}
|
||||
|
||||

|
||||

|
||||
|
||||
[Ollama](https://ollama.com/) is an open-source framework that allows us to run language models (LMs), large or small, locally on our machines. Here are some critical points about Ollama:
|
||||
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
import argparse
|
||||
|
||||
def find_qmd_files(directory):
    """Return a list of every ``.qmd`` file under *directory*, at any depth."""
    root = Path(directory)
    # "**/" makes the glob recursive, matching files in root and all subfolders.
    return list(root.glob("**/*.qmd"))
|
||||
|
||||
def process_file(qmd_file, dry_run=False):
    """Download the external images referenced by one Quarto file and relink them.

    Every markdown image whose target is an ``http(s)`` URL is saved into an
    ``images`` directory next to the document, and the link in the document is
    rewritten to the relative ``images/<name>`` path. A failure on one image
    is printed and skipped so the remaining images are still processed.

    Args:
        qmd_file: Path to the ``.qmd`` file (``str`` or ``pathlib.Path``).
        dry_run: When true, only print what would be downloaded and replaced;
            nothing is downloaded and the document is left untouched.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    # Accept plain strings too; ``.parent`` below needs a Path.
    qmd_file = Path(qmd_file)

    try:
        with open(qmd_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"❌ Failed to read {qmd_file}: {e}")
        return

    # A simple regex to find any markdown image with an external URL:
    # group 1 is the caption, group 2 the URL.
    pattern = r'!\[(.*?)\]\((https?://[^\)]+)\)'
    matches = list(re.finditer(pattern, content))
    if not matches:
        return

    print(f"📄 Processing {qmd_file}")
    images_dir = qmd_file.parent / "images"
    new_content = content

    for match in matches:
        caption = match.group(1)
        url = match.group(2)
        print(f" 🔍 Found external image: {url}")

        try:
            image_name = Path(urlparse(url).path).name
            if not image_name:
                # The URL path ends in a slash, so derive a stable name from
                # a short hash of the URL (extension assumed to be png).
                url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
                image_name = f"image_{url_hash}.png"

            local_path = images_dir / image_name
            # Markdown links always use forward slashes, whatever the host OS.
            relative_path = f"images/{image_name}"

            if dry_run:
                print(f" 🧪 [DRY RUN] Would download to {local_path}")
                print(f" 🧪 [DRY RUN] Would replace with {relative_path}")
                continue

            images_dir.mkdir(parents=True, exist_ok=True)
            # The context manager releases the streamed connection even when
            # the status check or the file write fails.
            with requests.get(url, stream=True, timeout=30) as response:
                response.raise_for_status()
                with open(local_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            print(f" ✅ Downloaded to {local_path}")

            # Swap the exact matched image markup for one that points at the
            # local copy, keeping the caption as written.
            original_md_image = match.group(0)
            replacement_md_image = f"![{caption}]({relative_path})"
            new_content = new_content.replace(original_md_image, replacement_md_image)

        except Exception as e:
            # Deliberately broad: network, HTTP and filesystem errors are all
            # reported and skipped so the remaining images are still handled.
            print(f" ❌ Failed to process {url}: {e}")

    if not dry_run and new_content != content:
        try:
            with open(qmd_file, 'w', encoding='utf-8') as f:
                f.write(new_content)
            print(f" ✅ Updated {qmd_file}")
        except (OSError, UnicodeError) as e:
            print(f" ❌ Failed to write updated file {qmd_file}: {e}")
|
||||
|
||||
|
||||
def main():
    """Parse the command line and process every ``.qmd`` file in the directory.

    Options:
        -d / --directory: directory to search (required).
        --dry-run: report what would be downloaded without downloading.
    """
    cli = argparse.ArgumentParser(
        description="Download external images from Quarto markdown files"
    )
    cli.add_argument(
        "-d", "--directory", type=str, required=True, help="Directory to process"
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be downloaded without actually downloading",
    )
    options = cli.parse_args()

    documents = find_qmd_files(options.directory)
    print(f"🔍 Found {len(documents)} .qmd files to process")

    for document in documents:
        process_file(document, options.dry_run)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user