Files
cs249r_book/.github/workflows/quarto-build.yml
Vijay Janapa Reddi 7fc8781064 Adds nltk to install dependencies
Includes nltk in the list of install dependencies
to resolve any missing dependency issues when running
the quarto build.
2025-06-30 06:45:55 -04:00

493 lines
18 KiB
YAML

# Reusable workflow: renders the Quarto book and, when asked to, publishes the
# result to the development site or to GitHub Pages. The caller chooses the
# runner OS, so the job carries both Linux and Windows variants of its steps.
name: '📚 Quarto Build'

on:
  workflow_call:
    inputs:
      # --- required by every caller ---
      environment:
        description: 'Build environment (development/production/test)'
        type: string
        required: true
      os:
        description: 'Operating system to run on (ubuntu-latest/windows-latest)'
        type: string
        required: true
      # --- optional, with defaults ---
      quarto-version:
        description: 'Version of Quarto to use'
        type: string
        required: false
        default: '1.7.31'
      r-version:
        description: 'Version of R to use'
        type: string
        required: false
        default: '4.3.2'
      target:
        description: 'Target branch (dev/main) - determines build behavior'
        type: string
        required: false
        default: ''
      format:
        description: 'Format to build (html/pdf/epub/all)'
        type: string
        required: false
        default: 'all'
      deploy:
        description: 'Whether to deploy the build or just validate'
        type: boolean
        required: false
        default: true
    secrets:
      # Read only by the dev-site staging step.
      SSH_DEPLOY_KEY:
        required: false

permissions:
  contents: write
  pages: write
jobs:
  build:
    # Runner image and GitHub environment are both supplied by the caller.
    runs-on: ${{ inputs.os }}
    environment:
      name: ${{ inputs.environment }}
    # Give up on the job after one hour.
    timeout-minutes: 60
    env:
      # Debug-level Quarto logging for every step of the job.
      QUARTO_LOG_LEVEL: DEBUG
      # Workspace-local R library; the R package cache step stores this path.
      R_LIBS_USER: ${{ github.workspace }}/.r-lib
    steps:
# Logs the caller-supplied inputs and rejects an unknown `format` value.
# The inputs reach the script through environment variables instead of being
# expanded into the script text, so a value containing quotes or a subexpression
# cannot alter the PowerShell that is executed.
- name: 🔍 Validate inputs
  shell: pwsh
  env:
    BUILD_TARGET: ${{ inputs.target }}
    BUILD_OS: ${{ inputs.os }}
    BUILD_ENVIRONMENT: ${{ inputs.environment }}
    BUILD_FORMAT: ${{ inputs.format }}
    BUILD_DEPLOY: ${{ inputs.deploy }}
  run: |
    Write-Output "🔄 Validating workflow inputs..."
    Write-Output "👉 Target: $env:BUILD_TARGET"
    Write-Output "👉 OS: $env:BUILD_OS"
    Write-Output "👉 Environment: $env:BUILD_ENVIRONMENT"
    Write-Output "👉 Format: $env:BUILD_FORMAT"
    Write-Output "👉 Deploy: $env:BUILD_DEPLOY"
    $valid_formats = @("html", "pdf", "epub", "all")
    if ($env:BUILD_FORMAT -notin $valid_formats) {
      Write-Error "❌ Format must be one of: $($valid_formats -join ', ')"
      exit 1
    }
    Write-Output "✅ Input validation passed"
# fetch-depth 0 asks actions/checkout for the complete commit history.
- name: 📥 Checkout repository
  uses: actions/checkout@v4
  with:
    fetch-depth: 0
# Installs the Quarto release selected by the `quarto-version` input.
- name: 📦 Setup Quarto
  id: quarto-setup
  uses: quarto-dev/quarto-actions/setup@v2
  with:
    version: ${{ inputs.quarto-version }}

# Logs the Quarto health check, the installed version and the binary location.
- name: 📋 Quarto Setup Info
  shell: bash
  run: |
    echo "🔄 Checking Quarto installation..."
    quarto check
    echo "📊 Quarto version info:"
    quarto --version
    echo "📍 Quarto installation location:"
    # Try `which` first; fall back to `where.exe` when it fails.
    which quarto || where.exe quarto
- name: 🐍 Set up Python
  uses: actions/setup-python@v5
  with:
    python-version: '3.13'

# Both install steps upgrade pip first and then install the same three
# packages. Non-Windows runners invoke `python`; Windows runners go through
# the `py` launcher.
- name: 📦 Install jupyterlab-quarto (Mac/Linux)
  if: runner.os != 'Windows'
  run: |
    python -m pip install --upgrade pip
    python -m pip install jupyterlab-quarto jupyter nltk

- name: 📦 Install jupyterlab-quarto (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: |
    py -m pip install --upgrade pip
    py -m pip install jupyterlab-quarto jupyter nltk
# Caches the APT archive directory under the runner's home directory.
# The key includes a hash of the workflow files, so editing any workflow starts
# a fresh cache; the restore key lets an older cache for the same OS be reused.
- name: 💾 Cache APT packages
  id: cache-apt
  if: runner.os == 'Linux'
  uses: actions/cache@v4
  with:
    path: ~/.apt-cache
    key: apt-${{ runner.os }}-${{ hashFiles('.github/workflows/*.yml') }}
    restore-keys: |
      apt-${{ runner.os }}-
# Installs the system libraries needed by the build on Linux runners.
# This runs on every Linux build: the APT cache directory only holds downloaded
# .deb archives, and restoring it installs nothing. Skipping this step on a
# cache hit would leave the libraries missing, so the cache is used purely to
# avoid re-downloading the archives.
- name: 🛠️ Install Linux Dependencies
  if: runner.os == 'Linux'
  shell: bash
  run: |
    echo "🔄 Installing Linux dependencies..."
    echo "📦 Creating APT cache directory"
    mkdir -p ~/.apt-cache
    echo "📦 Updating package lists"
    sudo apt-get update
    echo "📦 Installing required system libraries"
    # Point apt's archive directory at the cached location.
    sudo apt-get -o dir::cache::archives="$HOME/.apt-cache" install -y \
      fonts-dejavu \
      fonts-freefont-ttf \
      gdk-pixbuf2.0-bin \
      libcairo2 \
      libfontconfig1 \
      libfreetype6 \
      libpango-1.0-0 \
      libpangocairo-1.0-0 \
      libpangoft2-1.0-0 \
      libxml2-dev \
      libcurl4-openssl-dev \
      libjpeg-dev \
      libtiff5-dev \
      libpng-dev
    echo "✅ Linux dependencies installed"
# Replaces any preinstalled Inkscape with the PPA build, installs fonts,
# refreshes the font cache and runs a small SVG-to-PDF conversion as a check.
- name: 🎨 Install Inkscape and font dependencies (Linux)
  if: runner.os == 'Linux'
  run: |
    # Drop whatever Inkscape is already present; ignore failure if there is none
    sudo apt-get remove -y inkscape || true

    # Inkscape itself comes from the stable PPA
    echo "📦 Installing Inkscape from PPA..."
    sudo add-apt-repository ppa:inkscape.dev/stable -y
    sudo apt-get update
    sudo apt-get install -y inkscape

    # Fonts
    echo "📦 Installing font dependencies..."
    sudo apt-get install -y \
      fonts-freefont-ttf \
      fonts-liberation \
      fontconfig

    # Rebuild the font cache now that Inkscape and the fonts are in place
    echo "🧹 Updating font cache..."
    sudo fc-cache -fv

    echo "📊 Inkscape version:"
    inkscape --version

    # Convert a one-circle SVG to PDF with the freshly installed Inkscape
    echo "🧪 Testing Inkscape SVG to PDF conversion..."
    echo '<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100"><circle cx="50" cy="50" r="40" fill="red"/></svg>' > test.svg
    inkscape --export-type=pdf --export-filename=test.pdf test.svg

    # Report on the outcome; a missing PDF prints diagnostics
    if [ ! -f test.pdf ]; then
      echo "❌ Inkscape SVG to PDF conversion failed."
      echo "🔍 Checking Inkscape installation..."
      dpkg -l | grep inkscape
      which inkscape
      ldd $(which inkscape) | grep "not found" || echo "All dependencies resolved"
    else
      echo "✅ Inkscape SVG to PDF conversion successful!"
      ls -lh test.pdf
    fi
# Installs Inkscape through Chocolatey and exposes its bin directory on PATH.
- name: 🎨 Install Inkscape (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: |
    choco install inkscape -y
    $inkscapeBin = "C:\Program Files\Inkscape\bin"
    # Entries written to GITHUB_PATH are only picked up by later steps...
    $inkscapeBin | Out-File -Append -Encoding ascii $env:GITHUB_PATH
    # ...so extend PATH for this step too, otherwise the check below may not
    # find the executable that was just installed.
    $env:PATH = "$inkscapeBin;$env:PATH"
    # Verify Inkscape installation
    inkscape --version
# TeX Live is set up for pdf, epub and all-format builds; html-only builds
# skip it.
- name: 📦 Install & Update TeX Live
  if: inputs.format == 'all' || inputs.format == 'pdf' || inputs.format == 'epub'
  uses: teatimeguest/setup-texlive-action@v3
  with:
    version: 2025
    cache: true
    update-all-packages: false  # this slows builds down significantly
    # Base scheme and collections first, then individual packages.
    packages: |
      scheme-basic
      collection-latex
      collection-latexrecommended
      collection-fontsrecommended
      collection-latexextra
      collection-pictures
      collection-luatex
      koma-script
      standalone
      tikz-cd
      marginfix
      newpx
      luatex85
      listings
# Installs the R release chosen by the `r-version` input.
- name: 📊 Setup R
  uses: r-lib/actions/setup-r@v2
  with:
    r-version: ${{ inputs.r-version }}
    use-public-rspm: true

# Logs the R version string, R home and the active library search paths.
- name: 📋 R Setup Info
  shell: Rscript {0}
  run: |
    cat("🔄 R Version Information:\n")
    cat("R version:", R.version$version.string, "\n")
    cat("R home:", R.home(), "\n")
    cat("R library paths:", paste(.libPaths(), collapse = ", "), "\n")
# Caches the job's R library directory (R_LIBS_USER).
# The key combines the runner OS, the requested R version and a hash over
# install_packages.R plus every .qmd file; the restore key falls back to any
# cache for the same OS and R version.
- name: 💾 Cache R packages
  id: cache-r-packages
  uses: actions/cache@v4
  with:
    path: |
      ${{ env.R_LIBS_USER }}
    key: r-pkgs-${{ runner.os }}-${{ inputs.r-version }}-${{ hashFiles('**/install_packages.R', '**/*.qmd') }}
    restore-keys: |
      r-pkgs-${{ runner.os }}-${{ inputs.r-version }}-
# Populates the R library when the cache step did not report an exact hit.
# Uses the repository's install_packages.R when it exists, otherwise installs
# a fixed fallback list.
- name: 📦 Install R packages
  if: steps.cache-r-packages.outputs.cache-hit != 'true'
  shell: Rscript {0}
  run: |
    # CRAN mirror used by every install.packages() call below
    options(repos = c(CRAN = "https://cran.rstudio.com"))

    cat("🔄 Installing R packages...\n")
    cat("R library path:", Sys.getenv("R_LIBS_USER"), "\n")

    # Make sure the library directory exists and is the active library.
    # (`lib_path` keeps its name in case the sourced script refers to it.)
    lib_path <- Sys.getenv("R_LIBS_USER")
    dir.create(lib_path, showWarnings = FALSE, recursive = TRUE)
    .libPaths(lib_path)

    cat("📦 Installing remotes package...\n")
    install.packages("remotes")

    if (!file.exists("install_packages.R")) {
      cat("⚠️ No install_packages.R found, installing common packages\n")
      fallback_pkgs <- c("rmarkdown", "knitr", "tidyverse", "ggplot2", "bookdown")
      cat("📦 Installing packages:", paste(fallback_pkgs, collapse = ", "), "\n")
      install.packages(fallback_pkgs)
    } else {
      cat("📦 Installing packages from install_packages.R...\n")
      source("install_packages.R")
    }

    cat("✅ R package installation complete\n")
    cat("📊 Installed packages:\n")
    # Show the first ten name/version pairs and the overall count
    pkg_table <- installed.packages()[, c("Package", "Version")]
    print(head(pkg_table, 10))
    cat("Total packages installed:", nrow(pkg_table), "\n")
# Ghostscript is installed ahead of the render steps because rendering needs
# it; only pdf and all-format builds on Linux install it.
- name: 📦 Install Ghostscript (Linux)
  if: runner.os == 'Linux' && (inputs.format == 'all' || inputs.format == 'pdf')
  run: sudo apt-get install -y ghostscript
# Installs Ghostscript through Chocolatey and adds its bin directory to the
# PATH seen by later steps.
- name: 📦 Install Ghostscript (Windows)
  if: runner.os == 'Windows' && (inputs.format == 'pdf' || inputs.format == 'all')
  shell: pwsh
  run: |
    # -y matches the Inkscape install and never waits for a confirmation prompt
    choco install ghostscript -y
    $gsRoot = "C:\Program Files\gs"
    if (!(Test-Path $gsRoot)) {
      Write-Error "❌ Ghostscript install directory not found: $gsRoot"
      exit 1
    }
    # The install lives in a versioned subdirectory; take the last one by name
    $gsPath = Get-ChildItem $gsRoot -Directory | Sort-Object Name -Descending | Select-Object -First 1
    $binPath = Join-Path $gsPath.FullName "bin"
    echo "Adding Ghostscript path: $binPath"
    echo "$binPath" | Out-File -Append -FilePath $env:GITHUB_PATH
# One render step per output format; `all` enables every one of them.
- name: 🔨 Render Quarto Project (HTML)
  if: inputs.format == 'all' || inputs.format == 'html'
  uses: quarto-dev/quarto-actions/render@v2
  with:
    to: html

# The PDF render is deliberately placed after the HTML render so that the
# assets it relies on are already available.
- name: 🔨 Render Quarto Project (PDF)
  if: inputs.format == 'all' || inputs.format == 'pdf'
  uses: quarto-dev/quarto-actions/render@v2
  with:
    to: titlepage-pdf

- name: 🔨 Render Quarto Project (EPUB)
  if: inputs.format == 'all' || inputs.format == 'epub'
  uses: quarto-dev/quarto-actions/render@v2
  with:
    to: epub
# Logs what ended up in _book and, for builds that render a PDF, whether the
# PDF is present. Purely informational: it never fails the job by itself.
- name: 📋 Check Quarto Build Output
  shell: bash
  env:
    # Passed via the environment rather than expanded into the script text.
    BUILD_FORMAT: ${{ inputs.format }}
  run: |
    echo "🔄 Checking Quarto build output..."
    if [ -d "_book" ]; then
      echo "✅ _book directory exists"
      echo "📊 Files in _book directory:"
      ls -la _book | head -n 20
      echo "📊 Total files in _book:"
      find _book -type f | wc -l
      # Only pdf and all-format builds render the PDF, so html and epub builds
      # must not report it as missing.
      if [ "$BUILD_FORMAT" = "pdf" ] || [ "$BUILD_FORMAT" = "all" ]; then
        if [ -f "_book/Machine-Learning-Systems.pdf" ]; then
          echo "✅ PDF file exists"
          echo "📊 PDF file size:"
          du -h "_book/Machine-Learning-Systems.pdf"
        else
          echo "⚠️ PDF file not found!"
        fi
      fi
    else
      echo "❌ _book directory not found!"
    fi
# Re-writes the rendered PDF through Ghostscript's pdfwrite device with the
# /ebook preset and swaps the result in for the original file.
- name: 📉 Compress PDF with Ghostscript (Linux)
  if: runner.os == 'Linux' && (inputs.format == 'all' || inputs.format == 'pdf')
  run: |
    if [ ! -f "_book/Machine-Learning-Systems.pdf" ]; then
      echo "⚠️ PDF file not found for compression"
    else
      gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/ebook \
        -dNOPAUSE -dQUIET -dBATCH \
        -sOutputFile="./_book/ebook.pdf" \
        "./_book/Machine-Learning-Systems.pdf"
      # The compressed copy takes the place of the original
      mv ./_book/ebook.pdf ./_book/Machine-Learning-Systems.pdf
    fi
# Compresses the rendered PDF with Ghostscript on Windows runners.
# Best effort: a missing input or a failed compression is logged as a warning
# and the original PDF is left as it was.
- name: 📉 Compress PDF with Ghostscript (Windows)
  if: runner.os == 'Windows' && (inputs.format == 'pdf' || inputs.format == 'all')
  shell: pwsh
  run: |
    # `$input` is a PowerShell automatic variable, so the paths get their own names.
    $pdfPath = "./_book/Machine-Learning-Systems.pdf"
    $compressedPath = "./_book/ebook.pdf"
    if (!(Test-Path $pdfPath)) {
      Write-Warning "⚠️ Input PDF not found! Skipping compression..."
      exit 0  # Non-zero exit would fail the workflow
    }
    Write-Output "📉 Compressing PDF using Ghostscript..."
    # Keep this command on a single line; splitting it across lines proved fragile.
    # The compatibility level must be written -dCompatibilityLevel:1.4 here, not -dCompatibilityLevel=1.4.
    & gswin64c -sDEVICE=pdfwrite -dCompatibilityLevel:1.4 -dPDFSETTINGS=/ebook -dNOPAUSE -dBATCH -sOutputFile="$compressedPath" "$pdfPath"
    $gsExitCode = $LASTEXITCODE
    # Only replace the original when Ghostscript succeeded AND produced a file;
    # output left behind by a failed run must never overwrite the good PDF.
    if ($gsExitCode -eq 0 -and (Test-Path $compressedPath)) {
      $afterSize = (Get-Item $compressedPath).Length / 1MB
      Write-Output ("📏 Compressed PDF size: {0:N2} MB" -f $afterSize)
      Write-Output "✅ Compression successful"
      Move-Item -Force $compressedPath $pdfPath
    } else {
      Write-Warning "⚠️ Compression failed but continuing"
      # Do not ship a partial file inside _book.
      Remove-Item -Force -ErrorAction SilentlyContinue $compressedPath
      # The runner exits with the last native exit code; leave with 0 so the
      # build really does continue with the uncompressed PDF.
      exit 0
    }
# Uploads _book for pull-request events, for runs with deploy turned off, and
# for every format other than html. The artifact name carries OS and format.
- name: 📤 Upload _book directory as artifact
  if: inputs.format != 'html' || inputs.deploy == false || github.event_name == 'pull_request'
  uses: actions/upload-artifact@v4
  with:
    path: _book
    name: _book-${{ inputs.os }}-${{ inputs.format }}
# Publishes the rendered HTML book to the docs/ directory of the
# harvard-edge/cs249r_book_dev repository over SSH.
# Runs only for dev-target, html-format, deploy-enabled builds on Linux.
- name: 🚀 Stage to Dev Site
  if: inputs.target == 'dev' && inputs.deploy == true && inputs.format == 'html' && runner.os == 'Linux'
  shell: bash
  env:
    SSH_DEPLOY_KEY: ${{ secrets.SSH_DEPLOY_KEY }}
  run: |
    # The secret is declared optional on the workflow, so fail with a clear
    # message here instead of letting ssh-add choke on empty input.
    if [ -z "${SSH_DEPLOY_KEY}" ]; then
      echo "❌ SSH_DEPLOY_KEY is not set; cannot push to the dev site"
      exit 1
    fi
    echo "🔐 Starting ssh-agent..."
    eval "$(ssh-agent -s)"
    # Strip carriage returns before handing the key to the agent
    echo "$SSH_DEPLOY_KEY" | tr -d '\r' | ssh-add - > /dev/null
    git config --global user.email "khoshnevis.naeem@gmail.com"
    git config --global user.name "github-actions"
    echo "🔄 Cloning target repo..."
    git clone --depth=1 git@github.com:harvard-edge/cs249r_book_dev.git target-repo
    cd target-repo
    git checkout main
    git pull origin main
    echo "🧹 Cleaning existing docs/"
    rm -rf docs
    cp -r "${GITHUB_WORKSPACE}/_book" docs
    rm -f docs/CNAME  # remove if present
    echo "🔍 Validating deployment content..."
    # Refuse to push a build that lacks index.html or is nearly empty
    test -f docs/index.html || { echo "❌ index.html missing"; exit 1; }
    [ "$(ls -1 docs | wc -l)" -gt 3 ] || { echo "❌ docs/ too empty — something went wrong"; exit 1; }
    echo "📦 Committing and pushing changes..."
    git add docs
    # An unchanged build yields no commit; that is not an error
    git commit -m "📚 Push dev branch build (manual version)" || echo "🟡 Nothing to commit"
    git push origin main
# Logs where the dev build was staged. The condition matches the staging
# step (including format == 'html') so this never reports a deployment that
# did not run.
- name: 📋 Dev Deployment Info
  if: inputs.target == 'dev' && inputs.deploy == true && inputs.format == 'html' && runner.os == 'Linux'
  shell: bash
  run: |
    echo "🔄 Development deployment information:"
    echo "📊 Deployed to repository: harvard-edge/cs249r_book_dev"
    echo "📊 Target branch: main"
    echo "📊 Target directory: docs"
    echo "📊 Source directory: _book"
    echo "✅ Deployment should be complete"
# Publishes to the gh-pages target for main-target, deploy-enabled builds on
# Linux. Rendering is switched off here; the render steps earlier in the job
# have already run.
- name: 🚀 Deploy to GitHub Pages
  if: runner.os == 'Linux' && inputs.target == 'main' && inputs.deploy == true
  uses: quarto-dev/quarto-actions/publish@v2
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  with:
    render: false
    target: gh-pages
# Logs the GitHub Pages destination; same condition as the publish step.
- name: 📋 GitHub Pages Deployment Info
  if: runner.os == 'Linux' && inputs.target == 'main' && inputs.deploy == true
  shell: bash
  run: |
    echo "🔄 GitHub Pages deployment information:"
    echo "📊 Deployed to: gh-pages branch"
    echo "📊 Source directory: _book"
    echo "✅ Deployment should be complete"
    # URL is assembled from the repository owner and repository name
    echo "🌐 Site should be available at: https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }}"
# Appends a Markdown summary of the run's inputs and cache status to the
# job's step summary.
- name: 📋 Build Summary
  shell: pwsh
  env:
    BUILD_FORMAT: ${{ inputs.format }}
  run: |
    # TeX Live is installed for pdf, epub and all-format builds; report it the
    # same way so epub builds are not shown as "Skipped".
    $texLiveStatus = if ($env:BUILD_FORMAT -in @("pdf", "epub", "all")) {
      "Installed via teatimeguest/setup-texlive-action@v3"
    } else {
      "Skipped (not needed for HTML only)"
    }
    # The closing "@ of the here-string has to start its line.
    @"
    ## 📊 Build Status Summary
    🎯 Target: ${{ inputs.target }}
    💻 OS: ${{ inputs.os }}
    🔧 Environment: ${{ inputs.environment }}
    📚 Quarto Version: ${{ inputs.quarto-version }}
    🔬 R Version: ${{ inputs.r-version }}
    📄 Format: ${{ inputs.format }}
    🚀 Deploy: ${{ inputs.deploy }}
    🧩 Cache Status:
    - TeX Live: $texLiveStatus
    - R Packages: ${{ steps.cache-r-packages.outputs.cache-hit == 'true' && '✅ Hit' || '❌ Miss' }}
    ⏰ Completed at: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")
    "@ | Add-Content $env:GITHUB_STEP_SUMMARY