2 Commits

Author SHA1 Message Date
Vijay Janapa Reddi
09a67d865f chore(epub): remove obsolete bash wrapper script
Removed fix_epub_references.sh as it has been replaced by the
cross-platform Python wrapper epub_postprocess.py
2025-11-26 09:04:30 +01:00
Vijay Janapa Reddi
44bff4bab9 fix(epub): replace bash script with cross-platform Python wrapper
Replaced fix_epub_references.sh with epub_postprocess.py to support
Windows builds. The new Python wrapper provides identical functionality
using only Python stdlib (zipfile, tempfile, shutil) and imports the
existing fix_cross_references.py module directly.

Key changes:
- Created epub_postprocess.py: Cross-platform wrapper for EPUB post-processing
- Updated _quarto-epub.yml: Changed post-render hook from .sh to .py
- Removed dependency on bash/shell for Windows compatibility

The wrapper extracts the EPUB, runs cross-reference fixes using the
existing dynamic section mapping system, and re-packages the EPUB
following EPUB3 standards (uncompressed mimetype first).
2025-11-26 09:04:19 +01:00
3 changed files with 163 additions and 49 deletions

View File

@@ -19,7 +19,7 @@ project:
output-dir: _build/epub
post-render:
- scripts/clean_svgs.py
- scripts/fix_epub_references.sh
- scripts/epub_postprocess.py
preview:
browser: false

View File

@@ -0,0 +1,162 @@
#!/usr/bin/env python3
"""
Cross-platform EPUB post-processor wrapper.
Extracts EPUB, fixes cross-references, and re-packages it.
Works on Windows, macOS, and Linux.
"""
import sys
import os
import shutil
import tempfile
import zipfile
from pathlib import Path
# Import the fix_cross_references module functions directly
# This avoids subprocess complications and works cross-platform
sys.path.insert(0, str(Path(__file__).parent))
from fix_cross_references import (
build_epub_section_mapping,
process_html_file
)
def extract_epub(epub_path, temp_dir):
    """Unpack every member of the EPUB archive into ``temp_dir``.

    Args:
        epub_path: Location of the ``.epub`` (ZIP) file to read.
        temp_dir: Directory that receives the extracted tree.
    """
    print(" Extracting EPUB...")
    # An EPUB is a plain ZIP container; open it read-only (the default mode).
    with zipfile.ZipFile(epub_path) as archive:
        archive.extractall(path=temp_dir)
def fix_cross_references_in_extracted_epub(temp_dir):
    """Fix cross-references in an extracted EPUB directory.

    Builds the section mapping with ``build_epub_section_mapping`` and runs
    ``process_html_file`` on every ``*.xhtml`` file directly inside
    ``EPUB/text``, skipping navigation, cover and title pages. A summary of
    the fixes (and a sample of references that could not be mapped) is
    printed to stdout.

    Args:
        temp_dir: Root of the extracted EPUB tree (``str`` or path-like).

    Returns:
        Total number of cross-references fixed, or 0 when the
        ``EPUB/text`` directory does not exist.
    """
    # Accept plain strings as well as Path objects; the "/" joins below
    # require a Path.
    temp_dir = Path(temp_dir)

    print(" Fixing cross-references...")

    # Build EPUB section mapping
    epub_mapping = build_epub_section_mapping(temp_dir)
    print(f" Found {len(epub_mapping)} section IDs across chapters")

    # Find all XHTML files
    epub_text_dir = temp_dir / "EPUB" / "text"
    if not epub_text_dir.exists():
        print(" ⚠️ No EPUB/text directory found")
        return 0

    # glob() yields entries in arbitrary order; sort so that processing and
    # the per-file report below are reproducible between runs.
    xhtml_files = sorted(epub_text_dir.glob("*.xhtml"))
    print(f" Scanning {len(xhtml_files)} XHTML files...")

    # Process each file
    files_fixed = []
    total_refs_fixed = 0
    all_unmapped = set()
    skip_patterns = ['nav.xhtml', 'cover.xhtml', 'title_page.xhtml']

    for xhtml_file in xhtml_files:
        # Skip files whose name contains one of the skip patterns
        if any(skip in xhtml_file.name for skip in skip_patterns):
            continue

        rel_path, fixed_count, unmapped = process_html_file(
            xhtml_file,
            temp_dir,  # base_dir for relative paths
            epub_mapping,
        )
        if fixed_count > 0:
            files_fixed.append((rel_path or xhtml_file.name, fixed_count))
            total_refs_fixed += fixed_count
        all_unmapped.update(unmapped)

    if files_fixed:
        print(f" ✅ Fixed {total_refs_fixed} cross-references in {len(files_fixed)} files")
        for path, count in files_fixed:
            print(f" 📄 {path}: {count} refs")
    else:
        print(" ✅ No unresolved cross-references found")

    if all_unmapped:
        # Sort BEFORE truncating: slicing the set first would pick an
        # arbitrary five entries and make the warning differ between runs.
        print(f" ⚠️ Unmapped references: {', '.join(sorted(all_unmapped)[:5])}")

    return total_refs_fixed
def repackage_epub(temp_dir, output_path):
    """Re-package an extracted EPUB tree into a new EPUB archive.

    The ``mimetype`` file (when present) is written first and stored
    uncompressed; every regular file below ``META-INF`` and ``EPUB`` is then
    added with DEFLATE compression. Other top-level entries of ``temp_dir``
    are not included, so ``output_path`` may safely live at the root of
    ``temp_dir``.

    Args:
        temp_dir: Root of the extracted EPUB tree (``str`` or path-like).
        output_path: Destination of the new archive (``str`` or path-like).
    """
    # Accept plain strings too; the "/" joins below require a Path.
    root = Path(temp_dir)

    print(" Re-packaging EPUB...")

    # Create new EPUB zip file
    with zipfile.ZipFile(output_path, 'w') as epub_zip:
        # EPUB requires mimetype to be first and uncompressed
        mimetype_path = root / "mimetype"
        if mimetype_path.exists():
            epub_zip.write(mimetype_path, "mimetype", compress_type=zipfile.ZIP_STORED)

        # Add all other files recursively
        for item in ("META-INF", "EPUB"):
            item_path = root / item
            if item_path.is_dir():
                # rglob() order is unspecified; sorting makes the member
                # order of the archive reproducible.
                for file_path in sorted(item_path.rglob("*")):
                    if file_path.is_file():
                        # ZIP member names always use forward slashes.
                        arcname = file_path.relative_to(root).as_posix()
                        epub_zip.write(file_path, arcname, compress_type=zipfile.ZIP_DEFLATED)
            elif item_path.exists():
                epub_zip.write(item_path, item, compress_type=zipfile.ZIP_DEFLATED)
def main():
    """Post-process an EPUB in place: extract, fix cross-references, re-package.

    The EPUB path is taken from the first command-line argument; without
    one, the default build output used by the post-render hook is assumed.

    Returns:
        0 on success or when the EPUB file does not exist (nothing to do),
        1 when any step of the post-processing raises an exception.
    """
    # The status lines contain emoji. On streams with a legacy encoding
    # (e.g. cp1252 for piped output on Windows) printing them would raise
    # UnicodeEncodeError, so degrade unencodable characters to "?" instead.
    for stream in (sys.stdout, sys.stderr):
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(errors="replace")

    # Determine EPUB file path
    if len(sys.argv) > 1:
        epub_file = Path(sys.argv[1])
    else:
        # Running as post-render hook - find the EPUB
        epub_file = Path("_build/epub/Machine-Learning-Systems.epub")

    if not epub_file.exists():
        # A missing EPUB is not treated as a failure of the hook.
        print(f"⚠️ EPUB file not found: {epub_file}")
        return 0

    print(f"📚 Post-processing EPUB: {epub_file}")

    # Get absolute path to EPUB file
    epub_abs = epub_file.resolve()

    # Create temporary directory
    temp_dir = Path(tempfile.mkdtemp())
    try:
        # Extract EPUB
        extract_epub(epub_abs, temp_dir)

        # Fix cross-references (the returned count is already reported by
        # the helper itself, so it is not needed here)
        fix_cross_references_in_extracted_epub(temp_dir)

        # Build the new archive inside the temp dir; the original is only
        # replaced once re-packaging has completed without error.
        fixed_epub = temp_dir / "fixed.epub"
        repackage_epub(temp_dir, fixed_epub)

        # Replace original with fixed version
        shutil.move(str(fixed_epub), str(epub_abs))

        print("✅ EPUB post-processing complete")
        return 0
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"❌ Error during EPUB post-processing: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Clean up temporary directory
        if temp_dir.exists():
            shutil.rmtree(temp_dir, ignore_errors=True)
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env bash
# Post-process EPUB to fix cross-references
# This script extracts the EPUB, fixes references, and re-packages it
#
# Usage: fix_epub_references.sh [EPUB_FILE]
# Without an argument the default build output is used. A missing EPUB is
# not an error (exit status 0).
set -e

# Resolve the script's own directory to an absolute path NOW: the script
# later changes into the temp directory, after which a relative "$0" would
# no longer point at the directory containing fix_cross_references.py.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)

EPUB_FILE="$1"
if [ -z "$EPUB_FILE" ]; then
    # Running as post-render hook - find the EPUB
    EPUB_FILE="_build/epub/Machine-Learning-Systems.epub"
fi

if [ ! -f "$EPUB_FILE" ]; then
    echo "⚠️ EPUB file not found: $EPUB_FILE"
    exit 0
fi

echo "📚 Post-processing EPUB: $EPUB_FILE"

# Get absolute path to EPUB file
EPUB_ABS=$(cd "$(dirname "$EPUB_FILE")" && pwd)/$(basename "$EPUB_FILE")

# Create temporary directory; single quotes defer expansion to exit time and
# the inner double quotes keep paths containing spaces intact.
TEMP_DIR=$(mktemp -d)
trap 'rm -rf "$TEMP_DIR"' EXIT

# Extract EPUB
echo " Extracting EPUB..."
unzip -q "$EPUB_ABS" -d "$TEMP_DIR"

# Fix cross-references using Python script (run from inside the extracted
# tree). Best-effort: a failure must not abort packaging, but it is reported
# instead of being swallowed silently.
echo " Fixing cross-references..."
cd "$TEMP_DIR"
python3 "$SCRIPT_DIR/fix_cross_references.py" >/dev/null 2>&1 \
    || echo " ⚠️ Cross-reference fixing failed; re-packaging EPUB unchanged"

# Re-package EPUB (still inside $TEMP_DIR)
echo " Re-packaging EPUB..."
# EPUB requires mimetype to be first and uncompressed
zip -0 -X fixed.epub mimetype
# Add all other files recursively
zip -r -X fixed.epub META-INF EPUB

# Replace original with fixed version
mv "$TEMP_DIR/fixed.epub" "$EPUB_ABS"

echo "✅ EPUB post-processing complete"