refactor: move publish scripts to quarto/publish/ for better architecture

Move compression scripts from tools/scripts/publish/ to quarto/publish/:
- compress_epub.py → quarto/publish/compress_epub.py
- compress_pdf.py → quarto/publish/compress_pdf.py

Rationale:
- Publishing scripts are part of the Quarto workflow, not general tooling
- The quarto/ directory is mounted in containers; tools/ is not
- Cleaner separation: tools/ = environment, quarto/publish/ = content packaging
- Fixes container path issues where tools/scripts/publish/ was not available

Updated all workflow references to use the new paths. This should resolve the 'No such file or directory' errors in containers.
2026-05-06 17:49:07 -05:00 · 2025-08-21 21:25:09 -04:00
parent 2911369115
commit 1a6dfdb528
4 changed files with 833 additions and 12 deletions
--- a/.github/workflows/quarto-build-baremetal.yml
+++ b/.github/workflows/quarto-build-baremetal.yml
@@ -626,7 +626,7 @@ jobs:
        run: |
          if [ -f "Machine-Learning-Systems.pdf" ]; then
            echo "📉 Compressing PDF with professional compression tool..."
-            python3 ${{ github.workspace }}/tools/scripts/publish/compress_pdf.py \
+            python3 ${{ github.workspace }}/quarto/publish/compress_pdf.py \
              --input "Machine-Learning-Systems.pdf" \
              --output "compressed.pdf" \
              --quality minimal \
@@ -657,7 +657,7 @@ jobs:

          Write-Output "📉 Compressing PDF with professional compression tool..."

-          python ${{ github.workspace }}/tools/scripts/publish/compress_pdf.py --input $input --output $output --quality minimal --verbose
+          python ${{ github.workspace }}/quarto/publish/compress_pdf.py --input $input --output $output --quality minimal --verbose

          if (Test-Path $output) {
            Write-Output "✅ PDF compression completed"
@@ -674,9 +674,9 @@ jobs:
            echo "📚 Compressing EPUB with optimized compression tool..."
            echo "🔍 DEBUG: GITHUB_WORKSPACE=${{ github.workspace }}"
            echo "🔍 DEBUG: PWD=$(pwd)"
-            echo "🔍 DEBUG: Script path: ${{ github.workspace }}/tools/scripts/publish/compress_epub.py"
-            ls -la "${{ github.workspace }}/tools/scripts/publish/" || echo "❌ Directory not found"
-            python3 ${{ github.workspace }}/tools/scripts/publish/compress_epub.py \
+            echo "🔍 DEBUG: Script path: ${{ github.workspace }}/quarto/publish/compress_epub.py"
+            ls -la "${{ github.workspace }}/quarto/publish/" || echo "❌ Directory not found"
+            python3 ${{ github.workspace }}/quarto/publish/compress_epub.py \
              --input "Machine-Learning-Systems.epub" \
              --output "compressed.epub" \
              --verbose
@@ -706,7 +706,7 @@ jobs:

          Write-Output "📚 Compressing EPUB with optimized compression tool..."

-          python ${{ github.workspace }}/tools/scripts/publish/compress_epub.py --input $input --output $output --verbose
+          python ${{ github.workspace }}/quarto/publish/compress_epub.py --input $input --output $output --verbose

          if (Test-Path $output) {
            Write-Output "✅ EPUB compression completed (using optimized defaults: quality=50, max-size=1000px)"
--- a/.github/workflows/quarto-build-container.yml
+++ b/.github/workflows/quarto-build-container.yml
@@ -255,7 +255,7 @@ jobs:
        run: |
          if [ -f "Machine-Learning-Systems.pdf" ]; then
            echo "📉 Compressing PDF with professional compression tool..."
-            python3 ${{ github.workspace }}/tools/scripts/publish/compress_pdf.py \
+            python3 ${{ github.workspace }}/quarto/publish/compress_pdf.py \
              --input "Machine-Learning-Systems.pdf" \
              --output "compressed.pdf" \
              --quality minimal \
@@ -274,7 +274,7 @@ jobs:
          docker run --rm -v "$($PWD.Path):C:\workspace" -w "C:\workspace\quarto\${{ matrix.output_dir }}" ${{ env.CONTAINER_IMAGE }} powershell -Command "
            if (Test-Path 'Machine-Learning-Systems.pdf') {
              Write-Host '📉 Compressing PDF with professional compression tool...'
-              python C:\workspace\tools\scripts\publish\compress_pdf.py --input 'Machine-Learning-Systems.pdf' --output 'compressed.pdf' --quality minimal --verbose
+              python C:\workspace\quarto\publish\compress_pdf.py --input 'Machine-Learning-Systems.pdf' --output 'compressed.pdf' --quality minimal --verbose
              if (Test-Path 'compressed.pdf') {
                Move-Item -Force 'compressed.pdf' 'Machine-Learning-Systems.pdf'
                Write-Host '✅ PDF compression completed'
@@ -293,9 +293,9 @@ jobs:
            echo "📚 Compressing EPUB with optimized compression tool..."
            echo "🔍 DEBUG: GITHUB_WORKSPACE=${{ github.workspace }}"
            echo "🔍 DEBUG: PWD=$(pwd)"
-            echo "🔍 DEBUG: Script path: ${{ github.workspace }}/tools/scripts/publish/compress_epub.py"
-            ls -la "${{ github.workspace }}/tools/scripts/publish/" || echo "❌ Directory not found"
-            python3 ${{ github.workspace }}/tools/scripts/publish/compress_epub.py \
+            echo "🔍 DEBUG: Script path: ${{ github.workspace }}/quarto/publish/compress_epub.py"
+            ls -la "${{ github.workspace }}/quarto/publish/" || echo "❌ Directory not found"
+            python3 ${{ github.workspace }}/quarto/publish/compress_epub.py \
              --input "Machine-Learning-Systems.epub" \
              --output "compressed.epub" \
              --verbose
@@ -313,7 +313,7 @@ jobs:
          docker run --rm -v "$($PWD.Path):C:\workspace" -w "C:\workspace\quarto\${{ matrix.output_dir }}" ${{ env.CONTAINER_IMAGE }} powershell -Command "
            if (Test-Path 'Machine-Learning-Systems.epub') {
              Write-Host '📚 Compressing EPUB with optimized compression tool...'
-              python C:\workspace\tools\scripts\publish\compress_epub.py --input 'Machine-Learning-Systems.epub' --output 'compressed.epub' --verbose
+              python C:\workspace\quarto\publish\compress_epub.py --input 'Machine-Learning-Systems.epub' --output 'compressed.epub' --verbose
              if (Test-Path 'compressed.epub') {
                Move-Item -Force 'compressed.epub' 'Machine-Learning-Systems.epub'
                Write-Host '✅ EPUB compression completed (using optimized defaults: quality=50, max-size=1000px)'
--- a/quarto/publish/compress_epub.py
+++ b/quarto/publish/compress_epub.py
@@ -0,0 +1,435 @@
+#!/usr/bin/env python3
+"""
+EPUB Compression Tool for MLSysBook
+
+This tool compresses EPUB files by optimizing embedded images while maintaining
+EPUB format compliance. It extracts the EPUB, compresses images, and repacks
+the archive following EPUB specifications.
+
+Usage:
+    python compress_epub.py --input input.epub --output output.epub [options]
+
+Author: MLSysBook Team
+License: MIT
+"""
+
+import argparse
+import logging
+import os
+import shutil
+import sys
+import tempfile
+import zipfile
+from pathlib import Path
+from typing import Optional, Tuple
+
+try:
+    from PIL import Image
+except ImportError:
+    print("❌ Error: Pillow library is required. Install with: pip install Pillow")
+    sys.exit(1)
+
+
+class EPUBCompressor:
+    """
+    A class for compressing EPUB files by optimizing embedded images.
+    
+    This compressor maintains EPUB format compliance while reducing file size
+    through image optimization techniques including quality reduction, resizing,
+    and format optimization.
+    """
+    
+    SUPPORTED_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')
+    
+    def __init__(self, quality: int = 50, max_size: int = 1000, verbose: bool = False):
+        """
+        Build a compressor with the given image-optimization settings.
+
+        Args:
+            quality: JPEG quality used when re-encoding images (1-100, higher = better quality)
+            max_size: Largest allowed image dimension in pixels; bigger images are shrunk
+            verbose: When True, logging is configured at DEBUG level
+        """
+        self.quality, self.max_size, self.verbose = quality, max_size, verbose
+        # Logging setup reads self.verbose, so it has to run after the assignments.
+        self._setup_logging()
+        
+    def _setup_logging(self) -> None:
+        """Configure root logging (DEBUG if verbose, otherwise INFO) and bind ``self.logger``."""
+        if self.verbose:
+            log_level = logging.DEBUG
+        else:
+            log_level = logging.INFO
+        stream_handler = logging.StreamHandler()
+        logging.basicConfig(
+            level=log_level,
+            format='%(levelname)s: %(message)s',
+            handlers=[stream_handler],
+        )
+        self.logger = logging.getLogger(__name__)
+    
+    def _validate_inputs(self, input_path: Path, output_path: Path) -> None:
+        """
+        Check the source file and compressor settings, then prepare the output location.
+
+        The parent directory of ``output_path`` is created when missing, and the
+        effective settings are logged at INFO level.
+
+        Args:
+            input_path: EPUB to read; must exist and end in ``.epub`` (case-insensitive)
+            output_path: Location the compressed EPUB will be written to
+
+        Raises:
+            FileNotFoundError: ``input_path`` does not exist
+            ValueError: Wrong extension, quality outside 1-100, or max size outside 100-5000
+        """
+        if not input_path.exists():
+            raise FileNotFoundError(f"Input EPUB file not found: {input_path}")
+
+        extension = input_path.suffix.lower()
+        if extension != '.epub':
+            raise ValueError(f"Input file must be an EPUB: {input_path}")
+
+        quality_in_range = 1 <= self.quality <= 100
+        if not quality_in_range:
+            raise ValueError(f"Quality must be between 1-100, got: {self.quality}")
+
+        size_in_range = 100 <= self.max_size <= 5000
+        if not size_in_range:
+            raise ValueError(f"Max size must be between 100-5000 pixels, got: {self.max_size}")
+
+        # Create the destination folder up front so repacking cannot fail on a missing path.
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        summary_lines = (
+            f"📖 Input EPUB: {input_path}",
+            f"📦 Output EPUB: {output_path}",
+            f"🎨 Image quality: {self.quality}%",
+            f"📏 Max image size: {self.max_size}px",
+        )
+        for line in summary_lines:
+            self.logger.info(line)
+    
+    def _compress_image(self, image_path: Path) -> Tuple[bool, Optional[str]]:
+        """
+        Compress a single image file in place.
+
+        The image is re-encoded in memory and written back only when the result
+        is smaller than the file on disk, so this step can never enlarge an
+        image. The container format is always preserved: the file name (and
+        therefore the media type declared for it inside the EPUB) stays valid.
+        Multi-frame images (animated GIF/APNG, multi-page TIFF) are left
+        untouched because re-encoding would keep only the first frame.
+
+        Args:
+            image_path: Path to the image file to compress
+
+        Returns:
+            Tuple of (success: bool, error_message: Optional[str]). Files that are
+            deliberately left unchanged still count as a success.
+        """
+        import io  # local import: only this method needs an in-memory buffer
+
+        try:
+            original_size = image_path.stat().st_size
+
+            with Image.open(image_path) as img:
+                img_format = img.format
+                original_dimensions = img.size
+
+                if getattr(img, 'n_frames', 1) > 1:
+                    self.logger.debug(f"  ⏭️ Skipping multi-frame image: {image_path.name}")
+                    return True, None
+
+                # Resize if image is too large
+                if max(img.size) > self.max_size:
+                    # Image.Resampling exists since Pillow 9.1; older releases
+                    # expose LANCZOS directly on the Image module.
+                    resample = getattr(Image, 'Resampling', Image).LANCZOS
+                    img.thumbnail((self.max_size, self.max_size), resample)
+                    self.logger.debug(f"  📏 Resized {original_dimensions} → {img.size}")
+
+                buffer = io.BytesIO()
+
+                if img_format in ('JPEG', 'JPG'):
+                    # JPEG has no alpha channel: flatten onto white if one is present.
+                    if img.mode in ('RGBA', 'LA'):
+                        rgb_img = Image.new('RGB', img.size, (255, 255, 255))
+                        rgb_img.paste(img, mask=img.split()[-1])
+                        img = rgb_img
+                    img.save(buffer, 'JPEG', quality=self.quality, optimize=True)
+
+                elif img_format == 'PNG':
+                    # Try aggressive palette conversion first; fall back to a plain
+                    # optimized PNG when the mode cannot be quantized (e.g. alpha).
+                    try:
+                        img.convert('P', palette=Image.ADAPTIVE).save(buffer, 'PNG', optimize=True)
+                    except Exception:
+                        buffer = io.BytesIO()
+                        img.save(buffer, 'PNG', optimize=True)
+
+                else:
+                    # Re-save in the ORIGINAL format. Writing JPEG/PNG data under a
+                    # .gif/.bmp/.tiff name would make the content disagree with the
+                    # file extension and the media type declared in the EPUB manifest.
+                    img.save(buffer, img_format, optimize=True)
+
+            # The source file is closed here, so it is safe to overwrite it.
+            encoded = buffer.getvalue()
+            if 0 < len(encoded) < original_size:
+                image_path.write_bytes(encoded)
+                new_size = len(encoded)
+            else:
+                new_size = original_size
+                self.logger.debug("  ↩️ Kept original (re-encoding did not reduce size)")
+
+            compression_ratio = (1 - new_size / original_size) * 100 if original_size > 0 else 0
+
+            self.logger.debug(f"  💾 {original_size:,} → {new_size:,} bytes ({compression_ratio:.1f}% reduction)")
+            return True, None
+
+        except Exception as e:
+            error_msg = f"Failed to compress {image_path.name}: {str(e)}"
+            self.logger.warning(f"  ⚠️ {error_msg}")
+            return False, error_msg
+    
+    def _extract_epub(self, epub_path: Path, extract_dir: Path) -> None:
+        """
+        Unpack every member of the EPUB (ZIP) archive into ``extract_dir``.
+
+        Args:
+            epub_path: EPUB file to read
+            extract_dir: Directory that receives the archive contents
+
+        Raises:
+            ValueError: The file is not a valid ZIP archive
+            RuntimeError: Any other failure while extracting
+        """
+        self.logger.info("📂 Extracting EPUB contents...")
+
+        try:
+            with zipfile.ZipFile(epub_path, 'r') as archive:
+                archive.extractall(extract_dir)
+            self.logger.debug(f"  ✅ Extracted to: {extract_dir}")
+        except zipfile.BadZipFile:
+            # A broken container is an input problem, not an internal failure.
+            raise ValueError(f"Invalid EPUB file (not a valid ZIP): {epub_path}")
+        except Exception as exc:
+            raise RuntimeError(f"Failed to extract EPUB: {str(exc)}")
+    
+    def _compress_images_in_directory(self, directory: Path) -> Tuple[int, int]:
+        """
+        Find and compress all images in the extracted EPUB directory.
+
+        Files are selected by a case-insensitive comparison of their suffix with
+        ``SUPPORTED_IMAGE_EXTENSIONS``. Each file is visited exactly once, also on
+        case-insensitive filesystems (Windows, default macOS), where globbing for
+        both ``*.jpg`` and ``*.JPG`` would return the same file twice and
+        re-encode it twice. Mixed-case suffixes such as ``.Png`` are matched too.
+
+        Args:
+            directory: Root directory to search for images
+
+        Returns:
+            Tuple of (total_images: int, compressed_images: int)
+        """
+        self.logger.info("🎨 Compressing images...")
+
+        # Sorted so the processing order (and the log output) is reproducible.
+        image_files = sorted(
+            path for path in directory.rglob('*')
+            if path.is_file() and path.suffix.lower() in self.SUPPORTED_IMAGE_EXTENSIONS
+        )
+
+        total_images = len(image_files)
+        compressed_images = 0
+
+        if total_images == 0:
+            self.logger.info("  ℹ️ No images found to compress")
+            return 0, 0
+
+        self.logger.info(f"  📊 Found {total_images} images to process")
+
+        for i, image_path in enumerate(image_files, 1):
+            self.logger.debug(f"  🖼️ [{i}/{total_images}] {image_path.name}")
+            # Failures are already logged by _compress_image; only success is counted.
+            success, _ = self._compress_image(image_path)
+            if success:
+                compressed_images += 1
+
+        self.logger.info(f"  ✅ Successfully compressed {compressed_images}/{total_images} images")
+        return total_images, compressed_images
+    
+    def _repack_epub(self, source_dir: Path, output_path: Path) -> None:
+        """
+        Repack the directory contents into a new EPUB file.
+
+        The root-level ``mimetype`` file is written first and uncompressed, as
+        the EPUB container format requires. Every other file is stored with
+        DEFLATE compression, in sorted order so the archive layout is
+        reproducible. Only the root ``mimetype`` gets the special treatment; a
+        file that merely shares that name in a subdirectory is archived normally
+        instead of being dropped.
+
+        Args:
+            source_dir: Directory containing extracted and processed EPUB contents
+            output_path: Path for the output EPUB file
+
+        Raises:
+            RuntimeError: If the archive cannot be written
+        """
+        self.logger.info("📦 Repacking EPUB...")
+
+        try:
+            with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+                # First, add mimetype uncompressed (EPUB specification requirement)
+                mimetype_path = source_dir / 'mimetype'
+                if mimetype_path.is_file():
+                    zip_file.write(mimetype_path, 'mimetype', compress_type=zipfile.ZIP_STORED)
+                    self.logger.debug("  📄 Added mimetype (uncompressed)")
+                else:
+                    self.logger.warning("  ⚠️ No mimetype file found; output may not be a valid EPUB")
+
+                # Add all other files with compression
+                files_added = 0
+                for file_path in sorted(source_dir.rglob('*')):
+                    if not file_path.is_file() or file_path == mimetype_path:
+                        continue
+                    # ZIP member names always use forward slashes.
+                    arcname = file_path.relative_to(source_dir).as_posix()
+                    zip_file.write(file_path, arcname)
+                    files_added += 1
+
+                self.logger.debug(f"  ✅ Added {files_added} files to EPUB")
+
+        except Exception as e:
+            raise RuntimeError(f"Failed to repack EPUB: {str(e)}") from e
+    
+    def compress(self, input_path: Path, output_path: Path) -> dict:
+        """
+        Run the full pipeline: validate, extract, optimize images, repack.
+
+        Work happens in a temporary directory that is removed afterwards. If any
+        stage fails, a partially written output file is deleted before the
+        exception is propagated.
+
+        Args:
+            input_path: Path to input EPUB file
+            output_path: Path for compressed output EPUB file
+
+        Returns:
+            Dictionary with the keys ``original_size``, ``final_size``,
+            ``compression_ratio``, ``size_saved``, ``total_images`` and
+            ``compressed_images``
+
+        Raises:
+            FileNotFoundError: If input file doesn't exist
+            ValueError: If input parameters are invalid
+            RuntimeError: If compression process fails
+        """
+        self._validate_inputs(input_path, output_path)
+
+        original_size = input_path.stat().st_size
+        original_mb = original_size/1024/1024
+        self.logger.info(f"📊 Original EPUB size: {original_size:,} bytes ({original_mb:.1f} MB)")
+
+        with tempfile.TemporaryDirectory(prefix='epub_compress_') as workdir:
+            staging = Path(workdir)
+
+            try:
+                self._extract_epub(input_path, staging)
+                total_images, compressed_images = self._compress_images_in_directory(staging)
+                self._repack_epub(staging, output_path)
+            except Exception as exc:
+                # Do not leave a half-written EPUB behind.
+                if output_path.exists():
+                    output_path.unlink()
+                raise exc
+
+        final_size = output_path.stat().st_size
+        if original_size > 0:
+            compression_ratio = (1 - final_size / original_size) * 100
+        else:
+            compression_ratio = 0
+        size_saved = original_size - final_size
+
+        stats = dict(
+            original_size=original_size,
+            final_size=final_size,
+            compression_ratio=compression_ratio,
+            size_saved=size_saved,
+            total_images=total_images,
+            compressed_images=compressed_images,
+        )
+
+        final_mb = final_size/1024/1024
+        self.logger.info("✅ Compression complete!")
+        self.logger.info(f"📊 Final size: {final_size:,} bytes ({final_mb:.1f} MB)")
+        self.logger.info(f"💾 Size reduction: {compression_ratio:.1f}% ({size_saved:,} bytes saved)")
+
+        return stats
+
+
+def create_argument_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for the EPUB compression tool."""
+    usage_notes = """
+Examples:
+  %(prog)s --input input.epub --output output.epub
+  %(prog)s -i input.epub -o output.epub
+  %(prog)s -i input.epub -o output.epub --quality 60 --max-size 1200
+  %(prog)s -i input.epub -o output.epub --verbose
+  %(prog)s -i input.epub -o output.epub -q 40 -s 800 -v
+
+Quality Guidelines:
+  90-100: Highest quality, larger files
+  50-89:  Good quality, balanced size (recommended)
+  35-49:  Acceptable quality, smaller files
+  1-34:   Lower quality, smallest files
+
+Max Size Guidelines:
+  1000px: Default, optimized balance of quality and size
+  1200px: Higher quality for detailed images
+  800px:  Compact, suitable for basic readers
+  600px:  Maximum compression for size-critical applications
+        """
+
+    parser = argparse.ArgumentParser(
+        description="Compress EPUB files by optimizing embedded images while maintaining format compliance.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=usage_notes
+    )
+
+    # (flags, keyword settings) in the order they should appear in --help.
+    option_table = [
+        (('--input', '-i'), dict(
+            type=Path, required=True, metavar='EPUB_FILE',
+            help='Path to the input EPUB file to compress')),
+        (('--output', '-o'), dict(
+            type=Path, required=True, metavar='EPUB_FILE',
+            help='Path for the compressed output EPUB file')),
+        (('--quality', '-q'), dict(
+            type=int, default=50, metavar='N',
+            help='JPEG compression quality (1-100, default: 50)')),
+        (('--max-size', '-s'), dict(
+            type=int, default=1000, metavar='PIXELS',
+            help='Maximum image dimension in pixels (default: 1000)')),
+        (('--verbose', '-v'), dict(
+            action='store_true',
+            help='Enable verbose output with detailed progress information')),
+        (('--version',), dict(
+            action='version', version='%(prog)s 1.0.0')),
+    ]
+    for flags, settings in option_table:
+        parser.add_argument(*flags, **settings)
+
+    return parser
+
+
+def main() -> int:
+    """
+    Command-line entry point: parse arguments, compress, and report the result.
+
+    Returns:
+        Exit code (0 for success, 1 for error or user interruption)
+    """
+    args = create_argument_parser().parse_args()
+
+    try:
+        stats = EPUBCompressor(
+            quality=args.quality,
+            max_size=args.max_size,
+            verbose=args.verbose
+        ).compress(args.input, args.output)
+
+        report = (
+            "\n🎉 EPUB compression successful!",
+            f"📁 Output: {args.output}",
+            f"💾 Size reduction: {stats['compression_ratio']:.1f}%",
+            f"🖼️ Images processed: {stats['compressed_images']}/{stats['total_images']}",
+        )
+        for line in report:
+            print(line)
+
+        return 0
+
+    except KeyboardInterrupt:
+        print("\n❌ Operation cancelled by user")
+        return 1
+
+    except Exception as exc:
+        print(f"\n❌ Error: {str(exc)}")
+        # The full traceback is only useful when the user asked for detail.
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/quarto/publish/compress_pdf.py
+++ b/quarto/publish/compress_pdf.py
@@ -0,0 +1,386 @@
+#!/usr/bin/env python3
+"""
+PDF Compression Tool for MLSysBook
+
+This tool compresses PDF files using Ghostscript with optimized settings for
+academic textbooks. It reduces file size while maintaining readability and
+print quality suitable for educational content.
+
+Usage:
+    python compress_pdf.py --input input.pdf --output output.pdf [options]
+
+Author: MLSysBook Team
+License: MIT
+"""
+
+import argparse
+import logging
+import os
+import platform
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+from typing import Optional
+
+class PDFCompressor:
+    """
+    A class for compressing PDF files using Ghostscript.
+    
+    This compressor uses Ghostscript with optimized settings for academic
+    textbooks, balancing file size reduction with quality preservation
+    for educational content.
+    """
+    
+    # Ghostscript quality presets
+    QUALITY_PRESETS = {
+        'screen': '/screen',      # Lowest quality, smallest files (72 dpi)
+        'ebook': '/ebook',        # Good for e-readers (150 dpi) - DEFAULT
+        'printer': '/printer',    # Good for printing (300 dpi)
+        'prepress': '/prepress',  # Highest quality (300+ dpi)
+        'default': '/default',    # Ghostscript default settings
+        'minimal': '/ebook'       # Minimal mode - matches original workflow exactly
+    }
+    
+    def __init__(self, quality: str = 'ebook', compatibility: str = '1.4', verbose: bool = False):
+        """
+        Build a compressor and make sure Ghostscript can be found.
+
+        Args:
+            quality: Name of a preset in ``QUALITY_PRESETS``
+                (screen, ebook, printer, prepress, default, minimal)
+            compatibility: PDF compatibility level (1.3, 1.4, 1.5, 1.6, 1.7)
+            verbose: When True, logging is configured at DEBUG level
+
+        Raises:
+            RuntimeError: If no Ghostscript executable can be run
+        """
+        self.quality, self.compatibility, self.verbose = quality, compatibility, verbose
+        # Logging first: the dependency check writes debug messages through self.logger.
+        self._setup_logging()
+        self._validate_dependencies()
+        
+    def _setup_logging(self) -> None:
+        """Configure root logging (DEBUG if verbose, otherwise INFO) and bind ``self.logger``."""
+        if self.verbose:
+            log_level = logging.DEBUG
+        else:
+            log_level = logging.INFO
+        stream_handler = logging.StreamHandler()
+        logging.basicConfig(
+            level=log_level,
+            format='%(levelname)s: %(message)s',
+            handlers=[stream_handler],
+        )
+        self.logger = logging.getLogger(__name__)
+    
+    def _validate_dependencies(self) -> None:
+        """
+        Locate a runnable Ghostscript executable and store its name in ``self.gs_executable``.
+
+        Each candidate is probed with ``--version``. A candidate is skipped when
+        it exits with an error or cannot be started at all; the latter covers
+        every ``OSError`` (missing file, missing execute permission, ...), so one
+        unusable entry on PATH does not stop the remaining candidates from
+        being tried.
+
+        Raises:
+            RuntimeError: If none of the candidates can be run
+        """
+        # Determine platform-specific Ghostscript executable
+        if platform.system() == 'Windows':
+            # Console builds of Ghostscript on Windows (64-bit, then 32-bit), then plain gs
+            gs_candidates = ['gswin64c', 'gswin32c', 'gs']
+        else:
+            # On Linux/macOS, use gs
+            gs_candidates = ['gs']
+
+        self.gs_executable = None
+        for gs_cmd in gs_candidates:
+            try:
+                result = subprocess.run([gs_cmd, '--version'],
+                                        capture_output=True, text=True, check=True)
+            except (subprocess.CalledProcessError, OSError):
+                continue
+            self.gs_executable = gs_cmd
+            self.logger.debug(f"Found Ghostscript executable: {gs_cmd}")
+            self.logger.debug(f"Ghostscript version: {result.stdout.strip()}")
+            break
+
+        if not self.gs_executable:
+            raise RuntimeError(
+                "Ghostscript is not installed or not in PATH. "
+                f"Tried: {', '.join(gs_candidates)}. "
+                "Please install Ghostscript to use this tool."
+            )
+    
+    def _validate_inputs(self, input_path: Path, output_path: Path) -> None:
+        """
+        Check the source file and compressor settings, then prepare the output location.
+
+        The parent directory of ``output_path`` is created when missing, and the
+        effective settings are logged at INFO level.
+
+        Args:
+            input_path: PDF to read; must exist and end in ``.pdf`` (case-insensitive)
+            output_path: Location the compressed PDF will be written to
+
+        Raises:
+            FileNotFoundError: ``input_path`` does not exist
+            ValueError: Wrong extension, unknown quality preset, or unsupported
+                compatibility level
+        """
+        if not input_path.exists():
+            raise FileNotFoundError(f"Input PDF file not found: {input_path}")
+
+        extension = input_path.suffix.lower()
+        if extension != '.pdf':
+            raise ValueError(f"Input file must be a PDF: {input_path}")
+
+        known_preset = self.quality in self.QUALITY_PRESETS
+        if not known_preset:
+            raise ValueError(f"Quality must be one of {list(self.QUALITY_PRESETS.keys())}, got: {self.quality}")
+
+        supported_levels = ['1.3', '1.4', '1.5', '1.6', '1.7']
+        if self.compatibility not in supported_levels:
+            raise ValueError(f"Compatibility must be 1.3-1.7, got: {self.compatibility}")
+
+        # Create the destination folder up front so Ghostscript can write its output.
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        summary_lines = (
+            f"📄 Input PDF: {input_path}",
+            f"📦 Output PDF: {output_path}",
+            f"🎨 Quality preset: {self.quality}",
+            f"📋 PDF compatibility: {self.compatibility}",
+        )
+        for line in summary_lines:
+            self.logger.info(line)
+    
+    def _format_file_size(self, size_bytes: int) -> str:
+        """
+        Convert a byte count to a human-readable string such as ``"1.5 MB"``.
+
+        Negative values are scaled by their magnitude and keep their sign. They
+        occur when a "compressed" file ends up larger than the original, which
+        makes the saved size negative.
+
+        Args:
+            size_bytes: Number of bytes (may be negative)
+
+        Returns:
+            The value with one decimal place and a unit from B to TB
+        """
+        sign = '-' if size_bytes < 0 else ''
+        magnitude = float(abs(size_bytes))
+        for unit in ['B', 'KB', 'MB', 'GB']:
+            if magnitude < 1024.0:
+                return f"{sign}{magnitude:.1f} {unit}"
+            magnitude /= 1024.0
+        return f"{sign}{magnitude:.1f} TB"
+    
+    def _build_ghostscript_command(self, input_path: Path, output_path: Path) -> list[str]:
+        """
+        Assemble the Ghostscript argument list for the configured preset.
+
+        The ``minimal`` preset yields only the core ``pdfwrite`` switches. Every
+        other preset additionally gets ``-dSAFER``, disabled auto-rotation and
+        bicubic downsampling. The output and input paths always come last.
+
+        Args:
+            input_path: Path to input PDF file
+            output_path: Path for output PDF file
+
+        Returns:
+            List of command arguments for subprocess
+        """
+        preset = self.QUALITY_PRESETS[self.quality]
+        quiet_switch = '-dNOQUIET' if self.verbose else '-dQUIET'
+
+        # Switches shared by both modes.
+        command = [
+            self.gs_executable,
+            '-sDEVICE=pdfwrite',
+            f'-dCompatibilityLevel={self.compatibility}',
+            f'-dPDFSETTINGS={preset}',
+            '-dNOPAUSE',
+            quiet_switch,
+            '-dBATCH',
+        ]
+
+        if self.quality != 'minimal':
+            # Enhanced mode only.
+            command.extend([
+                '-dSAFER',
+                '-dAutoRotatePages=/None',
+                '-dColorImageDownsampleType=/Bicubic',
+                '-dGrayImageDownsampleType=/Bicubic',
+                '-dMonoImageDownsampleType=/Bicubic',
+            ])
+
+        command.append(f'-sOutputFile={output_path}')
+        command.append(str(input_path))
+        return command
+    
+    def compress(self, input_path: Path, output_path: Path) -> dict:
+        """
+        Validate the request, run Ghostscript, and report the size change.
+
+        If Ghostscript fails, a partially written output file is deleted before
+        the error is raised.
+
+        Args:
+            input_path: Path to input PDF file
+            output_path: Path for compressed output PDF file
+
+        Returns:
+            Dictionary with the keys ``original_size``, ``final_size``,
+            ``compression_ratio``, ``size_saved``, ``quality_preset`` and
+            ``pdf_compatibility``
+
+        Raises:
+            FileNotFoundError: If input file doesn't exist
+            ValueError: If input parameters are invalid
+            RuntimeError: If compression process fails
+        """
+        self._validate_inputs(input_path, output_path)
+
+        original_size = input_path.stat().st_size
+        self.logger.info(f"📊 Original PDF size: {original_size:,} bytes ({self._format_file_size(original_size)})")
+
+        command = self._build_ghostscript_command(input_path, output_path)
+
+        self.logger.info("🔄 Compressing PDF with Ghostscript...")
+        self.logger.debug(f"Command: {' '.join(command)}")
+
+        def discard_partial_output() -> None:
+            # Remove whatever Ghostscript managed to write before failing.
+            if output_path.exists():
+                output_path.unlink()
+
+        try:
+            # In verbose mode Ghostscript's output goes straight to the console.
+            result = subprocess.run(
+                command,
+                check=True,
+                capture_output=not self.verbose,
+                text=True
+            )
+            self.logger.debug(f"Ghostscript return code: {result.returncode}")
+
+        except subprocess.CalledProcessError as gs_error:
+            discard_partial_output()
+
+            error_msg = f"Ghostscript compression failed (exit code {gs_error.returncode})"
+            if gs_error.stderr:
+                error_msg += f": {gs_error.stderr.strip()}"
+
+            raise RuntimeError(error_msg)
+
+        except Exception as exc:
+            discard_partial_output()
+            raise RuntimeError(f"PDF compression failed: {str(exc)}")
+
+        # A zero exit code alone does not guarantee that a file was produced.
+        if not output_path.exists():
+            raise RuntimeError("Ghostscript completed but output file was not created")
+
+        final_size = output_path.stat().st_size
+        if original_size > 0:
+            compression_ratio = (1 - final_size / original_size) * 100
+        else:
+            compression_ratio = 0
+        size_saved = original_size - final_size
+
+        stats = dict(
+            original_size=original_size,
+            final_size=final_size,
+            compression_ratio=compression_ratio,
+            size_saved=size_saved,
+            quality_preset=self.quality,
+            pdf_compatibility=self.compatibility,
+        )
+
+        self.logger.info("✅ Compression complete!")
+        self.logger.info(f"📊 Final size: {final_size:,} bytes ({self._format_file_size(final_size)})")
+        self.logger.info(f"💾 Size reduction: {compression_ratio:.1f}% ({self._format_file_size(size_saved)} saved)")
+
+        return stats
+
+
+def create_argument_parser() -> argparse.ArgumentParser:
+    """Create and configure the command-line argument parser."""
+    parser = argparse.ArgumentParser(
+        description="Compress PDF files using Ghostscript with optimized settings for academic textbooks.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s --input input.pdf --output output.pdf
+  %(prog)s -i input.pdf -o output.pdf
+  %(prog)s -i input.pdf -o output.pdf --quality printer
+  %(prog)s -i input.pdf -o output.pdf --verbose
+  %(prog)s -i input.pdf -o output.pdf -q screen -c 1.5 -v
+
+Quality Presets:
+  screen:    Lowest quality, smallest files (72 dpi) - for web viewing
+  ebook:     Good for e-readers (150 dpi) - DEFAULT, balanced size/quality
+  printer:   Good for printing (300 dpi) - higher quality
+  prepress:  Highest quality (300+ dpi) - for professional printing
+  default:   Ghostscript default settings - no optimization
+  minimal:   Exact match to original workflow commands - for compatibility
+
+PDF Compatibility:
+  1.3: Oldest, most compatible (Acrobat 4.0+)
+  1.4: Good compatibility (Acrobat 5.0+) - DEFAULT
+  1.5: Modern features (Acrobat 6.0+)
+  1.6: Advanced features (Acrobat 7.0+)
+  1.7: Latest features (Acrobat 8.0+)
+        """
+    )
+    
+    parser.add_argument(
+        '--input', '-i',
+        type=Path,
+        required=True,
+        metavar='PDF_FILE',
+        help='Path to the input PDF file to compress'
+    )
+    
+    parser.add_argument(
+        '--output', '-o',
+        type=Path,
+        required=True,
+        metavar='PDF_FILE',
+        help='Path for the compressed output PDF file'
+    )
+    
+    parser.add_argument(
+        '--quality', '-q',
+        choices=['screen', 'ebook', 'printer', 'prepress', 'default', 'minimal'],
+        default='ebook',
+        help='Compression quality preset (default: ebook)'
+    )
+    
+    parser.add_argument(
+        '--compatibility', '-c',
+        choices=['1.3', '1.4', '1.5', '1.6', '1.7'],
+        default='1.4',
+        metavar='VERSION',
+        help='PDF compatibility level (default: 1.4)'
+    )
+    
+    parser.add_argument(
+        '--verbose', '-v',
+        action='store_true',
+        help='Enable verbose output with detailed progress information'
+    )
+    
+    parser.add_argument(
+        '--version',
+        action='version',
+        version='%(prog)s 1.0.0'
+    )
+    
+    return parser
+
+
+def main() -> int:
+    """
+    Main entry point for the PDF compression tool.
+    
+    Returns:
+        Exit code (0 for success, 1 for error)
+    """
+    parser = create_argument_parser()
+    args = parser.parse_args()
+    
+    try:
+        # Create compressor instance
+        compressor = PDFCompressor(
+            quality=args.quality,
+            compatibility=args.compatibility,
+            verbose=args.verbose
+        )
+        
+        # Perform compression
+        stats = compressor.compress(args.input, args.output)
+        
+        # Success message
+        print(f"\n🎉 PDF compression successful!")
+        print(f"📁 Output: {args.output}")
+        print(f"💾 Size reduction: {stats['compression_ratio']:.1f}%")
+        print(f"🎨 Quality preset: {stats['quality_preset']}")
+        
+        return 0
+        
+    except KeyboardInterrupt:
+        print("\n❌ Operation cancelled by user")
+        return 1
+        
+    except Exception as e:
+        print(f"\n❌ Error: {str(e)}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())