# cs249r_book/scripts/quarto_publish/compress_and_publish.py

import os
import subprocess
import argparse
import PyPDF4
import sys
from PIL import Image
import io
import ghostscript  # Ensure ghostscript is installed and available

# Default file names for the rendered PDF and for its compressed copy.
# main() resolves both names inside the '_book' directory.
DEFAULT_INPUT_PATH = 'Machine-Learning-Systems.pdf'
DEFAULT_OUTPUT_PATH = 'Machine-Learning-Systems_output.pdf'  # Compressed copy; main() later renames it over the input file.

def quarto_pdf_render(output_path):
    """
    Set up TinyTeX through Quarto, then render the project as a PDF.

    Args:
        output_path (str): Value handed to ``quarto render --output``.

    Raises:
        subprocess.CalledProcessError: If ``quarto render`` exits with a
            non-zero status. The exit status of the TinyTeX install step is
            not checked.
    """
    install_cmd = ['quarto', 'install', 'tinytex']
    render_cmd = ['quarto', 'render', '--output', output_path, '--to', 'pdf']

    print("Installing Quarto TinyTeX")
    subprocess.run(install_cmd)

    render_result = subprocess.run(render_cmd, check=True)
    print(f"Quarto render process return value: {render_result.returncode}")

def quarto_publish():
    """
    Publish the book to the ``gh-pages`` target via the Quarto CLI.

    The command is invoked with ``--no-render``.

    Raises:
        subprocess.CalledProcessError: If ``quarto publish`` exits with a
            non-zero status.
    """
    publish_cmd = ['quarto', 'publish', '--no-render', 'gh-pages']

    print("Publishing the rendered book using Quarto")
    subprocess.run(publish_cmd, check=True)

def compress_pdf_pypdf(input_path, output_path):
    """
    Rewrite a PDF through PyPDF4, page by page, into a new file.

    Every page of the input document is added to a fresh writer, which is
    then saved to ``output_path``. When the input file is missing, a message
    is printed and nothing is written.

    Args:
        input_path (str): Path to the input PDF file.
        output_path (str): Path to the output compressed PDF file.
    """
    input_missing = not os.path.exists(input_path)
    if input_missing:
        print("Input file does not exist:", input_path)
        return

    print("Compressing PDF using PyPDF4")
    with open(input_path, 'rb') as source_stream:
        pdf_reader = PyPDF4.PdfFileReader(source_stream)
        pdf_writer = PyPDF4.PdfFileWriter()

        page_count = len(pdf_reader.pages)
        for index in range(page_count):
            pdf_writer.addPage(pdf_reader.pages[index])

        # The output is written while the input stream is still open.
        with open(output_path, 'wb') as target_stream:
            pdf_writer.write(target_stream)

def rename_and_overwrite_file(old_path, new_path):
    """
    Move the file at ``old_path`` to ``new_path``, overwriting any file there.

    Args:
        old_path (str): Path of the existing file to move (the source).
        new_path (str): Path the file should end up at (the destination).
            A file already present at this path is replaced.

    Raises:
        OSError: If ``old_path`` does not exist or the move cannot be done.
    """
    # Report the direction that is actually performed: old_path -> new_path.
    print("Renaming", old_path, "to", new_path)
    # os.replace overwrites an existing destination on every platform, whereas
    # os.rename raises FileExistsError on Windows when new_path already exists.
    os.replace(old_path, new_path)

def get_file_size(file_path):
    """
    Get the size of a file in bytes.

    Args:
        file_path (str): Path of the file to measure.

    Returns:
        int or None: Size in bytes, or None when the file does not exist or
        cannot be accessed (a message is printed in that case).
    """
    try:
        return os.path.getsize(file_path)
    except OSError:
        # Missing or inaccessible file: report it and let the caller decide.
        print(f"Unable to get file size for {file_path}")
        return None

def convert_bytes_to_human_readable(size_in_bytes):
    """
    Format a byte count as a short string in the largest fitting unit.

    Args:
        size_in_bytes (int or None): Size to format, or None if unknown.

    Returns:
        str: "Unknown" for None; otherwise the value with two decimals in GB,
        MB or KB (1024-based), or the raw number followed by "bytes" when it
        is below one KB.
    """
    if size_in_bytes is None:
        return "Unknown"

    # Divide by 1024 step by step, remembering the value at each unit.
    scaled_values = []
    running = size_in_bytes
    for unit in ("KB", "MB", "GB"):
        running = running / 1024
        scaled_values.append((running, unit))

    # Pick the largest unit whose scaled value reaches at least 1.
    for amount, unit in reversed(scaled_values):
        if amount >= 1:
            return f"{amount:.2f} {unit}"

    return f"{size_in_bytes} bytes"

def compress_pdf_ghostscript(input_path, output_path):
    """
    Shrink a PDF by running it through ``ps2pdf`` with the ``/ebook`` preset.

    The size of the input file is printed before the conversion and the size
    of the output file afterwards.

    Args:
        input_path (str): Path to the input PDF file.
        output_path (str): Path to the output compressed PDF file.

    Raises:
        subprocess.CalledProcessError: If ``ps2pdf`` exits with a non-zero
            status.
    """
    print("Compressing PDF using ghostscript")

    # Size before conversion
    original_size = get_file_size(input_path)
    print(f"Input file size: {convert_bytes_to_human_readable(original_size)}")

    # Fixed switches first, then the output target and the input file
    fixed_switches = [
        '-dQUIET',
        '-dBATCH',
        '-sDEVICE=pdfwrite',
        '-dPDFSETTINGS=/ebook',
        '-dNOPAUSE',
    ]
    subprocess.run(
        ['ps2pdf', *fixed_switches, f'-sOutputFile={output_path}', input_path],
        check=True,
    )

    # Size after conversion
    compressed_size = get_file_size(output_path)
    print(f"Output file size: {convert_bytes_to_human_readable(compressed_size)}")

def main():
    """
    Command-line entry point: render, compress, swap in, and publish the book.

    Steps, in order:
        1. Parse the optional compression method and input/output file names.
        2. Render the book to PDF with Quarto.
        3. Compress the rendered PDF inside ``_book`` with the chosen method.
        4. Rename the compressed file onto the rendered file's path.
        5. Publish with Quarto.
    """
    parser = argparse.ArgumentParser(description="Convert a book to PDF and optionally reduce its size")
    parser.add_argument(
        '-c', '--compress',
        nargs='?',
        const='ghostscript',
        default='ghostscript',
        choices=['pypdf', 'ghostscript'],
        help='Compress the PDF file. Default method: ghostscript',
    )
    parser.add_argument(
        'input_path',
        nargs='?',
        default=DEFAULT_INPUT_PATH,
        help=f'Path to the rendered book file (default: {DEFAULT_INPUT_PATH})',
    )
    parser.add_argument(
        'output_path',
        nargs='?',
        default=DEFAULT_OUTPUT_PATH,
        help=f'Path to the output PDF file (default: {DEFAULT_OUTPUT_PATH})',
    )
    options = parser.parse_args()

    quarto_pdf_render(options.input_path)

    # Both files are resolved inside the '_book' directory.
    book_dir = '_book'
    rendered_pdf = os.path.abspath(os.path.join(book_dir, options.input_path))
    compressed_pdf = os.path.abspath(os.path.join(book_dir, options.output_path))

    # Compress if specified
    method = options.compress
    if method:
        print("Compressing", rendered_pdf, "to", compressed_pdf, "using", method)
        compressors = {
            'ghostscript': compress_pdf_ghostscript,
            'pypdf': compress_pdf_pypdf,  # This option allows for future expansion
        }
        compressor = compressors.get(method)
        if compressor is not None:
            compressor(rendered_pdf, compressed_pdf)

    # Move the compressed file onto the rendered file's path.
    rename_and_overwrite_file(compressed_pdf, rendered_pdf)

    quarto_publish()

# Run the render/compress/publish pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()