Files
cs249r_book/scripts/quarto_publish/compress_and_publish.py
2024-03-12 23:42:49 -04:00

155 lines
5.3 KiB
Python

import os
import subprocess
import argparse
import PyPDF4
import sys
from PIL import Image
import io
import ghostscript # Ensure ghostscript is installed and available
# File names used when no paths are supplied on the command line.
# main() resolves both of them relative to the '_book' directory.
DEFAULT_INPUT_PATH = "Machine-Learning-Systems.pdf"
# Intermediate compressed file; main() later renames it over the input file.
DEFAULT_OUTPUT_PATH = "Machine-Learning-Systems_output.pdf"
def quarto_pdf_render(output_path):
    """
    Install TinyTeX through Quarto, then render the book as a PDF.

    Args:
        output_path (str): Value passed to ``quarto render --output``.

    Raises:
        subprocess.CalledProcessError: If the render command exits non-zero.
    """
    print("Installing Quarto TinyTeX")
    install_cmd = ['quarto', 'install', 'tinytex']
    # The exit status of the install step is intentionally left unchecked here.
    subprocess.run(install_cmd)

    render_cmd = ['quarto', 'render', '--output', output_path, '--to', 'pdf']
    result = subprocess.run(render_cmd, check=True)
    print(f"Quarto render process return value: {result.returncode}")
def quarto_publish():
    """
    Publish the rendered book to the ``gh-pages`` target using Quarto.

    Raises:
        subprocess.CalledProcessError: If ``quarto publish`` exits non-zero.
    """
    print("Publishing the rendered book using Quarto")
    # --no-render: the book is expected to have been rendered already, so
    # Quarto is asked not to render again. check=True raises on failure, so
    # the CompletedProcess result is not needed.
    subprocess.run(['quarto', 'publish', '--no-render', 'gh-pages'], check=True)
def compress_pdf_pypdf(input_path, output_path):
    """
    Rewrite a PDF with PyPDF4 by copying every page into a new document.

    If the input file is missing, a message is printed and nothing is written.

    Args:
        input_path (str): Path to the source PDF file.
        output_path (str): Path where the rewritten PDF is saved.
    """
    input_missing = not os.path.exists(input_path)
    if input_missing:
        print("Input file does not exist:", input_path)
        return

    print("Compressing PDF using PyPDF4")
    pdf_writer = PyPDF4.PdfFileWriter()
    with open(input_path, 'rb') as source:
        pdf_reader = PyPDF4.PdfFileReader(source)
        page_count = len(pdf_reader.pages)
        for index in range(page_count):
            pdf_writer.addPage(pdf_reader.pages[index])

        # Write while the source is still open: the pages added above still
        # belong to the reader that is bound to this file handle.
        with open(output_path, 'wb') as destination:
            pdf_writer.write(destination)
def rename_and_overwrite_file(old_path, new_path):
    """
    Move the file at ``old_path`` to ``new_path``, replacing any existing file.

    Args:
        old_path (str): Path of the existing file to move (the source).
        new_path (str): Destination path; a file already there is overwritten.

    Raises:
        OSError: If the source does not exist or the move cannot be completed.
    """
    # Report the move in the direction it actually happens (source -> target).
    print("Renaming", old_path, "to", new_path)
    # os.replace overwrites an existing destination on every platform, whereas
    # os.rename raises FileExistsError on Windows when the target exists.
    os.replace(old_path, new_path)
def get_file_size(file_path):
    """
    Return the size of a file in bytes.

    Args:
        file_path (str): Path to the file to measure.

    Returns:
        int | None: The size in bytes, or None when the file does not exist
        or cannot be accessed (a message is printed in that case).
    """
    # Single definition: an earlier duplicate without error handling was
    # shadowed by this one, so only this behaviour was ever in effect.
    try:
        return os.path.getsize(file_path)
    except OSError:
        print(f"Unable to get file size for {file_path}")
        return None
def convert_bytes_to_human_readable(size_in_bytes):
    """
    Format a byte count using the largest unit (GB, MB, KB) that is >= 1.

    Args:
        size_in_bytes (int | None): Number of bytes, or None if unknown.

    Returns:
        str: "Unknown" for None; otherwise the value with two decimals and a
        unit, or the raw count followed by "bytes" when below one kilobyte.
    """
    if size_in_bytes is None:
        return "Unknown"

    # Try the largest unit first; each unit is a power of 1024 bytes.
    for unit, power in (("GB", 3), ("MB", 2), ("KB", 1)):
        scaled = size_in_bytes / 1024 ** power
        if scaled >= 1:
            return f"{scaled:.2f} {unit}"

    return f"{size_in_bytes} bytes"
def compress_pdf_ghostscript(input_path, output_path):
    """
    Compress a PDF by running the ``ps2pdf`` command on it.

    The file sizes before and after the conversion are printed.

    Args:
        input_path (str): Path to the source PDF file.
        output_path (str): Path where the compressed PDF is written.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    print("Compressing PDF using ghostscript")

    # Report the size of the source file before converting.
    size_before = get_file_size(input_path)
    print(f"Input file size: {convert_bytes_to_human_readable(size_before)}")

    # Assemble the conversion command: options first, then the input file.
    options = [
        '-dQUIET',
        '-dBATCH',
        '-sDEVICE=pdfwrite',
        '-dPDFSETTINGS=/ebook',
        '-dNOPAUSE',
        f'-sOutputFile={output_path}',
    ]
    subprocess.run(['ps2pdf', *options, input_path], check=True)

    # Report the size of the produced file.
    size_after = get_file_size(output_path)
    print(f"Output file size: {convert_bytes_to_human_readable(size_after)}")
def main():
    """
    Parse command-line arguments, then render, compress and publish the book.

    Steps: render the PDF with Quarto, compress the file found under
    ``_book`` with the selected method, rename the compressed file over the
    rendered one, and publish with Quarto.
    """
    cli = argparse.ArgumentParser(description="Convert a book to PDF and optionally reduce its size")
    cli.add_argument(
        '-c', '--compress',
        nargs='?',
        const='ghostscript',
        default='ghostscript',
        choices=['pypdf', 'ghostscript'],
        help='Compress the PDF file. Default method: ghostscript',
    )
    cli.add_argument(
        'input_path',
        nargs='?',
        default=DEFAULT_INPUT_PATH,
        help=f'Path to the rendered book file (default: {DEFAULT_INPUT_PATH})',
    )
    cli.add_argument(
        'output_path',
        nargs='?',
        default=DEFAULT_OUTPUT_PATH,
        help=f'Path to the output PDF file (default: {DEFAULT_OUTPUT_PATH})',
    )
    options = cli.parse_args()

    quarto_pdf_render(options.input_path)

    # Both files are looked up inside the '_book' directory.
    book_dir = '_book'
    full_input_path = os.path.abspath(os.path.join(book_dir, options.input_path))
    full_output_path = os.path.abspath(os.path.join(book_dir, options.output_path))

    # Compress if specified
    method = options.compress
    if method:
        print("Compressing", full_input_path, "to", full_output_path, "using", method)
        # Dispatch on the method name; this table allows for future expansion.
        compressors = {
            'ghostscript': compress_pdf_ghostscript,
            'pypdf': compress_pdf_pypdf,
        }
        compressor = compressors.get(method)
        if compressor is not None:
            compressor(full_input_path, full_output_path)
        # Move the compressed file over the originally rendered one.
        rename_and_overwrite_file(full_output_path, full_input_path)

    quarto_publish()
# Run the render/compress/publish pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()