mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-01 01:59:10 -05:00
155 lines
5.3 KiB
Python
155 lines
5.3 KiB
Python
import os
|
|
import subprocess
|
|
import argparse
|
|
import PyPDF4
|
|
import sys
|
|
from PIL import Image
|
|
import io
|
|
import ghostscript # Ensure ghostscript is installed and available
|
|
|
|
# Default input and output paths
|
|
DEFAULT_INPUT_PATH = 'Machine-Learning-Systems.pdf'
|
|
DEFAULT_OUTPUT_PATH = 'Machine-Learning-Systems_output.pdf' # Overwrite the file!
|
|
|
|
def quarto_pdf_render(output_path):
|
|
"""
|
|
Install Quarto's TinyTeX and render the book to PDF.
|
|
"""
|
|
print("Installing Quarto TinyTeX")
|
|
subprocess.run(['quarto', 'install', 'tinytex'])
|
|
process = subprocess.run(['quarto', 'render', '--output', output_path, '--to', 'pdf'], check=True)
|
|
|
|
print(f"Quarto render process return value: {process.returncode}")
|
|
|
|
def quarto_publish():
|
|
"""
|
|
Publish the rendered book using Quarto.
|
|
"""
|
|
print("Publishing the rendered book using Quarto")
|
|
process = subprocess.run(['quarto', 'publish', '--no-render', 'gh-pages'], check=True)
|
|
|
|
def compress_pdf_pypdf(input_path, output_path):
|
|
"""
|
|
Compress a PDF file using PyPDF4 by copying its contents to a new file.
|
|
|
|
Args:
|
|
input_path (str): Path to the input PDF file.
|
|
output_path (str): Path to the output compressed PDF file.
|
|
"""
|
|
if not os.path.exists(input_path):
|
|
print("Input file does not exist:", input_path)
|
|
return
|
|
|
|
print("Compressing PDF using PyPDF4")
|
|
with open(input_path, 'rb') as input_file:
|
|
reader = PyPDF4.PdfFileReader(input_file)
|
|
writer = PyPDF4.PdfFileWriter()
|
|
for page_num in range(len(reader.pages)):
|
|
page = reader.pages[page_num]
|
|
writer.addPage(page)
|
|
with open(output_path, 'wb') as output_file:
|
|
writer.write(output_file)
|
|
|
|
def rename_and_overwrite_file(old_path, new_path):
|
|
"""
|
|
Rename the new file to the old filename and overwrite it.
|
|
|
|
Args:
|
|
old_path (str): Path to the old file.
|
|
new_path (str): Path to the new file.
|
|
"""
|
|
print("Renaming", new_path, "to", old_path)
|
|
os.rename(old_path, new_path) # Rename the new file to the old filename
|
|
|
|
def get_file_size(file_path):
|
|
"""
|
|
Get the size of a file in bytes.
|
|
"""
|
|
return os.path.getsize(file_path)
|
|
|
|
# Function to measure file size
|
|
def get_file_size(file_path):
|
|
# Implementation to get file size from file_path
|
|
# Ensure to handle cases where the file does not exist or cannot be accessed
|
|
try:
|
|
file_size = os.path.getsize(file_path)
|
|
return file_size
|
|
except OSError:
|
|
print(f"Unable to get file size for {file_path}")
|
|
return None
|
|
|
|
# Function to convert bytes to appropriate units
|
|
def convert_bytes_to_human_readable(size_in_bytes):
|
|
if size_in_bytes is None:
|
|
return "Unknown"
|
|
|
|
size_kb = size_in_bytes / 1024
|
|
size_mb = size_kb / 1024
|
|
size_gb = size_mb / 1024
|
|
|
|
if size_gb >= 1:
|
|
return f"{size_gb:.2f} GB"
|
|
elif size_mb >= 1:
|
|
return f"{size_mb:.2f} MB"
|
|
elif size_kb >= 1:
|
|
return f"{size_kb:.2f} KB"
|
|
else:
|
|
return f"{size_in_bytes} bytes"
|
|
|
|
def compress_pdf_ghostscript(input_path, output_path):
|
|
"""
|
|
Compress a PDF file using ghostscript.
|
|
|
|
Args:
|
|
input_path (str): Path to the input PDF file.
|
|
output_path (str): Path to the output compressed PDF file.
|
|
"""
|
|
print("Compressing PDF using ghostscript")
|
|
|
|
# Measure input file size
|
|
input_size_before = get_file_size(input_path)
|
|
print(f"Input file size: {convert_bytes_to_human_readable(input_size_before)}")
|
|
|
|
# Command for file conversion
|
|
command = ['ps2pdf', '-dQUIET', '-dBATCH', '-sDEVICE=pdfwrite',
|
|
'-dPDFSETTINGS=/ebook',
|
|
'-dNOPAUSE',
|
|
f'-sOutputFile={output_path}',
|
|
input_path]
|
|
|
|
subprocess.run(command, check=True)
|
|
|
|
# Measure output file size
|
|
output_size_after = get_file_size(output_path)
|
|
print(f"Output file size: {convert_bytes_to_human_readable(output_size_after)}")
|
|
|
|
def main():
|
|
"""
|
|
Main function to parse command-line arguments and execute the program.
|
|
"""
|
|
parser = argparse.ArgumentParser(description="Convert a book to PDF and optionally reduce its size")
|
|
parser.add_argument('-c', '--compress', nargs='?', const='ghostscript', default='ghostscript', choices=['pypdf', 'ghostscript'], help='Compress the PDF file. Default method: ghostscript')
|
|
parser.add_argument('input_path', nargs='?', default=DEFAULT_INPUT_PATH, help='Path to the rendered book file (default: {})'.format(DEFAULT_INPUT_PATH))
|
|
parser.add_argument('output_path', nargs='?', default=DEFAULT_OUTPUT_PATH, help='Path to the output PDF file (default: {})'.format(DEFAULT_OUTPUT_PATH))
|
|
args = parser.parse_args()
|
|
|
|
quarto_pdf_render(args.input_path)
|
|
|
|
full_input_path = os.path.abspath(os.path.join('_book', args.input_path))
|
|
full_output_path = os.path.abspath(os.path.join('_book', args.output_path))
|
|
|
|
# Compress if specified
|
|
if args.compress:
|
|
print("Compressing", full_input_path, "to", full_output_path, "using", args.compress)
|
|
if args.compress == 'ghostscript':
|
|
compress_pdf_ghostscript(full_input_path, full_output_path)
|
|
elif args.compress == 'pypdf': # This option allows for future expansion
|
|
compress_pdf_pypdf(full_input_path, full_output_path)
|
|
|
|
rename_and_overwrite_file(full_output_path, full_input_path)
|
|
|
|
quarto_publish()
|
|
|
|
if __name__ == "__main__":
|
|
main()
|