Remove EPUB functionality from pre-commit hooks and build system

- Remove EPUB from GitHub workflow valid formats and build steps
- Remove EPUB config references from binder script
- Remove EPUB rendering and compression functions from publish script
- Remove EPUB file patterns from .gitignore
- Update README.md to remove EPUB config reference
- Update help messages to only mention HTML and PDF formats
- Tested binder commands and status functionality
This commit is contained in:
Vijay Janapa Reddi
2025-07-31 00:57:47 -04:00
parent f129b1f215
commit 243b14bc61
8 changed files with 3561 additions and 180 deletions

View File

@@ -71,7 +71,7 @@ jobs:
Write-Output "👉 Format: ${{ inputs.format }}"
Write-Output "👉 Deploy: ${{ inputs.deploy }}"
$valid_formats = @("html", "pdf", "epub", "all")
$valid_formats = @("html", "pdf", "all")
if ("${{ inputs.format }}" -notin $valid_formats) {
Write-Error "❌ Format must be one of: $($valid_formats -join ', ')"
exit 1
@@ -346,18 +346,7 @@ jobs:
echo "🔨 Building PDF..."
quarto render --to titlepage-pdf
- name: 🔨 Build EPUB (copy config and render)
if: inputs.format == 'epub' || inputs.format == 'all'
shell: bash
run: |
echo "🚀 Setting up EPUB configuration..."
cd book
rm -f _quarto.yml
cp config/_quarto-epub.yml _quarto.yml
echo "✅ Configuration set to EPUB"
echo "🔨 Building EPUB..."
quarto render --to epub
- name: 📋 Check Quarto Build Output
shell: bash
@@ -398,26 +387,7 @@ jobs:
fi
fi
# EPUB check (disabled for now)
# if [[ "${{ inputs.format }}" == "epub" || "${{ inputs.format }}" == "all" ]]; then
# if [ -d "build/epub" ]; then
# echo "✅ build/epub directory exists"
# echo "📊 Files in build/epub directory:"
# ls -la build/epub | head -n 20
# echo "📊 Total files in build/epub:"
# find build/epub -type f | wc -l
#
# if [ -f "build/epub/Machine-Learning-Systems.epub" ]; then
# echo "✅ EPUB file exists"
# echo "📊 EPUB file size:"
# du -h "build/epub/Machine-Learning-Systems.epub"
# else
# echo "⚠️ EPUB file not found!"
# fi
# else
# echo "❌ build/epub directory not found!"
# fi
# fi
- name: 📉 Compress PDF with Ghostscript (Linux)
if: runner.os == 'Linux' && (inputs.format == 'pdf' || inputs.format == 'all')
@@ -473,8 +443,7 @@ jobs:
name: build-${{ inputs.os }}-${{ inputs.format }}
path: |
build/html
build/pdf
build/epub
build/pdf
- name: 🚀 Stage to Dev Site
if: inputs.target == 'dev' && inputs.deploy == true && inputs.format == 'html' && runner.os == 'Linux'

10
.gitignore vendored
View File

@@ -67,11 +67,11 @@ book/index.pdf
Machine-Learning-Systems.tex
Machine-Learning-Systems.log
Machine-Learning-Systems.pdf
Machine-Learning-Systems.epub
# Book outputs
/*.ipynb
/*.epub
/*.pdf
# -----------------------------------------------------------------------------
@@ -79,20 +79,20 @@ Machine-Learning-Systems.epub
# -----------------------------------------------------------------------------
# Generated figures (specific to core content)
book/contents/core/**/figure-latex/
book/contents/core/**/figure-epub/
book/contents/core/**/figure-pdf/
book/contents/core/**/figure-html/
book/contents/core/**/mediabag/
# Lab-specific generated content
book/contents/labs/**/figure-latex/
book/contents/labs/**/figure-epub/
book/contents/labs/**/figure-pdf/
book/contents/labs/**/mediabag/
# Any other figure directories
**/figure-latex/
**/figure-epub/
# -----------------------------------------------------------------------------
# Local Development & Temporary Files

View File

@@ -202,7 +202,7 @@ MLSysBook/
│ ├── data/ # Cross-reference and metadata files
│ ├── _quarto-html.yml # Website build configuration
│ ├── _quarto-pdf.yml # PDF build configuration
│ ├── _quarto-epub.yml # EPUB build configuration
│ ├── _quarto.yml # Active config (symlink)
│ ├── index.qmd # Main entry point
│ └── assets/ # Images, styles, media

54
binder
View File

@@ -31,7 +31,6 @@ class BookBinder:
self.build_dir = self.root_dir / "build"
self.html_config = self.book_dir / "config" / "_quarto-html.yml"
self.pdf_config = self.book_dir / "config" / "_quarto-pdf.yml"
self.epub_config = self.book_dir / "config" / "_quarto-epub.yml"
self.active_config = self.book_dir / "_quarto.yml"
def show_banner(self):
@@ -92,7 +91,6 @@ class BookBinder:
# Check for commented lines
html_commented = 0
pdf_commented = 0
epub_commented = 0
try:
if self.html_config.exists():
@@ -108,19 +106,13 @@ class BookBinder:
except:
pass
try:
if self.epub_config.exists():
with open(self.epub_config, 'r') as f:
epub_commented = sum(1 for line in f if "FAST_BUILD_COMMENTED" in line)
except:
pass
return {
'active_config': active_config,
'html_commented': html_commented,
'pdf_commented': pdf_commented,
'epub_commented': epub_commented,
'is_clean': html_commented == 0 and pdf_commented == 0 and epub_commented == 0
'is_clean': html_commented == 0 and pdf_commented == 0
}
def show_status(self):
@@ -136,7 +128,7 @@ class BookBinder:
if status['is_clean']:
table.add_row("✅ State", "[green]Configs are clean[/green]")
else:
table.add_row("⚠️ State", f"[yellow]{status['html_commented'] + status['pdf_commented'] + status['epub_commented']} commented lines[/yellow]")
table.add_row("⚠️ State", f"[yellow]{status['html_commented'] + status['pdf_commented']} commented lines[/yellow]")
console.print(Panel(table, border_style="green"))
@@ -511,8 +503,7 @@ class BookBinder:
config_file = "config/_quarto-html.yml"
elif format_type == "pdf":
config_file = "config/_quarto-pdf.yml"
elif format_type == "epub":
config_file = "config/_quarto-epub.yml"
else:
raise ValueError(f"Unknown format type: {format_type}")
@@ -565,10 +556,7 @@ class BookBinder:
config_file = self.pdf_config
format_arg = "titlepage-pdf"
build_subdir = "pdf"
elif format_type == "epub":
config_file = self.epub_config
format_arg = "epub"
build_subdir = "epub"
else:
raise ValueError(f"Unknown format type: {format_type}")
@@ -659,10 +647,7 @@ class BookBinder:
config_file = self.pdf_config
format_arg = "titlepage-pdf"
build_subdir = "pdf"
elif format_type == "epub":
config_file = self.epub_config
format_arg = "epub"
build_subdir = "epub"
else:
raise ValueError(f"Unknown format type: {format_type}")
@@ -797,9 +782,7 @@ class BookBinder:
pdf_config = self.book_dir / "config" / "_quarto-pdf.yml"
self.ensure_clean_config(pdf_config)
# Restore EPUB config
epub_config = self.book_dir / "config" / "_quarto-epub.yml"
self.ensure_clean_config(epub_config)
# Show current symlink status
symlink_path = self.book_dir / "_quarto.yml"
@@ -862,7 +845,7 @@ class BookBinder:
(self.book_dir / ".quarto", "Quarto cache"),
(self.book_dir / "config" / "_quarto-html.yml.fast-build-backup", "HTML config backup"),
(self.book_dir / "config" / "_quarto-pdf.yml.fast-build-backup", "PDF config backup"),
(self.book_dir / "config" / "_quarto-epub.yml.fast-build-backup", "EPUB config backup"),
]
for path, description in potential_artifacts:
@@ -881,8 +864,8 @@ class BookBinder:
def switch(self, format_type):
"""Switch configuration format"""
if format_type not in ["html", "pdf", "epub"]:
console.print("[red]❌ Format must be 'html', 'pdf', or 'epub'[/red]")
if format_type not in ["html", "pdf"]:
console.print("[red]❌ Format must be 'html' or 'pdf'[/red]")
return False
console.print(f"[blue]🔗 Switching to {format_type} config...[/blue]")
@@ -911,8 +894,7 @@ class BookBinder:
render_to = "html"
elif format_type == "pdf":
render_to = "titlepage-pdf"
elif format_type == "epub":
render_to = "epub"
else:
raise ValueError(f"Unknown format type: {format_type}")
@@ -1070,8 +1052,8 @@ def main():
return
chapters = sys.argv[2]
format_type = sys.argv[3]
if format_type not in ["html", "pdf", "epub"]:
console.print("[red]❌ Format must be 'html', 'pdf', or 'epub'[/red]")
if format_type not in ["html", "pdf"]:
console.print("[red]❌ Format must be 'html' or 'pdf'[/red]")
return
if chapters == "-" or chapters == "all":
@@ -1090,15 +1072,15 @@ def main():
elif command == "build-full":
format_type = sys.argv[2] if len(sys.argv) > 2 else "html"
if format_type not in ["html", "pdf", "epub"]:
console.print("[red]❌ Format must be 'html', 'pdf', or 'epub'[/red]")
if format_type not in ["html", "pdf"]:
console.print("[red]❌ Format must be 'html' or 'pdf'[/red]")
return
binder.build_full(format_type)
elif command == "preview-full":
format_type = sys.argv[2] if len(sys.argv) > 2 else "html"
if format_type not in ["html", "pdf", "epub"]:
console.print("[red]❌ Format must be 'html', 'pdf', or 'epub'[/red]")
if format_type not in ["html", "pdf"]:
console.print("[red]❌ Format must be 'html' or 'pdf'[/red]")
return
binder.preview_full(format_type)
@@ -1112,7 +1094,7 @@ def main():
elif command == "switch":
if len(sys.argv) < 3:
console.print("[red]❌ Usage: ./binder switch <html|pdf|epub>[/red]")
console.print("[red]❌ Usage: ./binder switch <html|pdf>[/red]")
return
format_type = sys.argv[2]
binder.switch(format_type)

View File

@@ -286,24 +286,24 @@ website:
# <!------------------------------------------------->
# <!-- Backmatter -->
# <!------------------------------------------------->
- text: "---"
- text: "---"
- section: "Resources"
id: resources
collapsed: true
contents:
- text: "PhD Survival Guide"
href: contents/backmatter/resources/phd_survival_guide.qmd
- section: "Resources"
id: resources
collapsed: true
contents:
- text: "PhD Survival Guide"
href: contents/backmatter/resources/phd_survival_guide.qmd
- text: "---"
- text: "---"
- section: "References"
id: references
collapsed: true
contents:
- text: "Complete Bibliography"
href: contents/backmatter/references.qmd
- section: "References"
id: references
collapsed: true
contents:
- text: "Complete Bibliography"
href: contents/backmatter/references.qmd
repo-url: https://github.com/harvard-edge/cs249r_book
repo-branch: widget_quiz
@@ -346,9 +346,9 @@ bibliography:
- contents/core/workflow/workflow.bib
- contents/core/conclusion/conclusion.bib
comments:
giscus:
repo: harvard-edge/cs249r_book
#comments:
# giscus:
# repo: harvard-edge/cs249r_book
crossref:
appendix-title: "Appendix"
@@ -361,11 +361,13 @@ crossref:
reference-prefix: Video
filters:
- pandoc-ext/diagram
- ../config/lua/sidenote.lua
- ../config/lua/inject-parts.lua
- ../config/lua/inject_quizzes.lua
- pandoc-ext/diagram
#- ../config/lua/inject_crossrefs.lua # This must come before custom-numbered-blocks (relies on \ref{...})
- custom-numbered-blocks
- ../config/lua/margin-connections.lua # This filter must come after custom-numbered-blocks
- ../config/lua/margin-connections.lua # This filter must come after custom-numbered-blocks
# Filter configurations and metadata
filter-metadata:

View File

@@ -244,7 +244,7 @@ filters:
- ../config/lua/inject-parts.lua
- ../config/lua/inject_quizzes.lua
- pandoc-ext/diagram
- ../config/lua/inject_crossrefs.lua # This must come before custom-numbered-blocks (relies on \ref{...})
#- ../config/lua/inject_crossrefs.lua # This must come before custom-numbered-blocks (relies on \ref{...})
- custom-numbered-blocks
- ../config/lua/margin-connections.lua # This filter must come after custom-numbered-blocks

File diff suppressed because it is too large Load Diff

View File

@@ -18,82 +18,9 @@ def compress_image(image_path, quality=DEFAULT_COMPRESSION_QUALITY):
except Exception as e:
print(f"Error compressing image {image_path}: {e}")
def compress_images_in_epub(epub_file, quality=DEFAULT_COMPRESSION_QUALITY):
    """
    Recompress the images inside an ePub and repackage it as a new archive.

    The source ePub is only read. Its contents are unpacked into a scratch
    directory, every PNG/JPEG/GIF found there is passed to compress_image(),
    and the tree is zipped back up into a new file.

    Args:
        epub_file (str): Path to the ePub to process.
        quality (int): Quality setting forwarded to compress_image().

    Returns:
        str: Path to the repackaged ePub. It lives in a fresh temporary
        directory and has the same base name as ``epub_file``; the caller
        is expected to move it to its final location.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list beyond what it already used.
    from zipfile import ZIP_DEFLATED, ZIP_STORED

    # Keep the output in its own directory. Writing it into the extraction
    # directory would make the repackaging walk pick up the half-written
    # archive and embed it inside itself.
    output_path = os.path.join(tempfile.mkdtemp(), os.path.basename(epub_file))

    # The extracted tree is only needed while repackaging; remove it afterwards.
    with tempfile.TemporaryDirectory() as extract_dir:
        # Extract ePub contents
        with ZipFile(epub_file, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

        # Locate image files
        image_files = [
            os.path.join(root, file)
            for root, dirs, files in os.walk(extract_dir)
            for file in files
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))
        ]

        # Measure original file size before anything is rewritten
        total_original_size = sum(os.path.getsize(file) for file in image_files)

        # Compress images in place and report the per-file effect
        total_compressed_size = 0
        for file in image_files:
            original_size = os.path.getsize(file)
            compress_image(file, quality)
            compressed_size = os.path.getsize(file)
            total_compressed_size += compressed_size
            print(f"Compressed {file}: {convert_bytes_to_human_readable(original_size)} -> {convert_bytes_to_human_readable(compressed_size)}")

        print(f"Original total size: {convert_bytes_to_human_readable(total_original_size)}")
        print(f"Compressed total size: {convert_bytes_to_human_readable(total_compressed_size)}")

        # Repackage ePub file
        with ZipFile(output_path, 'w', compression=ZIP_DEFLATED) as zip_ref:
            # The EPUB container format requires 'mimetype' to be the first
            # entry in the archive and to be stored without compression.
            mimetype_path = os.path.join(extract_dir, 'mimetype')
            if os.path.isfile(mimetype_path):
                zip_ref.write(mimetype_path, 'mimetype', compress_type=ZIP_STORED)

            for root, dirs, files in os.walk(extract_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    archive_name = os.path.relpath(file_path, extract_dir)
                    if archive_name == 'mimetype':
                        continue  # already written as the first entry
                    zip_ref.write(file_path, archive_name)

    return output_path
def quarto_epub_render():
    """
    Render the book to ePub with Quarto and report where the file was written.

    Runs ``quarto render --no-clean --to epub`` and looks for Quarto's
    "Output created: <path>.epub" line, first in stdout and then in stderr.
    The process exits with status 1 if the render command fails or if no
    such line is found.

    Returns:
        str: Path to the generated ePub file, as printed by Quarto.
    """
    print("Rendering book to ePub...")
    command = ['quarto', 'render', '--no-clean', '--to', 'epub']
    try:
        process = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print("Error:", e)
        sys.exit(1)

    # Scan stdout line by line, then stderr; the generator is lazy, so stderr
    # is only examined when stdout contains no matching line.
    output_pattern = re.compile(r'Output created: (.+\.epub)')
    candidates = (
        output_pattern.search(line)
        for stream_text in (process.stdout, process.stderr)
        for line in stream_text.splitlines()
    )
    first_hit = next((hit for hit in candidates if hit), None)

    if first_hit is None:
        print("Error: ePub file path not found.")
        sys.exit(1)

    print(f"Quarto render process return value: {process.returncode}")
    return first_hit.group(1)
def quarto_pdf_render():
"""
@@ -209,16 +136,14 @@ def main():
"""
Main function to parse command-line arguments and execute the program.
"""
parser = argparse.ArgumentParser(description="Convert a book to PDF/ePub and optionally reduce its size")
parser.add_argument('--compress', action='store_true', default=True, help='Compress the ePub file (default: %(default)s)')
parser = argparse.ArgumentParser(description="Convert a book to PDF and optionally reduce its size")
parser.add_argument('--compress', action='store_true', default=True, help='Compress the PDF file (default: %(default)s)')
parser.add_argument('--quality', type=int, default=DEFAULT_COMPRESSION_QUALITY, help='Compression quality (default: %(default)s)')
parser.add_argument('--pdf', action='store_true', default=True, help='Render to PDF (default: %(default)s)')
parser.add_argument('--epub', action='store_true', default=True, help='Render to ePub (default: %(default)s)')
parser.add_argument('--publish', action='store_true', default=True, help='Publish to gh-pages (default: %(default)s)')
parser.add_argument('--html', action='store_true', default=True, help='Build HTML (default: %(default)s)')
parser.add_argument('--no-pdf', dest='pdf', action='store_false', help="Don't render to PDF")
parser.add_argument('--no-html', dest='html', action='store_false', help="Don't render to HTML")
parser.add_argument('--no-epub', dest='epub', action='store_false', help="Don't render to ePub")
parser.add_argument('--no-publish', dest='publish', action='store_false', help="Don't publish")
args = parser.parse_args()
@@ -238,11 +163,7 @@ def main():
else:
print(f"Output saved to {output_pdf_path}")
if args.epub:
output_epub_path = quarto_epub_render()
output_epub_temp_path = compress_images_in_epub(output_epub_path, args.quality)
shutil.move(output_epub_temp_path, output_epub_path)
print(f"Compression of {output_epub_path} completed. Output saved to {output_epub_path}")
if args.html:
quarto_render_html()