Mirror of https://github.com/harvard-edge/cs249r_book.git (synced 2025-12-05 19:17:28 -06:00)

Compare commits: 12 commits, 75785d63a6...7ab9c684d4
| SHA1 |
|---|
| 7ab9c684d4 |
| 8bbc76d5dd |
| ab6a836c4a |
| aa65ce98d5 |
| d78a588205 |
| 1f3be88a56 |
| d4f42722d9 |
| b01474f192 |
| e55363d316 |
| 31883f4fe3 |
| d85278f87e |
| 040b95ef00 |
cli/main.py (41 changed lines):
````diff
@@ -76,8 +76,8 @@ class MLSysBookCLI:
         fast_table.add_row("build [chapter[,ch2,...]]", "Build static files to disk (HTML)", "./binder build intro,ops")
         fast_table.add_row("html [chapter[,ch2,...]]", "Build HTML using quarto-html.yml", "./binder html intro")
         fast_table.add_row("preview [chapter[,ch2,...]]", "Start live dev server with hot reload", "./binder preview intro")
-        fast_table.add_row("pdf [chapter[,ch2,...]]", "Build PDF (only specified chapters)", "./binder pdf intro")
-        fast_table.add_row("epub [chapter[,ch2,...]]", "Build EPUB (only specified chapters)", "./binder epub intro")
+        fast_table.add_row("pdf [chapter[,ch2,...]]", "Build PDF (specified chapters)", "./binder pdf intro")
+        fast_table.add_row("epub [chapter[,ch2,...]]", "Build EPUB (specified chapters)", "./binder epub intro")

         # Full Book Commands
         full_table = Table(show_header=True, header_style="bold blue", box=None)
@@ -86,10 +86,10 @@ class MLSysBookCLI:
         full_table.add_column("Example", style="dim", width=30)

         full_table.add_row("build", "Build entire book as static HTML", "./binder build")
-        full_table.add_row("html", "Build ALL chapters using quarto-html.yml", "./binder html")
+        full_table.add_row("html --all", "Build ALL chapters using quarto-html.yml", "./binder html --all")
         full_table.add_row("preview", "Start live dev server for entire book", "./binder preview")
-        full_table.add_row("pdf", "Build full book (auto-uncomments all chapters)", "./binder pdf")
-        full_table.add_row("epub", "Build full book (auto-uncomments all chapters)", "./binder epub")
+        full_table.add_row("pdf --all", "Build full book (auto-uncomments all)", "./binder pdf --all")
+        full_table.add_row("epub --all", "Build full book (auto-uncomments all)", "./binder epub --all")

         # Management Commands
         mgmt_table = Table(show_header=True, header_style="bold blue", box=None)
@@ -119,11 +119,11 @@ class MLSysBookCLI:
         examples.append("# Build multiple chapters (HTML)\n", style="dim")
         examples.append(" ./binder html intro ", style="cyan")
         examples.append("# Build HTML with index.qmd + intro chapter only\n", style="dim")
-        examples.append(" ./binder html ", style="cyan")
+        examples.append(" ./binder html --all ", style="cyan")
         examples.append("# Build HTML with ALL chapters\n", style="dim")
         examples.append(" ./binder pdf intro ", style="cyan")
         examples.append("# Build single chapter as PDF\n", style="dim")
-        examples.append(" ./binder pdf ", style="cyan")
+        examples.append(" ./binder pdf --all ", style="cyan")
         examples.append("# Build entire book as PDF (uncomments all)\n", style="dim")

         console.print(Panel(examples, title="💡 Pro Tips", border_style="magenta"))
@@ -158,9 +158,14 @@ class MLSysBookCLI:
     def handle_html_command(self, args):
         """Handle HTML build command."""
         self.config_manager.show_symlink_status()

         if len(args) < 1:
-            # No chapters specified - build all chapters using HTML config
+            # No target specified - show error
+            console.print("[red]❌ Error: Please specify chapters or use --all flag[/red]")
+            console.print("[yellow]💡 Usage: ./binder html <chapter> or ./binder html --all[/yellow]")
+            return False
+        elif args[0] == "--all":
+            # Build all chapters using HTML config
             console.print("[green]🌐 Building HTML with ALL chapters...[/green]")
             return self.build_command.build_html_only()
         else:
@@ -173,9 +178,14 @@ class MLSysBookCLI:
     def handle_pdf_command(self, args):
         """Handle PDF build command."""
         self.config_manager.show_symlink_status()

         if len(args) < 1:
-            # No target specified - build entire book
+            # No target specified - show error
+            console.print("[red]❌ Error: Please specify chapters or use --all flag[/red]")
+            console.print("[yellow]💡 Usage: ./binder pdf <chapter> or ./binder pdf --all[/yellow]")
+            return False
+        elif args[0] == "--all":
+            # Build entire book
             console.print("[red]📄 Building entire book (PDF)...[/red]")
             return self.build_command.build_full("pdf")
         else:
@@ -188,9 +198,14 @@ class MLSysBookCLI:
     def handle_epub_command(self, args):
         """Handle EPUB build command."""
         self.config_manager.show_symlink_status()

         if len(args) < 1:
-            # No target specified - build entire book
+            # No target specified - show error
+            console.print("[red]❌ Error: Please specify chapters or use --all flag[/red]")
+            console.print("[yellow]💡 Usage: ./binder epub <chapter> or ./binder epub --all[/yellow]")
+            return False
+        elif args[0] == "--all":
+            # Build entire book
             console.print("[purple]📚 Building entire book (EPUB)...[/purple]")
             return self.build_command.build_full("epub")
         else:
```
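The three handlers above converge on one guard: with no arguments, refuse to build and print usage; with `--all`, build everything; otherwise build the named chapters. A minimal standalone sketch of that shared pattern, assuming the `rich` library that the markup tags imply; `build_full` and `build_chapters` are hypothetical stubs standing in for the real `build_command` methods:

```python
# Sketch of the explicit-target dispatch pattern, not the repository's exact code.
from rich.console import Console

console = Console()

def build_full(fmt: str) -> bool:
    print(f"(stub) full {fmt} build")  # hypothetical stand-in for build_command.build_full
    return True

def build_chapters(fmt: str, chapters: list[str]) -> bool:
    print(f"(stub) {fmt} build of {', '.join(chapters)}")  # hypothetical selective build
    return True

def handle_format_command(fmt: str, args: list[str]) -> bool:
    """Require an explicit chapter list or --all; never build implicitly."""
    if len(args) < 1:
        console.print(f"[red]❌ Error: Please specify chapters or use --all flag[/red]")
        console.print(f"[yellow]💡 Usage: ./binder {fmt} <chapter> or ./binder {fmt} --all[/yellow]")
        return False
    elif args[0] == "--all":
        console.print(f"[green]Building entire book ({fmt})...[/green]")
        return build_full(fmt)
    else:
        return build_chapters(fmt, args)

handle_format_command("pdf", ["intro"])   # selective build
handle_format_command("pdf", ["--all"])   # full build
handle_format_command("pdf", [])          # usage error, returns False
```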
EPUB stylesheet:

````diff
@@ -8,21 +8,28 @@
 */

 /* ==========================================================================
-   Color Variables - Harvard Crimson Theme
+   Color Values - Harvard Crimson Theme
    ========================================================================== */

-:root {
-  --crimson: #A51C30;
-  --crimson-dark: #8B1729;
-  --crimson-light: #C5344A;
-  --text-primary: #1a202c;
-  --text-secondary: #4a5568;
-  --text-muted: #6c757d;
-  --background-light: #f8f9fa;
-  --background-code: #f1f3f4;
-  --border-light: #e9ecef;
-  --border-medium: #dee2e6;
-}
+/*
+ * NOTE: CSS custom properties (variables like --crimson, --text-primary, etc.)
+ * have been replaced with literal hex values throughout this stylesheet.
+ *
+ * REASON: Some EPUB readers (e.g., ClearView) have strict XML parsers that flag
+ * double-hyphens in CSS as XML comment violations, causing parsing errors.
+ *
+ * Color reference for maintenance:
+ * - Crimson: #A51C30
+ * - Crimson Dark: #8B1729
+ * - Crimson Light: #C5344A
+ * - Text Primary: #1a202c
+ * - Text Secondary: #4a5568
+ * - Text Muted: #6c757d
+ * - Background Light: #f8f9fa
+ * - Background Code: #f1f3f4
+ * - Border Light: #e9ecef
+ * - Border Medium: #dee2e6
+ */

 /* ==========================================================================
    Base & Typography
@@ -37,7 +44,7 @@ body {
   text-align: justify;
   widows: 3;
   orphans: 3;
-  color: var(--text-primary);
+  color: #1a202c;
 }

 p {
@@ -55,21 +62,21 @@ h1, h2, h3, h4, h5, h6 {
   text-align: left;
   page-break-after: avoid;
   page-break-inside: avoid;
-  color: var(--text-primary);
+  color: #1a202c;
 }

 h1 {
   font-size: 2.2em;
   margin-top: 0;
   page-break-before: always;
-  border-bottom: 2px solid var(--crimson);
+  border-bottom: 2px solid #A51C30;
   padding-bottom: 0.3em;
   font-weight: 700;
 }

 h2 {
   font-size: 1.8em;
-  border-left: 5px solid var(--crimson);
+  border-left: 5px solid #A51C30;
   border-bottom: 1px solid rgba(165, 28, 48, 0.3);
   padding-left: 16px;
   padding-bottom: 8px;
@@ -79,7 +86,7 @@ h2 {

 h3 {
   font-size: 1.5em;
-  border-left: 4px solid var(--crimson);
+  border-left: 4px solid #A51C30;
   border-bottom: 1px solid rgba(165, 28, 48, 0.25);
   padding-left: 14px;
   padding-bottom: 6px;
@@ -88,7 +95,7 @@ h3 {

 h4 {
   font-size: 1.2em;
-  border-left: 3px solid var(--crimson);
+  border-left: 3px solid #A51C30;
   border-bottom: 1px solid rgba(165, 28, 48, 0.2);
   padding-left: 12px;
   padding-bottom: 4px;
@@ -98,7 +105,7 @@ h4 {

 h5 {
   font-size: 1.1em;
-  border-left: 2px solid var(--crimson);
+  border-left: 2px solid #A51C30;
   border-bottom: 1px solid rgba(165, 28, 48, 0.15);
   padding-left: 10px;
   padding-bottom: 3px;
@@ -108,7 +115,7 @@ h5 {

 h6 {
   font-size: 1em;
-  border-left: 1px solid var(--crimson);
+  border-left: 1px solid #A51C30;
   border-bottom: 1px solid rgba(165, 28, 48, 0.1);
   padding-left: 8px;
   padding-bottom: 2px;
@@ -121,25 +128,25 @@ h6 {
    ========================================================================== */

 a {
-  color: var(--crimson);
+  color: #A51C30;
   text-decoration: none;
 }

 a:hover {
-  color: var(--crimson-dark);
+  color: #8B1729;
   text-decoration: underline;
 }

 a:visited {
-  color: var(--crimson-dark);
+  color: #8B1729;
 }

 blockquote {
   margin: 1.5em;
   padding: 0 1.5em;
-  border-left: 3px solid var(--crimson);
+  border-left: 3px solid #A51C30;
   font-style: italic;
-  color: var(--text-secondary);
+  color: #4a5568;
   background-color: rgba(165, 28, 48, 0.05);
   border-radius: 0 4px 4px 0;
 }
@@ -150,8 +157,8 @@ blockquote {

 /* Enhanced code blocks with syntax highlighting support */
 pre {
-  background-color: var(--background-code);
-  border: 1px solid var(--border-light);
+  background-color: #f1f3f4;
+  border: 1px solid #e9ecef;
   border-radius: 6px;
   padding: 0.75em;
   white-space: pre-wrap;
@@ -166,11 +173,11 @@ pre {

 code {
   font-family: "SF Mono", Monaco, "Cascadia Code", "Roboto Mono", Consolas, "Courier New", monospace;
-  background-color: var(--border-light);
+  background-color: #e9ecef;
   padding: 2px 6px;
   border-radius: 4px;
   font-size: 0.85em;
-  color: var(--text-primary);
+  color: #1a202c;
 }

 pre code {
@@ -198,7 +205,7 @@ pre code {
 /* Code listings with enhanced styling */
 .listing {
   margin: 1rem 0;
-  border: 2px solid var(--border-medium);
+  border: 2px solid #dee2e6;
   border-radius: 8px;
   overflow: hidden;
   background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%);
@@ -207,11 +214,11 @@ pre code {
 .listing figcaption,
 .listing .listing-caption {
   background: linear-gradient(135deg, #f7f9fc 0%, #edf2f7 100%);
-  border-bottom: 2px solid var(--border-medium);
+  border-bottom: 2px solid #dee2e6;
   padding: 1rem 1.25rem;
   margin: 0;
   font-size: 0.9rem;
-  color: var(--text-primary);
+  color: #1a202c;
   font-weight: 600;
   line-height: 1.4;
   text-align: left;
@@ -219,7 +226,7 @@ pre code {

 .listing .sourceCode {
   padding: 0.5rem 1rem;
-  background-color: var(--background-code);
+  background-color: #f1f3f4;
   margin: 0;
   border: none;
 }
@@ -258,17 +265,17 @@ table {
 }

 th, td {
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   padding: 12px 16px;
   text-align: left;
   vertical-align: top;
 }

 th {
-  background-color: var(--background-light);
+  background-color: #f8f9fa;
   font-weight: 600;
   text-align: left;
-  border-bottom: 2px solid var(--crimson);
+  border-bottom: 2px solid #A51C30;
   font-size: 0.85rem;
   text-transform: uppercase;
   letter-spacing: 0.3px;
@@ -278,7 +285,7 @@ th {
 /* Special treatment for technology comparison headers */
 th:not(:first-child) {
   background-color: rgba(165, 28, 48, 0.04);
-  border-bottom: 3px solid var(--crimson);
+  border-bottom: 3px solid #A51C30;
   font-weight: 600;
   color: #2c3e50;
 }
@@ -295,8 +302,8 @@ td:first-child,
 th:first-child {
   font-weight: 500;
   color: #2c3e50;
-  background-color: var(--background-light);
-  border-right: 1px solid var(--border-light);
+  background-color: #f8f9fa;
+  border-right: 1px solid #e9ecef;
 }

 /* Zebra striping for better readability */
@@ -317,7 +324,7 @@ table caption,
   font-weight: 500;
   margin-bottom: 0.75rem;
   margin-top: 1.5rem;
-  color: var(--text-secondary);
+  color: #4a5568;
   font-size: 0.9rem;
   line-height: 1.4;
 }
@@ -355,7 +362,7 @@ h1[epub|type="title"],
 #titlepage h1 {
   font-size: 2.5em;
   font-weight: 700;
-  color: var(--crimson);
+  color: #A51C30;
   margin-bottom: 0.5rem;
   line-height: 1.2;
   text-align: center;
@@ -369,7 +376,7 @@ h2[epub|type="subtitle"],
 #titlepage h2 {
   font-size: 1.4em;
   font-weight: 400;
-  color: var(--text-secondary);
+  color: #4a5568;
   margin-bottom: 2rem;
   font-style: italic;
   line-height: 1.3;
@@ -386,7 +393,7 @@ p[epub|type="author"],
 #titlepage p:contains("Prof.") {
   font-size: 1.2em;
   font-weight: 500;
-  color: var(--text-primary);
+  color: #1a202c;
   margin: 1.5rem 0;
   text-align: center;
 }
@@ -397,7 +404,7 @@ p[epub|type="author"],
 #titlepage .publisher,
 #titlepage .affiliation {
   font-size: 1em;
-  color: var(--text-secondary);
+  color: #4a5568;
   margin: 0.5rem 0;
   text-align: center;
 }
@@ -406,7 +413,7 @@ p[epub|type="author"],
 .title-page .date,
 #titlepage .date {
   font-size: 0.9em;
-  color: var(--text-muted);
+  color: #6c757d;
   margin-top: 2rem;
   text-align: center;
 }
@@ -417,7 +424,7 @@ p[epub|type="author"],
 #titlepage .rights,
 #titlepage .copyright {
   font-size: 0.8em;
-  color: var(--text-muted);
+  color: #6c757d;
   margin-top: auto;
   text-align: center;
   padding-top: 2rem;
@@ -435,7 +442,7 @@ details[class*="callout"] {
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   font-size: 0.9rem;
   box-shadow: 0 2px 8px rgba(165, 28, 48, 0.1);
   page-break-inside: avoid;
@@ -494,7 +501,7 @@ details[class*="callout"] > summary {
 }

 .callout-important {
-  border-left-color: var(--crimson);
+  border-left-color: #A51C30;
 }

 .callout-important .callout-header {
@@ -524,7 +531,7 @@ details.callout-definition {
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   border-left: 5px solid #1B4F72;
 }

@@ -553,7 +560,7 @@ details.callout-example {
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   border-left: 5px solid #148F77;
 }

@@ -610,7 +617,7 @@ details.callout-quiz-question {
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   border-left: 5px solid #5B4B8A;
 }

@@ -638,7 +645,7 @@ details.callout-quiz-answer {
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   border-left: 5px solid #4a7c59;
 }

@@ -662,15 +669,15 @@ div.callout-chapter-forward,
 .callout-chapter-forward,
 details.callout-chapter-connection,
 details.callout-chapter-forward {
-  border-left-color: var(--crimson);
+  border-left-color: #A51C30;
   background-color: rgba(165, 28, 48, 0.05);
   margin: 1.25rem 0;
   padding: 0.75rem 0.85rem;
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
-  border-left: 5px solid var(--crimson);
+  border: 1px solid #e9ecef;
+  border-left: 5px solid #A51C30;
 }

 div.callout-chapter-connection::before {
@@ -679,7 +686,7 @@ div.callout-chapter-connection::before {
   font-weight: 600;
   font-size: 0.9rem;
   margin-bottom: 0.5rem;
-  color: var(--crimson);
+  color: #A51C30;
 }

 div.callout-chapter-forward::before {
@@ -688,7 +695,7 @@ div.callout-chapter-forward::before {
   font-weight: 600;
   font-size: 0.9rem;
   margin-bottom: 0.5rem;
-  color: var(--crimson);
+  color: #A51C30;
 }

 .callout-chapter-connection .callout-header,
@@ -708,7 +715,7 @@ details.callout-chapter-recall {
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   border-left: 5px solid #C06014;
 }

@@ -742,7 +749,7 @@ details.callout-resource-exercises {
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   border-left: 5px solid #20B2AA;
 }

@@ -792,7 +799,7 @@ details.callout-code {
   border-radius: 0.5rem;
   border-left-width: 5px;
   border-left-style: solid;
-  border: 1px solid var(--border-light);
+  border: 1px solid #e9ecef;
   border-left: 5px solid #3C4858;
 }

@@ -843,7 +850,7 @@ figcaption {
   font-style: italic;
   text-align: left;
   margin-top: 1rem;
-  color: var(--text-muted);
+  color: #6c757d;
   line-height: 1.4;
 }

@@ -857,7 +864,7 @@ a[href^="#fn-"],
   font-size: 0.75em;
   vertical-align: super;
   text-decoration: none;
-  color: var(--crimson);
+  color: #A51C30;
   font-weight: 600;
   padding: 0 2px;
   border-radius: 2px;
@@ -876,10 +883,10 @@ div[id^="fn-"],
 .footnote {
   margin-top: 2rem;
   padding-top: 1rem;
-  border-top: 2px solid var(--border-light);
+  border-top: 2px solid #e9ecef;
   font-size: 0.85rem;
   line-height: 1.5;
-  color: var(--text-secondary);
+  color: #4a5568;
 }

 /* Individual footnote entries */
@@ -896,7 +903,7 @@ div[id^="fn-"],

 /* Footnote numbers */
 .footnotes li::marker {
-  color: var(--crimson);
+  color: #A51C30;
   font-weight: 600;
 }

@@ -904,7 +911,7 @@
 a[href^="#fnref-"],
 .footnote-back {
   font-size: 0.8em;
-  color: var(--text-muted);
+  color: #6c757d;
   text-decoration: none;
   margin-left: 0.5rem;
   padding: 2px 4px;
@@ -914,7 +921,7 @@ a[href^="#fnref-"],
 a[href^="#fnref-"]:hover,
 .footnote-back:hover {
-  color: var(--crimson);
+  color: #A51C30;
   background-color: rgba(165, 28, 48, 0.05);
   text-decoration: none;
 }
@@ -925,7 +932,7 @@ a[href^="#fnref-"]:hover,
   display: block;
   width: 60px;
   height: 1px;
-  background-color: var(--crimson);
+  background-color: #A51C30;
   margin: 0 0 1rem 0;
 }
```
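The variable-to-literal substitution above is mechanical enough to script rather than apply by hand. A hedged sketch of how that could be automated; the color map comes from the NOTE comment in the diff, while the regex and the function itself are assumptions, not the project's actual tooling:

```python
# Sketch: inline CSS custom properties as literal hex values, so no "--"
# survives in rules that strict EPUB XML parsers might reject.
import re

COLORS = {
    "--crimson": "#A51C30",
    "--crimson-dark": "#8B1729",
    "--crimson-light": "#C5344A",
    "--text-primary": "#1a202c",
    "--text-secondary": "#4a5568",
    "--text-muted": "#6c757d",
    "--background-light": "#f8f9fa",
    "--background-code": "#f1f3f4",
    "--border-light": "#e9ecef",
    "--border-medium": "#dee2e6",
}

def inline_css_vars(css: str) -> str:
    """Replace var(--name) with its literal value from the color map."""
    def repl(m: re.Match) -> str:
        return COLORS.get(m.group(1), m.group(0))  # leave unknown vars untouched
    return re.sub(r"var\((--[a-z-]+)\)", repl, css)

print(inline_css_vars("a { color: var(--crimson); }"))
# -> a { color: #A51C30; }
```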
EPUB build configuration (Quarto project file):

````diff
@@ -19,6 +19,7 @@ project:
   output-dir: _build/epub
   post-render:
     - scripts/clean_svgs.py
+    - scripts/fix_epub_references.sh

 preview:
   browser: false
```
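Quarto runs `post-render` scripts after the output is written, exposing the output location through the `QUARTO_PROJECT_OUTPUT_DIR` environment variable. A minimal sketch of a hook in that style; the fallback path and the globbing pattern are illustrative assumptions, not the repository's actual script:

```python
# Sketch of a Quarto post-render hook: walk the EPUB output and rewrite each chapter.
import os
from pathlib import Path

output_dir = Path(os.environ.get("QUARTO_PROJECT_OUTPUT_DIR", "_build/epub"))

for xhtml in sorted(output_dir.rglob("ch*.xhtml")):
    text = xhtml.read_text(encoding="utf-8")
    # ... post-process here (e.g., rewrite unresolved @sec- links) ...
    xhtml.write_text(text, encoding="utf-8")
```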
````diff
@@ -249,29 +249,29 @@ The evolution of specialized hardware architectures illustrates a principle in c
 @tbl-hw-evolution summarizes key milestones in the evolution of hardware specialization, showing how each era produced architectures tailored to the prevailing computational demands. While these accelerators initially emerged to optimize domain-specific workloads, including floating-point operations, graphics rendering, and media processing, they also introduced architectural strategies that persist in contemporary systems. The specialization principles outlined in earlier generations now underpin the design of modern AI accelerators. Understanding this historical trajectory provides context for analyzing how hardware specialization continues to enable scalable, efficient execution of machine learning workloads across diverse deployment environments.

-+-----------+------------------------------------+---------------------------------------------+-----------------------------------------+
-| **Era** | **Computational Pattern** | **Architecture Examples** | **Characteristics** |
-+==========:+:===================================+:============================================+:========================================+
-| **1980s** | Floating-Point & Signal Processing | FPU, DSP | <li>Single-purpose engines</li> |
-| | | | <li>Focused instruction sets</li> |
-| | | | <li>Coprocessor interfaces</li> |
-+-----------+------------------------------------+---------------------------------------------+-----------------------------------------+
-| **1990s** | 3D Graphics & Multimedia | GPU, SIMD Units | <li>Many identical compute units</li> |
-| | | | <li>Regular data patterns</li> |
-| | | | <li>Wide memory interfaces</li> |
-+-----------+------------------------------------+---------------------------------------------+-----------------------------------------+
-| **2000s** | Real-time Media Coding | Media Codecs, Network Processors | <li>Fixed-function pipelines</li> |
-| | | | <li>High throughput processing</li> |
-| | | | <li>Power-performance optimization</li> |
-+-----------+------------------------------------+---------------------------------------------+-----------------------------------------+
-| **2010s** | Deep Learning Tensor Operations | TPU, GPU Tensor Cores | <li>Matrix multiplication units</li> |
-| | | | <li>Massive parallelism</li> |
-| | | | <li>Memory bandwidth optimization</li> |
-+-----------+------------------------------------+---------------------------------------------+-----------------------------------------+
-| **2020s** | Application-Specific Acceleration | ML Engines, Smart NICs, Domain Accelerators | <li>Workload-specific datapaths</li> |
-| | | | <li>Customized memory hierarchies</li> |
-| | | | <li>Application-optimized designs</li> |
-+-----------+------------------------------------+---------------------------------------------+-----------------------------------------+
++-----------+------------------------------------+---------------------------------------------+----------------------------------------------+
+| **Era** | **Computational Pattern** | **Architecture Examples** | **Characteristics** |
++==========:+:===================================+:============================================+:=============================================+
+| **1980s** | Floating-Point & Signal Processing | FPU, DSP | <ul><li>Single-purpose engines</li> |
+| | | | <li>Focused instruction sets</li> |
+| | | | <li>Coprocessor interfaces</li></ul> |
++-----------+------------------------------------+---------------------------------------------+----------------------------------------------+
+| **1990s** | 3D Graphics & Multimedia | GPU, SIMD Units | <ul><li>Many identical compute units</li> |
+| | | | <li>Regular data patterns</li> |
+| | | | <li>Wide memory interfaces</li></ul> |
++-----------+------------------------------------+---------------------------------------------+----------------------------------------------+
+| **2000s** | Real-time Media Coding | Media Codecs, Network Processors | <ul><li>Fixed-function pipelines</li> |
+| | | | <li>High throughput processing</li> |
+| | | | <li>Power-performance optimization</li></ul> |
++-----------+------------------------------------+---------------------------------------------+----------------------------------------------+
+| **2010s** | Deep Learning Tensor Operations | TPU, GPU Tensor Cores | <ul><li>Matrix multiplication units</li> |
+| | | | <li>Massive parallelism</li> |
+| | | | <li>Memory bandwidth optimization</li></ul> |
++-----------+------------------------------------+---------------------------------------------+----------------------------------------------+
+| **2020s** | Application-Specific Acceleration | ML Engines, Smart NICs, Domain Accelerators | <ul><li>Workload-specific datapaths</li> |
+| | | | <li>Customized memory hierarchies</li> |
+| | | | <li>Application-optimized designs</li></ul> |
++-----------+------------------------------------+---------------------------------------------+----------------------------------------------+

 : **Hardware Specialization Trends**: Successive computing eras progressively integrate specialized hardware to accelerate prevalent workloads, moving from general-purpose CPUs to domain-specific architectures and ultimately to customizable AI accelerators. This evolution reflects a fundamental principle: tailoring hardware to computational patterns improves performance and energy efficiency, driving innovation in machine learning systems. {#tbl-hw-evolution}
```
````diff
@@ -12,13 +12,13 @@ These labs provide a unique opportunity to gain practical experience with machin
 ## Setup {#sec-overview-setup-cde0}

-- [Setup Nicla Vision](./setup/setup.qmd)
+- [Setup Nicla Vision](@sec-setup-overview-dcdd)

 ## Exercises {#sec-overview-exercises-f4f3}

 | **Modality** | **Task** | **Description** | **Link** |
 |:--------------|:--------------|:-----------------|:----------|
-| Vision | Image Classification | Learn to classify images | [Link](./image_classification/image_classification.qmd) |
-| Vision | Object Detection | Implement object detection | [Link](./object_detection/object_detection.qmd) |
-| Sound | Keyword Spotting | Explore voice recognition systems | [Link](./kws/kws.qmd) |
-| IMU | Motion Classification and Anomaly Detection | Classify motion data and detect anomalies | [Link](./motion_classification/motion_classification.qmd) |
+| Vision | Image Classification | Learn to classify images | [Link](@sec-image-classification-overview-7420) |
+| Vision | Object Detection | Implement object detection | [Link](@sec-object-detection-overview-9d59) |
+| Sound | Keyword Spotting | Explore voice recognition systems | [Link](@sec-keyword-spotting-kws-overview-0ae6) |
+| IMU | Motion Classification and Anomaly Detection | Classify motion data and detect anomalies | [Link](@sec-motion-classification-anomaly-detection-overview-b1a8) |
```
````diff
@@ -120,7 +120,7 @@ After completing hardware selection and development environment setup, you're re
 For detailed platform-specific setup instructions, refer to the individual setup guides:

-- [XIAOML Kit Setup](seeed/xiao_esp32s3/setup/setup.qmd)
-- [Arduino Nicla Vision Setup](arduino/nicla_vision/setup/setup.qmd)
-- [Grove Vision AI V2 Setup](seeed/grove_vision_ai_v2/grove_vision_ai_v2.qmd)
-- [Raspberry Pi Setup](raspi/setup/setup.qmd)
+- [XIAOML Kit Setup](@sec-setup-overview-d638)
+- [Arduino Nicla Vision Setup](@sec-setup-overview-dcdd)
+- [Grove Vision AI V2 Setup](@sec-setup-nocode-applications-introduction-b740)
+- [Raspberry Pi Setup](@sec-setup-overview-0ec9)
```
````diff
@@ -353,7 +353,7 @@ python3 get_img_data.py
 Access the web interface:

 - On the Raspberry Pi itself (if you have a GUI): Open a web browser and go to `http://localhost:5000`
-- From another device on the same network: Open a web browser and go to `http://<raspberry_pi_ip>:5000` (R`eplace `<raspberry_pi_ip>` with your Raspberry Pi's IP address).
+- From another device on the same network: Open a web browser and go to `http://<raspberry_pi_ip>:5000` (Replace `<raspberry_pi_ip>` with your Raspberry Pi's IP address).
 For example: `http://192.168.4.210:5000/`

 \noindent
```
````diff
@@ -16,18 +16,18 @@ These labs offer invaluable hands-on experience with machine learning systems, l
 ## Setup {#sec-overview-setup-02c7}

-- [Setup Raspberry Pi](./setup/setup.qmd)
+- [Setup Raspberry Pi](@sec-setup-overview-0ec9)

 ## Exercises {#sec-overview-exercises-6edf}

-+--------------+------------------------+----------------------------+---------------------------------------------------------+
-| **Modality** | **Task** | **Description** | **Link** |
-+:=============+:=======================+:===========================+:========================================================+
-| **Vision** | Image Classification | Learn to classify images | [Link](./image_classification/image_classification.qmd) |
-+--------------+------------------------+----------------------------+---------------------------------------------------------+
-| **Vision** | Object Detection | Implement object detection | [Link](./object_detection/object_detection.qmd) |
-+--------------+------------------------+----------------------------+---------------------------------------------------------+
-| **GenAI** | Small Language Models | Deploy SLMs at the Edge | [Link](./llm/llm.qmd) |
-+--------------+------------------------+----------------------------+---------------------------------------------------------+
-| **GenAI** | Visual-Language Models | Deploy VLMs at the Edge | [Link](./vlm/vlm.qmd) |
-+--------------+------------------------+----------------------------+---------------------------------------------------------+
++--------------+------------------------+----------------------------+----------------------------------------------------------+
+| **Modality** | **Task** | **Description** | **Link** |
++:=============+:=======================+:===========================+=========================================================:+
+| **Vision** | Image Classification | Learn to classify images | [Link](@sec-image-classification-overview-3e02) |
++--------------+------------------------+----------------------------+----------------------------------------------------------+
+| **Vision** | Object Detection | Implement object detection | [Link](@sec-object-detection-overview-1133) |
++--------------+------------------------+----------------------------+----------------------------------------------------------+
+| **GenAI** | Small Language Models | Deploy SLMs at the Edge | [Link](@sec-small-language-models-slm-overview-ef83) |
++--------------+------------------------+----------------------------+----------------------------------------------------------+
+| **GenAI** | Visual-Language Models | Deploy VLMs at the Edge | [Link](@sec-visionlanguage-models-vlm-introduction-4272) |
++--------------+------------------------+----------------------------+----------------------------------------------------------+
```
````diff
@@ -21,14 +21,14 @@ This positioning makes it an ideal platform for learning advanced TinyML concept
 ## Setup and No-Code Applications {#sec-overview-setup-nocode-applications-e70f}

-- [Setup and No-Code Apps](./setup_and_no_code_apps/setup_and_no_code_apps.qmd)
+- [Setup and No-Code Apps](@sec-setup-nocode-applications-introduction-b740)

 ## Exercises {#sec-overview-exercises-e8a6}

-+--------------+----------------------+----------------------------+---------------------------------------------------------+
-| **Modality** | **Task** | **Description** | **Link** |
-+:=============+:=====================+:===========================+:========================================================+
-| **Vision** | Image Classification | Learn to classify images | [Link](./image_classification/image_classification.qmd) |
-+--------------+----------------------+----------------------------+---------------------------------------------------------+
-| **Vision** | Object Detection | Implement object detection | [Link](./object_detection/object_detection.qmd) |
-+--------------+----------------------+----------------------------+---------------------------------------------------------+
++--------------+----------------------+----------------------------+-----------------------------------------------------+
+| **Modality** | **Task** | **Description** | **Link** |
++:=============+:=====================+:===========================+:====================================================+
+| **Vision** | Image Classification | Learn to classify images | [Link](@sec-image-classification-introduction-59d5) |
++--------------+----------------------+----------------------------+-----------------------------------------------------+
+| **Vision** | Object Detection | Implement object detection | TBD |
++--------------+----------------------+----------------------------+-----------------------------------------------------+
```
````diff
@@ -49,7 +49,9 @@ In other words, recognizing voice commands is based on a multi-stage model or Ca
 The video below shows an example where I emulate a Google Assistant on a Raspberry Pi (Stage 2), having an Arduino Nano 33 BLE as the tinyML device (Stage 1).

-<iframe class="react-editor-embed react-editor-embed-override" src="https://www.youtube.com/embed/e_OPgcnsyvM" frameborder="0" style="box-sizing: border-box; align-self: center; flex: 1 1 0%; height: 363.068px; max-height: 100%; max-width: 100%; overflow: hidden; width: 645.455px; z-index: 1;"></iframe>
+::: {.content-visible when-format="html:js"}
+<iframe class="react-editor-embed react-editor-embed-override" src="https://www.youtube.com/embed/e_OPgcnsyvM" style="box-sizing: border-box; align-self: center; flex: 1 1 0%; height: 363.068px; max-height: 100%; max-width: 100%; overflow: hidden; width: 645.455px; z-index: 1; border: none;"></iframe>
+:::

 > If you want to go deeper on the full project, please see my tutorial: [Building an Intelligent Voice Assistant From Scratch](https://www.hackster.io/mjrobot/building-an-intelligent-voice-assistant-from-scratch-2199c3).
```
````diff
@@ -689,7 +691,9 @@ You can find the complete code on the [project's GitHub.](https://github.com/Mjr
 The idea is that the LED will be ON whenever the keyword YES is detected. In the same way, instead of turning on an LED, this could be a "trigger" for an external device, as we saw in the introduction.

-<iframe class="react-editor-embed react-editor-embed-override" src="https://www.youtube.com/embed/wjhtEzXt60Q" frameborder="0" style="box-sizing: border-box; align-self: center; flex: 1 1 0%; height: 363.068px; max-height: 100%; max-width: 100%; overflow: hidden; width: 645.455px; z-index: 1;"></iframe>
+::: {.content-visible when-format="html:js"}
+<iframe class="react-editor-embed react-editor-embed-override" src="https://www.youtube.com/embed/wjhtEzXt60Q" style="box-sizing: border-box; align-self: center; flex: 1 1 0%; height: 363.068px; max-height: 100%; max-width: 100%; overflow: hidden; width: 645.455px; z-index: 1; border: none;"></iframe>
+:::

 ### With OLED Display {#sec-keyword-spotting-kws-oled-display-9676}
```
````diff
@@ -333,7 +333,7 @@ For example, for an FFT length of 32 points, the Spectral Analysis Block's resul
 Those 63 features will serve as the input tensor for a Neural Network Classifier and the Anomaly Detection model (K-Means).

-> You can learn more by digging into the lab [DSP Spectral Features](../../../shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd)
+> You can learn more by digging into the lab [DSP Spectral Features](@sec-dsp-spectral-features-overview-a7be)

 ## Model Design {#sec-motion-classification-anomaly-detection-model-design-d2d4}
```
````diff
@@ -734,7 +734,7 @@ The integration of motion classification with the XIAOML Kit demonstrates how mo
 ## Resources {#sec-motion-classification-anomaly-detection-resources-cd54}

 - [XIAOML KIT Code](https://github.com/Mjrovai/XIAO-ESP32S3-Sense/tree/main/XIAOML_Kit_code)
-- [DSP Spectral Features](../../../shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd)
+- [DSP Spectral Features](@sec-dsp-spectral-features-overview-a7be)
 - [Edge Impulse Project](https://studio.edgeimpulse.com/public/750061/live)
 - [Edge Impulse Spectral Features Block Colab Notebook](https://colab.research.google.com/github/Mjrovai/Arduino_Nicla_Vision/blob/main/Motion_Classification/Edge_Impulse_Spectral_Features_Block.ipynb)
 - [Edge Impulse Documentation](https://docs.edgeimpulse.com/)
```
````diff
@@ -17,18 +17,18 @@ These labs provide a unique opportunity to gain practical experience with machin
 ## Setup {#sec-overview-setup-2491}

-- [Setup the XIAOML Kit](./setup/setup.qmd)
+- [Setup the XIAOML Kit](@sec-setup-overview-d638)

 ## Exercises {#sec-overview-exercises-f0f7}

-+--------------+---------------------------------------------+-------------------------------------------+-----------------------------------------------------------+
-| **Modality** | **Task** | **Description** | **Link** |
-+:=============+:============================================+:==========================================+:==========================================================+
-| **Vision** | Image Classification | Learn to classify images | [Link](./image_classification/image_classification.qmd) |
-+--------------+---------------------------------------------+-------------------------------------------+-----------------------------------------------------------+
-| **Vision** | Object Detection | Implement object detection | [Link](./object_detection/object_detection.qmd) |
-+--------------+---------------------------------------------+-------------------------------------------+-----------------------------------------------------------+
-| **Sound** | Keyword Spotting | Explore voice recognition systems | [Link](./kws/kws.qmd) |
-+--------------+---------------------------------------------+-------------------------------------------+-----------------------------------------------------------+
-| **IMU** | Motion Classification and Anomaly Detection | Classify motion data and detect anomalies | [Link](./motion_classification/motion_classification.qmd) |
-+--------------+---------------------------------------------+-------------------------------------------+-----------------------------------------------------------+
++--------------+---------------------------------------------+-------------------------------------------+--------------------------------------------------------------------+
+| **Modality** | **Task** | **Description** | **Link** |
++:=============+:============================================+:==========================================+===================================================================:+
+| **Vision** | Image Classification | Learn to classify images | [Link](@sec-image-classification-overview-9a37) |
++--------------+---------------------------------------------+-------------------------------------------+--------------------------------------------------------------------+
+| **Vision** | Object Detection | Implement object detection | [Link](@sec-object-detection-overview-d035) |
++--------------+---------------------------------------------+-------------------------------------------+--------------------------------------------------------------------+
+| **Sound** | Keyword Spotting | Explore voice recognition systems | [Link](@sec-keyword-spotting-kws-overview-4373) |
++--------------+---------------------------------------------+-------------------------------------------+--------------------------------------------------------------------+
+| **IMU** | Motion Classification and Anomaly Detection | Classify motion data and detect anomalies | [Link](@sec-motion-classification-anomaly-detection-overview-cb1f) |
++--------------+---------------------------------------------+-------------------------------------------+--------------------------------------------------------------------+
```
````diff
@@ -159,12 +159,11 @@ A short podcast, created with Google's Notebook LM and inspired by insights from
 Thank you to all our readers and visitors. Your engagement with the material keeps us motivated.

-::: {.content-visible when-format="html"}
+::: {.content-visible when-format="html:js"}
 ```{=html}
 <div style="position: relative; padding-top: 56.25%; margin: 20px 0;">
 <iframe
   src="https://lookerstudio.google.com/embed/reporting/e7192975-a8a0-453d-b6fe-1580ac054dbf/page/0pNbE"
-  frameborder="0"
   style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; border:0; border-radius: 8px;"
   allowfullscreen="allowfullscreen"
   sandbox="allow-storage-access-by-user-activation allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox">
@@ -179,6 +178,12 @@ This textbook has reached readers across the globe, with visitors from over 100
 *Interactive analytics dashboard available in the online version at [mlsysbook.ai](https://mlsysbook.ai)*
 :::

+::: {.content-visible when-format="epub"}
+This textbook has reached readers across the globe, with visitors from over 100 countries engaging with the material. The international community includes students, educators, researchers, and practitioners who are advancing the field of machine learning systems. From universities in North America and Europe to research institutions in Asia and emerging tech hubs worldwide, the content serves diverse learning needs and cultural contexts.
+
+*Interactive analytics dashboard available in the online version at [mlsysbook.ai](https://mlsysbook.ai)*
+:::
+
 ## Want to Help Out? {.unnumbered}

 This is a collaborative project, and your input matters! If you'd like to contribute, check out our [contribution guidelines](https://github.com/harvard-edge/cs249r_book/blob/dev/docs/contribute.md). Feedback, corrections, and new ideas are welcome. Simply file a GitHub [issue](https://github.com/harvard-edge/cs249r_book/issues).
```
Cross-reference post-processing script:

````diff
@@ -84,7 +84,32 @@ CHAPTER_MAPPING = {
     # Subsections - Model Optimizations chapter
     "sec-model-optimizations-neural-architecture-search-3915": "contents/core/optimizations/optimizations.html#sec-model-optimizations-neural-architecture-search-3915",
     "sec-model-optimizations-numerical-precision-a93d": "contents/core/optimizations/optimizations.html#sec-model-optimizations-numerical-precision-a93d",
-    "sec-model-optimizations-pruning-3f36": "contents/core/optimizations/optimizations.html#sec-model-optimizations-pruning-3f36"
+    "sec-model-optimizations-pruning-3f36": "contents/core/optimizations/optimizations.html#sec-model-optimizations-pruning-3f36",
+
+    # Lab sections - Arduino Nicla Vision
+    "sec-setup-overview-dcdd": "contents/labs/arduino/nicla_vision/setup/setup.html#sec-setup-overview-dcdd",
+    "sec-image-classification-overview-7420": "contents/labs/arduino/nicla_vision/image_classification/image_classification.html#sec-image-classification-overview-7420",
+    "sec-object-detection-overview-9d59": "contents/labs/arduino/nicla_vision/object_detection/object_detection.html#sec-object-detection-overview-9d59",
+    "sec-keyword-spotting-kws-overview-0ae6": "contents/labs/arduino/nicla_vision/kws/kws.html#sec-keyword-spotting-kws-overview-0ae6",
+    "sec-motion-classification-anomaly-detection-overview-b1a8": "contents/labs/arduino/nicla_vision/motion_classification/motion_classification.html#sec-motion-classification-anomaly-detection-overview-b1a8",
+
+    # Lab sections - Seeed XIAO ESP32S3
+    "sec-setup-overview-d638": "contents/labs/seeed/xiao_esp32s3/setup/setup.html#sec-setup-overview-d638",
+    "sec-image-classification-overview-9a37": "contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.html#sec-image-classification-overview-9a37",
+    "sec-object-detection-overview-d035": "contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.html#sec-object-detection-overview-d035",
+    "sec-keyword-spotting-kws-overview-4373": "contents/labs/seeed/xiao_esp32s3/kws/kws.html#sec-keyword-spotting-kws-overview-4373",
+    "sec-motion-classification-anomaly-detection-overview-cb1f": "contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.html#sec-motion-classification-anomaly-detection-overview-cb1f",
+
+    # Lab sections - Grove Vision AI V2
+    "sec-setup-nocode-applications-introduction-b740": "contents/labs/seeed/grove_vision_ai_v2/setup_and_no_code_apps/setup_and_no_code_apps.html#sec-setup-nocode-applications-introduction-b740",
+    "sec-image-classification-introduction-59d5": "contents/labs/seeed/grove_vision_ai_v2/image_classification/image_classification.html#sec-image-classification-introduction-59d5",
+
+    # Lab sections - Raspberry Pi
+    "sec-setup-overview-0ec9": "contents/labs/raspi/setup/setup.html#sec-setup-overview-0ec9",
+    "sec-image-classification-overview-3e02": "contents/labs/raspi/image_classification/image_classification.html#sec-image-classification-overview-3e02",
+    "sec-object-detection-overview-1133": "contents/labs/raspi/object_detection/object_detection.html#sec-object-detection-overview-1133",
+    "sec-small-language-models-slm-overview-ef83": "contents/labs/raspi/llm/llm.html#sec-small-language-models-slm-overview-ef83",
+    "sec-visionlanguage-models-vlm-introduction-4272": "contents/labs/raspi/vlm/vlm.html#sec-visionlanguage-models-vlm-introduction-4272"
 }

 # Chapter titles for readable link text
```
````diff
@@ -124,21 +149,105 @@ CHAPTER_TITLES = {
     # Subsections - Model Optimizations chapter
     "sec-model-optimizations-neural-architecture-search-3915": "Neural Architecture Search",
     "sec-model-optimizations-numerical-precision-a93d": "Numerical Precision",
-    "sec-model-optimizations-pruning-3f36": "Pruning"
+    "sec-model-optimizations-pruning-3f36": "Pruning",
+
+    # Lab sections - Arduino Nicla Vision
+    "sec-setup-overview-dcdd": "Setup Nicla Vision",
+    "sec-image-classification-overview-7420": "Image Classification",
+    "sec-object-detection-overview-9d59": "Object Detection",
+    "sec-keyword-spotting-kws-overview-0ae6": "Keyword Spotting",
+    "sec-motion-classification-anomaly-detection-overview-b1a8": "Motion Classification and Anomaly Detection",
+
+    # Lab sections - Seeed XIAO ESP32S3
+    "sec-setup-overview-d638": "Setup the XIAOML Kit",
+    "sec-image-classification-overview-9a37": "Image Classification",
+    "sec-object-detection-overview-d035": "Object Detection",
+    "sec-keyword-spotting-kws-overview-4373": "Keyword Spotting",
+    "sec-motion-classification-anomaly-detection-overview-cb1f": "Motion Classification and Anomaly Detection",
+
+    # Lab sections - Grove Vision AI V2
+    "sec-setup-nocode-applications-introduction-b740": "Setup and No-Code Apps",
+    "sec-image-classification-introduction-59d5": "Image Classification",
+
+    # Lab sections - Raspberry Pi
+    "sec-setup-overview-0ec9": "Setup Raspberry Pi",
+    "sec-image-classification-overview-3e02": "Image Classification",
+    "sec-object-detection-overview-1133": "Object Detection",
+    "sec-small-language-models-slm-overview-ef83": "Small Language Models",
+    "sec-visionlanguage-models-vlm-introduction-4272": "Visual-Language Models"
 }

-def calculate_relative_path(from_file, to_path, build_dir):
+def build_epub_section_mapping(epub_dir):
     """
-    Calculate relative path from one HTML file to another.
+    Build mapping from section IDs to EPUB chapter files by scanning actual chapters.

     Args:
-        from_file: Path object of the source HTML file
+        epub_dir: Path to EPUB build directory (_build/epub or extracted EPUB root)

     Returns:
+        Dictionary mapping section IDs to chapter filenames (e.g., {"sec-xxx": "ch004.xhtml"})
     """
+    mapping = {}
+
+    # Try different possible text directory locations
+    possible_text_dirs = [
+        epub_dir / "text",           # For _build/epub structure
+        epub_dir / "EPUB" / "text",  # For extracted EPUB structure
+    ]
+
+    text_dir = None
+    for dir_path in possible_text_dirs:
+        if dir_path.exists():
+            text_dir = dir_path
+            break
+
+    if not text_dir:
+        return mapping
+
+    # Scan all chapter files
+    for xhtml_file in sorted(text_dir.glob("ch*.xhtml")):
+        try:
+            content = xhtml_file.read_text(encoding='utf-8')
+            # Find all section IDs in this file using regex
+            section_ids = re.findall(r'id="(sec-[^"]+)"', content)
+            for sec_id in section_ids:
+                # Map section ID to chapter filename only (no path, since we're in same dir)
+                mapping[sec_id] = xhtml_file.name
+        except Exception as e:
+            continue
+
+    return mapping
+
+def calculate_relative_path(from_file, to_path, build_dir, epub_mapping=None):
+    """
+    Calculate relative path from one file to another.
+
+    Args:
+        from_file: Path object of the source file
         to_path: String path from build root (e.g., "contents/core/chapter/file.html#anchor")
         build_dir: Path object of the build directory root
+        epub_mapping: Optional dict mapping section IDs to EPUB chapter files

     Returns:
         Relative path string from from_file to to_path
     """
+    # For EPUB builds, use chapter-to-chapter mapping
+    if epub_mapping is not None:
+        # Extract section ID from to_path
+        if '#' in to_path:
+            _, anchor_with_hash = to_path.split('#', 1)
+            sec_id = anchor_with_hash  # This is already just the section ID
+
+            # Look up which chapter file contains this section
+            target_chapter = epub_mapping.get(sec_id)
+            if target_chapter:
+                # All chapters are in same directory (text/), so just use filename
+                return f"{target_chapter}#{sec_id}"
+
+        # Fallback: if no mapping found, return original
+        return to_path
+
+    # Original HTML logic for non-EPUB builds
     # Split anchor from path
     if '#' in to_path:
         target_path_str, anchor = to_path.split('#', 1)
```
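Taken together, these two functions let EPUB cross-references bypass relative-path math entirely: a section ID resolves straight to the chapter file that contains it. A hedged usage sketch of that flow (the paths and chapter numbers are illustrative, not taken from an actual build):

```python
# Sketch: resolve an EPUB cross-reference via the section-to-chapter mapping.
from pathlib import Path

epub_mapping = build_epub_section_mapping(Path("_build/epub"))
# e.g., epub_mapping == {"sec-setup-overview-dcdd": "ch004.xhtml", ...}

href = calculate_relative_path(
    from_file=Path("_build/epub/text/ch002.xhtml"),
    to_path="contents/labs/arduino/nicla_vision/setup/setup.html#sec-setup-overview-dcdd",
    build_dir=Path("_build/epub"),
    epub_mapping=epub_mapping,
)
# -> "ch004.xhtml#sec-setup-overview-dcdd": a same-directory link, no "../" climbing,
# which is what EPUB readers expect since all chapters live in text/.
```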
````diff
@@ -146,11 +255,11 @@ def calculate_relative_path(from_file, to_path, build_dir):
     else:
         target_path_str = to_path
         anchor = ''

     # Convert to absolute paths
     target_abs = build_dir / target_path_str
     source_abs = from_file

     # Calculate relative path
     try:
         rel_path = Path(target_abs).relative_to(source_abs.parent)
```
````diff
@@ -161,7 +270,7 @@ def calculate_relative_path(from_file, to_path, build_dir):
     # Count how many levels up we need to go
     source_parts = source_abs.parent.parts
     target_parts = target_abs.parts

     # Find common prefix
     common_length = 0
     for s, t in zip(source_parts, target_parts):
@@ -169,27 +278,27 @@ def calculate_relative_path(from_file, to_path, build_dir):
             common_length += 1
         else:
             break

     # Calculate relative path
     up_levels = len(source_parts) - common_length
     down_parts = target_parts[common_length:]

     rel_parts = ['..'] * up_levels + list(down_parts)
     result = '/'.join(rel_parts)

     return result + anchor

-def fix_cross_reference_link(match, from_file, build_dir):
+def fix_cross_reference_link(match, from_file, build_dir, epub_mapping=None):
     """Replace a single cross-reference link with proper HTML link."""
     full_match = match.group(0)
     sec_ref = match.group(1)

     abs_path = CHAPTER_MAPPING.get(sec_ref)
     title = CHAPTER_TITLES.get(sec_ref)

     if abs_path and title:
         # Calculate relative path from current file to target
-        rel_path = calculate_relative_path(from_file, abs_path, build_dir)
+        rel_path = calculate_relative_path(from_file, abs_path, build_dir, epub_mapping)
         # Create clean HTML link
         return f'<a href="{rel_path}">{title}</a>'
     else:
```
````diff
@@ -197,31 +306,38 @@ def fix_cross_reference_link(match, from_file, build_dir):
         print(f"⚠️ No mapping found for: {sec_ref}")
         return full_match

-def fix_cross_references(html_content, from_file, build_dir, verbose=False):
+def fix_cross_references(html_content, from_file, build_dir, epub_mapping=None, verbose=False):
     """
-    Fix all cross-reference links in HTML content.
+    Fix all cross-reference links in HTML/XHTML content.

     Quarto generates two types of unresolved references when chapters aren't built:
     1. Full unresolved links: <a href="#sec-xxx" class="quarto-xref"><span class="quarto-unresolved-ref">...</span></a>
     2. Simple unresolved refs: <strong>?@sec-xxx</strong> (more common in selective builds)
+    3. EPUB unresolved refs: <a href="@sec-xxx">Link Text</a> (EPUB-specific)
     """
     # Pattern 1: Match Quarto's full unresolved cross-reference links
     # Example: <a href="#sec-xxx" class="quarto-xref"><span class="quarto-unresolved-ref">sec-xxx</span></a>
     pattern1 = r'<a href="#(sec-[a-zA-Z0-9-]+)" class="quarto-xref"><span class="quarto-unresolved-ref">[^<]*</span></a>'

     # Pattern 2: Match simple unresolved references (what we see in selective builds)
     # Example: <strong>?@sec-ml-systems</strong>
     # This is what Quarto outputs when it can't resolve a reference to an unbuilt chapter
     pattern2 = r'<strong>\?\@(sec-[a-zA-Z0-9-]+)</strong>'

+    # Pattern 3: Match EPUB-specific unresolved references
+    # Example: <a href="@sec-xxx">Link Text</a>
+    # This is what Quarto outputs in EPUB when it can't resolve a reference
+    pattern3 = r'<a href="@(sec-[a-zA-Z0-9-]+)"([^>]*)>([^<]*)</a>'
+
     # Count matches before replacement
     matches1 = re.findall(pattern1, html_content)
     matches2 = re.findall(pattern2, html_content)
-    total_matches = len(matches1) + len(matches2)
+    matches3 = re.findall(pattern3, html_content)
+    total_matches = len(matches1) + len(matches2) + len(matches3)

     # Fix Pattern 1 matches
-    fixed_content = re.sub(pattern1, lambda m: fix_cross_reference_link(m, from_file, build_dir), html_content)
+    fixed_content = re.sub(pattern1, lambda m: fix_cross_reference_link(m, from_file, build_dir, epub_mapping), html_content)

     # Fix Pattern 2 matches with proper relative path calculation
     unmapped_refs = []
     def fix_simple_reference(match):
```
@@ -229,33 +345,61 @@ def fix_cross_references(html_content, from_file, build_dir, verbose=False):
        abs_path = CHAPTER_MAPPING.get(sec_ref)
        title = CHAPTER_TITLES.get(sec_ref)
        if abs_path and title:
-            rel_path = calculate_relative_path(from_file, abs_path, build_dir)
+            rel_path = calculate_relative_path(from_file, abs_path, build_dir, epub_mapping)
            return f'<strong><a href="{rel_path}">{title}</a></strong>'
        else:
            unmapped_refs.append(sec_ref)
            return match.group(0)

    fixed_content = re.sub(pattern2, fix_simple_reference, fixed_content)

+    # Fix Pattern 3 matches (EPUB-specific)
+    def fix_epub_reference(match):
+        sec_ref = match.group(1)
+        attrs = match.group(2)  # Additional attributes
+        link_text = match.group(3)  # Original link text
+
+        # For EPUB with mapping, use direct chapter lookup
+        if epub_mapping:
+            target_chapter = epub_mapping.get(sec_ref)
+            if target_chapter:
+                return f'<a href="{target_chapter}#{sec_ref}"{attrs}>{link_text}</a>'
+            else:
+                unmapped_refs.append(sec_ref)
+                return match.group(0)
+        else:
+            # Fallback to HTML path resolution
+            abs_path = CHAPTER_MAPPING.get(sec_ref)
+            title = CHAPTER_TITLES.get(sec_ref)
+            if abs_path:
+                rel_path = calculate_relative_path(from_file, abs_path, build_dir, None)
+                return f'<a href="{rel_path}"{attrs}>{link_text}</a>'
+            else:
+                unmapped_refs.append(sec_ref)
+                return match.group(0)
+
+    fixed_content = re.sub(pattern3, fix_epub_reference, fixed_content)

    # Count successful replacements
    remaining1 = re.findall(pattern1, fixed_content)
    remaining2 = re.findall(pattern2, fixed_content)
-    fixed_count = total_matches - len(remaining1) - len(remaining2)
+    remaining3 = re.findall(pattern3, fixed_content)
+    fixed_count = total_matches - len(remaining1) - len(remaining2) - len(remaining3)

    # Return info about what was fixed
    return fixed_content, fixed_count, unmapped_refs
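
A quick sanity check of the Pattern 2 rewrite on a synthetic snippet (illustrative
only; the real substitution goes through fix_simple_reference and the chapter maps):

    import re

    sample = '<p>See <strong>?@sec-ml-systems</strong> for background.</p>'
    pattern2 = r'<strong>\?\@(sec-[a-zA-Z0-9-]+)</strong>'
    # Stand-in mapping; the script resolves via CHAPTER_MAPPING/CHAPTER_TITLES.
    fixed = re.sub(pattern2,
                   lambda m: f'<strong><a href="{m.group(1)}.html">{m.group(1)}</a></strong>',
                   sample)
    # -> <p>See <strong><a href="sec-ml-systems.html">sec-ml-systems</a></strong> for background.</p>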

-def process_html_file(html_file, base_dir):
-    """Process a single HTML file to fix cross-references."""
-    # Read HTML content
+def process_html_file(html_file, base_dir, epub_mapping=None):
+    """Process a single HTML/XHTML file to fix cross-references."""
+    # Read file content
    try:
        html_content = html_file.read_text(encoding='utf-8')
    except Exception as e:
        return None, 0, []

    # Fix cross-reference links with proper relative path calculation
-    fixed_content, fixed_count, unmapped = fix_cross_references(html_content, html_file, base_dir)
+    fixed_content, fixed_count, unmapped = fix_cross_references(html_content, html_file, base_dir, epub_mapping)

    # Write back fixed content if changes were made
    if fixed_count > 0:
        try:
@@ -267,59 +411,94 @@ def process_html_file(html_file, base_dir):

def main():
    """
-    Main entry point. Runs in two modes:
-    1. Post-render hook (no args): Processes ALL HTML files in _build/html/
-    2. Manual mode (with file arg): Processes a specific HTML file
+    Main entry point. Runs in three modes:
+    1. Post-render hook (no args): Processes HTML or EPUB builds from _build/
+    2. Directory mode (dir arg): Processes extracted EPUB directory
+    3. Manual mode (file arg): Processes a specific file

    This allows both automatic fixing during builds and manual testing/debugging.
    """
    if len(sys.argv) == 1:
        # MODE 1: Running as Quarto post-render hook
        # This happens automatically after `quarto render`
        # We process ALL HTML files because unresolved refs can appear anywhere
-        build_dir = Path("_build/html")
-        if not build_dir.exists():
-            print("⚠️  Build directory not found - skipping")
+        # Detect if this is HTML or EPUB build
+        html_dir = Path("_build/html")
+        epub_dir = Path("_build/epub")
+
+        # Determine build type
+        epub_mapping = None
+        if html_dir.exists() and (html_dir / "index.html").exists():
+            build_dir = html_dir
+            file_pattern = "*.html"
+            file_type = "HTML"
+        elif epub_dir.exists() and list(epub_dir.glob("*.xhtml")):
+            build_dir = epub_dir
+            file_pattern = "*.xhtml"
+            file_type = "XHTML (EPUB)"
+            # Build EPUB section mapping for dynamic chapter references
+            print("📚 Building EPUB section mapping...")
+            epub_mapping = build_epub_section_mapping(epub_dir)
+            print(f"   Found {len(epub_mapping)} section IDs across chapters")
+        # Check for extracted EPUB structure (EPUB/ directory at current level)
+        elif Path("EPUB").exists() and list(Path("EPUB").rglob("*.xhtml")):
+            build_dir = Path(".")
+            file_pattern = "*.xhtml"
+            file_type = "XHTML (EPUB - extracted)"
+            # Build EPUB section mapping
+            print("📚 Building EPUB section mapping...")
+            epub_mapping = build_epub_section_mapping(Path("."))
+            print(f"   Found {len(epub_mapping)} section IDs across chapters")
+        else:
+            print("⚠️  No HTML or EPUB build directory found - skipping")
            sys.exit(0)

-        # Find all HTML files recursively
-        html_files = list(build_dir.rglob("*.html"))
-        print(f"🔗 [Cross-Reference Fix] Scanning {len(html_files)} HTML files...")
+        # Find all files
+        files = list(build_dir.rglob(file_pattern))
+        print(f"🔗 [Cross-Reference Fix] Scanning {len(files)} {file_type} files...")

        files_fixed = []
        total_refs_fixed = 0
        all_unmapped = set()

-        for html_file in html_files:
+        for file in files:
            # Skip certain files that don't need processing
-            if any(skip in str(html_file) for skip in ['search.html', '404.html', 'site_libs']):
+            skip_patterns = ['search.html', '404.html', 'site_libs', 'nav.xhtml', 'cover.xhtml', 'title_page.xhtml']
+            if any(skip in str(file) for skip in skip_patterns):
                continue

-            rel_path, fixed_count, unmapped = process_html_file(html_file, build_dir)
+            rel_path, fixed_count, unmapped = process_html_file(file, build_dir, epub_mapping)
            if fixed_count > 0:
                files_fixed.append((rel_path, fixed_count))
                total_refs_fixed += fixed_count
            all_unmapped.update(unmapped)

        if files_fixed:
            print(f"✅ Fixed {total_refs_fixed} cross-references in {len(files_fixed)} files:")
            for path, count in files_fixed:
                print(f"   📄 {path}: {count} refs")
        else:
            print("✅ No unresolved cross-references found")

        if all_unmapped:
            print(f"⚠️  Unmapped references: {', '.join(sorted(all_unmapped))}")

    elif len(sys.argv) == 2:
-        # Running with explicit file argument
+        # MODE 2: Running with explicit file argument
        html_file = Path(sys.argv[1])
        if not html_file.exists():
            print(f"❌ File not found: {html_file}")
            sys.exit(1)

+        # Detect if this is an EPUB file (in text/ directory)
+        epub_mapping = None
+        if 'text' in html_file.parts and html_file.suffix == '.xhtml':
+            # This is an EPUB chapter file, build mapping
+            epub_base = html_file.parent.parent  # Go up from text/ to EPUB/
+            print("📚 Building EPUB section mapping...")
+            epub_mapping = build_epub_section_mapping(epub_base)
+            print(f"   Found {len(epub_mapping)} section IDs across chapters")

        print(f"🔗 Fixing cross-reference links in: {html_file}")
-        rel_path, fixed_count, unmapped = process_html_file(html_file, html_file.parent)
+        rel_path, fixed_count, unmapped = process_html_file(html_file, html_file.parent, epub_mapping)
        if fixed_count > 0:
            print(f"✅ Fixed {fixed_count} cross-references")
            if unmapped:
@@ -327,7 +506,7 @@ def main():
        else:
            print("✅ No cross-reference fixes needed")
    else:
-        print("Usage: python3 fix-glossary-html.py [<html-file>]")
+        print("Usage: python3 fix_cross_references.py [<html-or-xhtml-file>]")
        sys.exit(1)


if __name__ == "__main__":
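
build_epub_section_mapping itself is outside the hunks shown here. A plausible
sketch of what it does, assuming it scans each chapter's XHTML for id="sec-..."
attributes (illustrative only, not the committed implementation):

    import re
    from pathlib import Path

    def build_epub_section_mapping_sketch(epub_dir: Path) -> dict:
        """Map each id="sec-..." to the chapter file that declares it."""
        mapping = {}
        for chapter in sorted(epub_dir.rglob("*.xhtml")):
            text = chapter.read_text(encoding="utf-8", errors="ignore")
            for sec_id in re.findall(r'id="(sec-[a-zA-Z0-9-]+)"', text):
                mapping[sec_id] = chapter.name  # e.g. 'sec-ml-systems' -> 'ch002.xhtml'
        return mapping
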
48	quarto/scripts/fix_epub_references.sh	Executable file
@@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Post-process EPUB to fix cross-references
# This script extracts the EPUB, fixes references, and re-packages it

set -e

EPUB_FILE="$1"

if [ -z "$EPUB_FILE" ]; then
    # Running as post-render hook - find the EPUB
    EPUB_FILE="_build/epub/Machine-Learning-Systems.epub"
fi

if [ ! -f "$EPUB_FILE" ]; then
    echo "⚠️  EPUB file not found: $EPUB_FILE"
    exit 0
fi

echo "📚 Post-processing EPUB: $EPUB_FILE"

# Get absolute paths before changing directories
# (resolving $0 up front keeps the Python script reachable after the cd below)
EPUB_ABS=$(cd "$(dirname "$EPUB_FILE")" && pwd)/$(basename "$EPUB_FILE")
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)

# Create temporary directory
TEMP_DIR=$(mktemp -d)
trap "rm -rf $TEMP_DIR" EXIT

# Extract EPUB
echo "  Extracting EPUB..."
unzip -q "$EPUB_ABS" -d "$TEMP_DIR"

# Fix cross-references using Python script
echo "  Fixing cross-references..."
cd "$TEMP_DIR"
python3 "$SCRIPT_DIR/fix_cross_references.py" >/dev/null 2>&1 || true

# Re-package EPUB
echo "  Re-packaging EPUB..."
# EPUB requires mimetype to be first and uncompressed
zip -0 -X fixed.epub mimetype
# Add all other files recursively
zip -r -X fixed.epub META-INF EPUB

# Replace original with fixed version
mv "$TEMP_DIR/fixed.epub" "$EPUB_ABS"

echo "✅ EPUB post-processing complete"
409	tools/scripts/utilities/validate_epub.py	Executable file
@@ -0,0 +1,409 @@
#!/usr/bin/env python3
"""
EPUB Validator Script

Validates EPUB files for common issues including:
- XML parsing errors (double-hyphen in comments)
- CSS variable issues (--variable syntax)
- Malformed HTML/XHTML
- Missing required files
- Structural validation

Uses epubcheck (official EPUB validator) if available, with custom checks for project-specific issues.

Installation:
    # Install epubcheck (recommended)
    brew install epubcheck  # macOS
    # OR download from: https://github.com/w3c/epubcheck/releases

Usage:
    python3 validate_epub.py <path_to_epub_file>
    python3 validate_epub.py quarto/_build/epub/Machine-Learning-Systems.epub
    python3 validate_epub.py --quick <path_to_epub_file>  # Skip epubcheck
"""

import sys
import zipfile
import re
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import List, Tuple, Dict
import tempfile
import shutil
import subprocess
import json


class EPUBValidator:
    """Validates EPUB files for common issues."""

    def __init__(self, epub_path: str, use_epubcheck: bool = True):
        self.epub_path = Path(epub_path)
        self.errors: List[Tuple[str, str, str]] = []  # (severity, category, message)
        self.warnings: List[Tuple[str, str, str]] = []
        self.temp_dir = None
        self.use_epubcheck = use_epubcheck

    def validate(self) -> bool:
        """Run all validation checks. Returns True if no errors found."""
        print(f"🔍 Validating EPUB: {self.epub_path.name}\n")

        if not self.epub_path.exists():
            self._add_error("CRITICAL", "File", f"EPUB file not found: {self.epub_path}")
            return False

        # Run epubcheck first if available
        if self.use_epubcheck:
            self._run_epubcheck()

        # Extract EPUB to temp directory
        self.temp_dir = tempfile.mkdtemp()
        try:
            with zipfile.ZipFile(self.epub_path, 'r') as zip_ref:
                zip_ref.extractall(self.temp_dir)
        except zipfile.BadZipFile:
            self._add_error("CRITICAL", "Structure", "Invalid ZIP/EPUB file")
            return False

        # Run custom validation checks (project-specific)
        print("\n📋 Running custom validation checks...")
        self._check_mimetype()
        self._check_container_xml()
        self._check_css_variables()
        self._check_xml_comments()
        self._check_common_xhtml_errors()
        self._check_xhtml_validity()
        self._check_opf_structure()

        # Print results
        self._print_results()

        # Cleanup
        if self.temp_dir:
            shutil.rmtree(self.temp_dir)

        return len(self.errors) == 0

    def _add_error(self, severity: str, category: str, message: str):
        """Add an error to the list."""
        self.errors.append((severity, category, message))

    def _add_warning(self, severity: str, category: str, message: str):
        """Add a warning to the list."""
        self.warnings.append((severity, category, message))

    def _run_epubcheck(self):
        """Run epubcheck validator if available."""
        print("🔧 Running epubcheck (official EPUB validator)...\n")

        try:
            # Try to run epubcheck
            result = subprocess.run(
                ['epubcheck', '--json', '-', str(self.epub_path)],
                capture_output=True,
                text=True,
                timeout=120
            )

            if result.returncode == 0:
                print("✅ epubcheck: PASS\n")
                return

            # Parse JSON output
            try:
                output = json.loads(result.stdout) if result.stdout else {}
                messages = output.get('messages', [])

                error_count = 0
                warning_count = 0

                for msg in messages:
                    severity = msg.get('severity', 'INFO')
                    message_text = msg.get('message', 'Unknown error')
                    locations = msg.get('locations', [])

                    location_str = ""
                    if locations:
                        loc = locations[0]
                        path = loc.get('path', '')
                        line = loc.get('line', '')
                        col = loc.get('column', '')
                        location_str = f"{path}:{line}:{col}" if line else path

                    full_message = f"{location_str}: {message_text}" if location_str else message_text

                    if severity == 'ERROR' or severity == 'FATAL':
                        self._add_error("ERROR", "epubcheck", full_message)
                        error_count += 1
                    elif severity == 'WARNING':
                        self._add_warning("WARNING", "epubcheck", full_message)
                        warning_count += 1

                print(f"❌ epubcheck found {error_count} errors, {warning_count} warnings\n")

            except json.JSONDecodeError:
                # Fallback to text parsing
                if result.stderr:
                    print(f"⚠️  epubcheck output (text mode):\n{result.stderr}\n")
                self._add_warning("WARNING", "epubcheck", "Could not parse JSON output")

        except FileNotFoundError:
            print("⚠️  epubcheck not found. Install with: brew install epubcheck")
            print("   Skipping official EPUB validation.\n")
        except subprocess.TimeoutExpired:
            self._add_error("ERROR", "epubcheck", "Validation timed out after 120 seconds")
        except Exception as e:
            print(f"⚠️  Could not run epubcheck: {e}\n")

    def _check_mimetype(self):
        """Check for valid mimetype file."""
        mimetype_path = Path(self.temp_dir) / "mimetype"
        if not mimetype_path.exists():
            self._add_error("ERROR", "Structure", "Missing mimetype file")
            return

        content = mimetype_path.read_text().strip()
        if content != "application/epub+zip":
            self._add_error("ERROR", "Structure", f"Invalid mimetype: {content}")

    def _check_container_xml(self):
        """Check for valid META-INF/container.xml."""
        container_path = Path(self.temp_dir) / "META-INF" / "container.xml"
        if not container_path.exists():
            self._add_error("ERROR", "Structure", "Missing META-INF/container.xml")
            return

        try:
            tree = ET.parse(container_path)
            root = tree.getroot()
            # Check for rootfile element
            rootfiles = root.findall(".//{urn:oasis:names:tc:opendocument:xmlns:container}rootfile")
            if not rootfiles:
                self._add_error("ERROR", "Structure", "No rootfile found in container.xml")
        except ET.ParseError as e:
            self._add_error("ERROR", "XML", f"Invalid container.xml: {e}")

    def _check_css_variables(self):
        """Check CSS files for problematic CSS custom properties."""
        print("📝 Checking CSS files for CSS variables...")

        css_files = list(Path(self.temp_dir).rglob("*.css"))

        for css_file in css_files:
            rel_path = css_file.relative_to(self.temp_dir)
            content = css_file.read_text()

            # Check for CSS variable declarations (--variable-name)
            var_declarations = re.findall(r'^\s*(--[\w-]+)\s*:', content, re.MULTILINE)
            if var_declarations:
                self._add_error("ERROR", "CSS",
                    f"{rel_path}: Found CSS variable declarations: {', '.join(var_declarations[:5])}")

            # Check for CSS variable usage (var(--variable-name))
            var_usage = re.findall(r'var\((--[\w-]+)\)', content)
            if var_usage:
                self._add_error("ERROR", "CSS",
                    f"{rel_path}: Found CSS variable usage: {', '.join(set(var_usage[:5]))}")

            # Count total double-hyphens (for reference)
            double_hyphen_count = content.count('--')
            if double_hyphen_count > 0:
                # Check if they're only in comments
                without_comments = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL)
                double_hyphens_in_code = without_comments.count('--')

                if double_hyphens_in_code > 0:
                    self._add_warning("WARNING", "CSS",
                        f"{rel_path}: Found {double_hyphens_in_code} double-hyphens outside comments")
                else:
                    print(f"  ✓ {rel_path}: {double_hyphen_count} double-hyphens (all in comments)")

    def _check_xml_comments(self):
        """Check for XML comment violations (double-hyphen in comments)."""
        print("\n📝 Checking for XML comment violations...")

        xml_files = list(Path(self.temp_dir).rglob("*.xhtml")) + \
                    list(Path(self.temp_dir).rglob("*.xml")) + \
                    list(Path(self.temp_dir).rglob("*.opf"))

        # The XML spec prohibits '--' inside comments, so capture each
        # comment body and flag any body that contains a double-hyphen.
        comment_pattern = re.compile(r'<!--(.*?)-->', re.DOTALL)

        for xml_file in xml_files:
            rel_path = xml_file.relative_to(self.temp_dir)
            try:
                content = xml_file.read_text()
                for m in comment_pattern.finditer(content):
                    if '--' in m.group(1):
                        # Report the line where the offending comment starts
                        line_no = content.count('\n', 0, m.start()) + 1
                        self._add_error("ERROR", "XML",
                            f"{rel_path}:{line_no}: Comment contains '--' (double-hyphen)")
            except Exception as e:
                self._add_warning("WARNING", "XML", f"{rel_path}: Could not check comments: {e}")

    def _check_common_xhtml_errors(self):
        """Check for common XHTML/XML errors that plague EPUB files."""
        print("\n📝 Checking for common XHTML errors...")

        xhtml_files = list(Path(self.temp_dir).rglob("*.xhtml"))

        for xhtml_file in xhtml_files:
            rel_path = xhtml_file.relative_to(self.temp_dir)
            try:
                content = xhtml_file.read_text()
                lines = content.split('\n')

                for i, line in enumerate(lines, 1):
                    # Check for unclosed tags (common patterns)
                    if '<br>' in line and '<br/>' not in line and '<br />' not in line:
                        self._add_warning("WARNING", "XHTML",
                            f"{rel_path}:{i}: Use self-closing <br/> instead of <br>")

                    if '<img ' in line and '/>' not in line[line.index('<img '):]:
                        self._add_warning("WARNING", "XHTML",
                            f"{rel_path}:{i}: <img> tag should be self-closing")

                    if '<hr>' in line and '<hr/>' not in line and '<hr />' not in line:
                        self._add_warning("WARNING", "XHTML",
                            f"{rel_path}:{i}: Use self-closing <hr/> instead of <hr>")

                    # Check for unescaped ampersands (except entities)
                    if '&' in line:
                        # Simple check for unescaped &
                        if re.search(r'&(?![a-zA-Z]+;|#\d+;|#x[0-9a-fA-F]+;)', line):
                            self._add_warning("WARNING", "XHTML",
                                f"{rel_path}:{i}: Possibly unescaped ampersand (&)")

                    # Check for < > without proper escaping
                    if re.search(r'<(?![a-zA-Z/!?])', line):
                        self._add_warning("WARNING", "XHTML",
                            f"{rel_path}:{i}: Possibly unescaped < character")

                    # Check for attributes without quotes
                    if re.search(r'<\w+[^>]*\s+\w+=\w+[^"\']', line):
                        self._add_warning("WARNING", "XHTML",
                            f"{rel_path}:{i}: Attribute values should be quoted")

            except Exception as e:
                self._add_warning("WARNING", "XHTML",
                    f"{rel_path}: Could not check for common errors: {e}")

    def _check_xhtml_validity(self):
        """Check XHTML files for basic validity."""
        print("\n📝 Checking XHTML validity...")

        xhtml_files = list(Path(self.temp_dir).rglob("*.xhtml"))

        for xhtml_file in xhtml_files:
            rel_path = xhtml_file.relative_to(self.temp_dir)
            try:
                # Try to parse as XML (XHTML should be well-formed XML)
                ET.parse(xhtml_file)
                print(f"  ✓ {rel_path}: Valid XHTML")
            except ET.ParseError as e:
                self._add_error("ERROR", "XHTML", f"{rel_path}: Parse error - {e}")

    def _check_opf_structure(self):
        """Check OPF file structure."""
        print("\n📝 Checking OPF structure...")

        opf_files = list(Path(self.temp_dir).rglob("*.opf"))

        if not opf_files:
            self._add_error("ERROR", "Structure", "No OPF file found")
            return

        for opf_file in opf_files:
            rel_path = opf_file.relative_to(self.temp_dir)
            try:
                tree = ET.parse(opf_file)
                root = tree.getroot()

                # Check for required elements
                namespaces = {'opf': 'http://www.idpf.org/2007/opf'}

                metadata = root.find('.//opf:metadata', namespaces)
                manifest = root.find('.//opf:manifest', namespaces)
                spine = root.find('.//opf:spine', namespaces)

                if metadata is None:
                    self._add_error("ERROR", "OPF", f"{rel_path}: Missing metadata element")
                if manifest is None:
                    self._add_error("ERROR", "OPF", f"{rel_path}: Missing manifest element")
                if spine is None:
                    self._add_error("ERROR", "OPF", f"{rel_path}: Missing spine element")
                else:
                    print(f"  ✓ {rel_path}: Valid OPF structure")

            except ET.ParseError as e:
                self._add_error("ERROR", "OPF", f"{rel_path}: Parse error - {e}")

    def _print_results(self):
        """Print validation results."""
        print("\n" + "="*70)
        print("📊 VALIDATION RESULTS")
        print("="*70)

        if not self.errors and not self.warnings:
            print("\n✅ SUCCESS: No issues found!")
            print(f"   {self.epub_path.name} is valid")
            return

        if self.errors:
            print(f"\n❌ ERRORS FOUND: {len(self.errors)}")
            print("-" * 70)
            for severity, category, message in self.errors:
                print(f"  [{severity}] [{category}] {message}")

        if self.warnings:
            print(f"\n⚠️  WARNINGS: {len(self.warnings)}")
            print("-" * 70)
            for severity, category, message in self.warnings:
                print(f"  [{severity}] [{category}] {message}")

        print("\n" + "="*70)
        if self.errors:
            print("❌ VALIDATION FAILED")
        else:
            print("✅ VALIDATION PASSED (with warnings)")
        print("="*70)


def main():
    """Main entry point."""
    if len(sys.argv) < 2:
        print("Usage: python3 validate_epub.py [--quick] <path_to_epub_file>")
        print("\nOptions:")
        print("  --quick    Skip epubcheck validation (faster, custom checks only)")
        print("\nExamples:")
        print("  python3 validate_epub.py quarto/_build/epub/Machine-Learning-Systems.epub")
        print("  python3 validate_epub.py --quick quarto/_build/epub/Machine-Learning-Systems.epub")
        sys.exit(1)

    # Parse arguments
    use_epubcheck = True
    epub_path = None

    for arg in sys.argv[1:]:
        if arg == '--quick':
            use_epubcheck = False
        elif not epub_path:
            epub_path = arg

    if not epub_path:
        print("Error: No EPUB file specified")
        sys.exit(1)

    validator = EPUBValidator(epub_path, use_epubcheck=use_epubcheck)

    success = validator.validate()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()