chore: remove deprecated build scripts directory

Removes the tools/scripts/build/ directory, which contained:

- README.md
- generate_stats.py
- standardize_sources.sh

These scripts appear to have been deprecated or relocated as part of the repository reorganization. The clean.sh script has been moved to tools/setup/clean.sh.

Also updates the website link in docs/BUILD.md from MLSysBook.org to MLSysBook.ai.
2026-03-11 17:49:25 -05:00 · 2025-12-02 21:54:54 -05:00
parent 0495d81e3a
commit b62fc03472
4 changed files with 1 additions and 219 deletions
--- a/docs/BUILD.md
+++ b/docs/BUILD.md
@@ -648,7 +648,7 @@ Once everything is set up, you'll be able to:
 ### Community
 - **[GitHub Discussions](https://github.com/harvard-edge/cs249r_book/discussions)** - Ask questions and share knowledge
 - **[GitHub Issues](https://github.com/harvard-edge/cs249r_book/issues)** - Report bugs and request features
-- **[MLSysBook.org](https://mlsysbook.org)** - Main website and learning platform
+- **[MLSysBook.ai](https://mlsysbook.ai)** - Main website and learning platform

 ### Tools and Scripts
 The `tools/scripts/` directory contains various utilities:
--- a/tools/scripts/build/README.md
+++ b/tools/scripts/build/README.md
@@ -1,25 +0,0 @@
-# Build Scripts
-
-Scripts for building, cleaning, and development workflows.
-
-## Scripts
-
-- **`clean.sh`** - Comprehensive cleanup script (build artifacts, caches, temp files)
-- **`standardize_sources.sh`** - Standardize source file formatting  
-- **`generate_stats.py`** - Generate statistics about the Quarto project
-
-## Quick Usage
-
-```bash
-# Clean all build artifacts
-./clean.sh
-
-# Deep clean including caches and virtual environments  
-./clean.sh --deep
-
-# Preview what would be cleaned
-./clean.sh --dry-run
-
-# Generate project statistics
-python generate_stats.py
-``` 
--- a/tools/scripts/build/generate_stats.py
+++ b/tools/scripts/build/generate_stats.py
@@ -1,145 +0,0 @@
-#!/usr/bin/env python3
-"""
-📘 Quarto Project Stats Collector
-
-This script scans a Quarto project directory, parses `.qmd` files, and reports useful statistics 
-to help you understand the structure and content of your textbook or technical book.
-
-✨ Tracked Stats (per file):
-- 🧱 Chapters, Sections, Subsections
-- 📝 Word Count
-- 🖼️ Figures, 📊 Tables, 💻 Code Blocks
-- 📚 Citations, 🦶 Footnotes, 📦 Callouts
-- 🚧 TODOs and FIXMEs
-- ❌ Figures/Tables without captions
-
-Usage:
-    python quarto_stats.py path/to/project
-"""
-
-import re
-from pathlib import Path
-from collections import defaultdict
-
-def strip_code_blocks(content):
-    """Remove fenced code blocks from the content."""
-    return re.sub(r"```.*?\n.*?```", "", content, flags=re.DOTALL)
-
-def collect_stats_from_qmd(file_path):
-    stats = defaultdict(int)
-    with open(file_path, "r", encoding="utf-8") as f:
-        full_content = f.read()
-
-    # Strip fenced code blocks before structural analysis
-    content = strip_code_blocks(full_content)
-    lines = content.splitlines()
-
-    # 🧱 Structure
-    stats['chapters'] += sum(1 for line in lines if line.strip().startswith("# "))
-    stats['sections'] += sum(1 for line in lines if line.strip().startswith("## "))
-    stats['subsections'] += sum(1 for line in lines if line.strip().startswith("### "))
-
-    # 📝 Word Count (including code and comments)
-    stats['words'] += len(re.findall(r'\b\w+\b', full_content))
-
-    # 🎨 Figures and 📊 Tables (only labeled ones using #fig- and #tbl-)
-    fig_labels = list(set(
-        re.findall(r'#fig-[\w-]+', full_content) +
-        re.findall(r'#\|\s*label:\s*fig-[\w-]+', full_content)
-    ))
-    tbl_labels = list(set(
-        re.findall(r'#tbl-[\w-]+', full_content) +
-        re.findall(r'#\|\s*label:\s*tbl-[\w-]+', full_content)
-    ))
-
-    # Count valid figures and tables (only labeled)
-    stats['figures'] += len(fig_labels)
-    stats['tables'] += len(tbl_labels)
-
-    # ❌ Figures and Tables Without Captions (set to zero since unlabeled are ignored)
-    stats['figs_no_caption'] = 0
-    stats['tables_no_caption'] = 0
-
-    # 💻 Code blocks
-    stats['code_blocks'] += len(re.findall(r'^```', full_content, re.MULTILINE))
-
-    # 📚 Citations
-    stats['citations'] += len(re.findall(r'@[\w:.-]+', content))
-
-    # 🦶 Footnotes - count definitions and references separately
-    footnote_defs = re.findall(r'\[\^fn-[^]]+\]:', content)
-    footnote_refs = re.findall(r'\[\^fn-[^]]+\](?!:)', content)
-    stats['footnote_defs'] += len(footnote_defs)
-    stats['footnote_refs'] += len(footnote_refs)
-    stats['footnotes'] += len(footnote_defs)  # Keep backward compatibility
-
-    # 📦 Callouts
-    stats['callouts'] += len(re.findall(r':::\s*\{\.callout-', content))
-
-    # 🚧 TODOs and FIXMEs
-    stats['todos'] += len(re.findall(r'TODO|FIXME', full_content, re.IGNORECASE))
-
-    return stats
-
-
-def summarize_stats(stats_by_file):
-    total = defaultdict(int)
-    header = (
-        f"{'File':35} | {'Ch':>3} | {'Sec':>4} | {'Words':>7} | "
-        f"{'Fig':>5} | {'Tbl':>5} | {'Code':>5} | {'Cite':>5} | "
-        f"{'FnDef':>5} | {'FnRef':>5} | {'Call':>5} | {'TODO':>5}"
-    )
-
-    print(header)
-    print("-" * len(header))
-
-    for file, stats in stats_by_file.items():
-        print(f"{file.name:35} | {stats['chapters']:>3} | {stats['sections']:>4} | {stats['words']:>7} | "
-            f"{stats['figures']:>5} | {stats['tables']:>5} | {stats['code_blocks']:>5} | {stats['citations']:>5} | "
-            f"{stats['footnote_defs']:>5} | {stats['footnote_refs']:>5} | {stats['callouts']:>5} | {stats['todos']:>5}")
-
-        for key in stats:
-            total[key] += stats[key]
-
-    print("\n📊 Total Summary:")
-    emoji_label = {
-        "chapters":           "🧱 Chapters",
-        "sections":           "🧱 Sections",
-        "subsections":        "🧱 Subsections",
-        "words":              "📝 Words",
-        "figures":            "🎨 Figures",
-        "tables":             "📊 Tables",
-        "code_blocks":        "💻 Code Blocks",
-        "citations":          "📚 Citations",
-        "footnotes":          "🦶 Footnotes (Total)",
-        "footnote_defs":      "📖 Footnote Definitions",
-        "footnote_refs":      "🔗 Footnote References",
-        "callouts":           "📦 Callouts",
-        "todos":              "🚧 TODOs",
-        "figs_no_caption":    "❌ Figures w/o Caption",
-        "tables_no_caption":  "❌ Tables w/o Caption"
-    }
-
-    for key, value in total.items():
-        label = emoji_label.get(key, key)
-        print(f"{label:<25} : {value}")
-
-def collect_project_stats(path):
-    """Walk through all .qmd files and collect stats."""
-    path = Path(path)
-    qmd_files = list(path.rglob("*.qmd"))
-    if not qmd_files:
-        print("⚠️ No QMD files found in the specified path.")
-        return
-
-    stats_by_file = {}
-    for qmd_file in qmd_files:
-        stats_by_file[qmd_file] = collect_stats_from_qmd(qmd_file)
-    summarize_stats(stats_by_file)
-
-if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser(description="📘 Collect Quarto textbook stats.")
-    parser.add_argument("path", help="Path to the root of the Quarto project")
-    args = parser.parse_args()
-    collect_project_stats(args.path)
--- a/tools/scripts/build/standardize_sources.sh
+++ b/tools/scripts/build/standardize_sources.sh
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# Comprehensive Source Citation Standardization Script
-# This script standardizes all source citations in QMD files
-
-echo "🔧 Starting source citation standardization..."
-
-# 1. Convert asterisk-wrapped sources with academic citations
-echo "Converting *Source: @citation* to Source: [@citation]."
-find contents -name "*.qmd" -exec sed -i '' 's/\*Source: @\([^*]*\)\*/Source: [@\1]./g' {} \;
-
-# 2. Convert asterisk-wrapped sources with links  
-echo "Converting *Source: [text](url)* to Source: [text](url)."
-find contents -name "*.qmd" -exec sed -i '' 's/\*Source: \(\[[^]]*\]([^)]*)\)\*/Source: \1./g' {} \;
-
-# 3. Convert asterisk-wrapped sources with plain text
-echo "Converting *Source: text* to Source: text."
-find contents -name "*.qmd" -exec sed -i '' 's/\*Source: \([^*]*\)\*/Source: \1./g' {} \;
-
-# 4. Standardize academic citations without brackets to include brackets
-echo "Converting Source: @citation to Source: [@citation]."
-find contents -name "*.qmd" -exec sed -i '' 's/Source: @\([a-zA-Z0-9][^.]*\)\./Source: [@\1]./g' {} \;
-
-# 5. Add periods to sources that are missing them (company names, etc.)
-echo "Adding periods to sources missing punctuation..."
-find contents -name "*.qmd" -exec sed -i '' 's/Source: \([^.@\[]*[^.]\)$/Source: \1./g' {} \;
-
-# 6. Clean up table sources in curly braces
-echo "Standardizing table source citations..."
-find contents -name "*.qmd" -exec sed -i '' 's/{Source: \([^}]*\)};/Source: \1./g' {} \;
-
-# 7. Clean up any double periods
-echo "Cleaning up double periods..."
-find contents -name "*.qmd" -exec sed -i '' 's/Source: \([^.]*\)\.\./Source: \1./g' {} \;
-
-# 8. Fix any remaining formatting issues
-echo "Final cleanup..."
-find contents -name "*.qmd" -exec sed -i '' 's/Source: \[\[@/Source: [@/g' {} \;
-
-echo "✅ Source citation standardization complete!"
-echo ""
-echo "📊 Summary of standard formats applied:"
-echo "  • Academic citations: Source: [@citation]."
-echo "  • Company sources: Source: Company Name."
-echo "  • Link sources: Source: [Text](URL)."
-echo ""
-echo "🔍 To verify results, run:"
-echo "  grep -r 'Source:' contents --include='*.qmd' | head -20"