chore: remove deprecated build scripts directory

Removes tools/scripts/build/ directory containing:
- README.md
- generate_stats.py
- standardize_sources.sh

These scripts appear to have been deprecated or relocated as part of
repository reorganization. The clean.sh script has been moved to
tools/setup/clean.sh.
This commit is contained in:
Vijay Janapa Reddi
2025-12-02 21:54:54 -05:00
parent 0495d81e3a
commit b62fc03472
4 changed files with 1 additions and 219 deletions

View File

@@ -648,7 +648,7 @@ Once everything is set up, you'll be able to:
### Community
- **[GitHub Discussions](https://github.com/harvard-edge/cs249r_book/discussions)** - Ask questions and share knowledge
- **[GitHub Issues](https://github.com/harvard-edge/cs249r_book/issues)** - Report bugs and request features
- **[MLSysBook.org](https://mlsysbook.org)** - Main website and learning platform
- **[MLSysBook.ai](https://mlsysbook.ai)** - Main website and learning platform
### Tools and Scripts
The `tools/scripts/` directory contains various utilities:

View File

@@ -1,25 +0,0 @@
# Build Scripts
Scripts for building, cleaning, and development workflows.
## Scripts
- **`clean.sh`** - Comprehensive cleanup script (build artifacts, caches, temp files)
- **`standardize_sources.sh`** - Standardize source file formatting
- **`generate_stats.py`** - Generate statistics about the Quarto project
## Quick Usage
```bash
# Clean all build artifacts
./clean.sh
# Deep clean including caches and virtual environments
./clean.sh --deep
# Preview what would be cleaned
./clean.sh --dry-run
# Generate project statistics
python generate_stats.py path/to/project
```

View File

@@ -1,145 +0,0 @@
#!/usr/bin/env python3
"""
📘 Quarto Project Stats Collector
This script scans a Quarto project directory, parses `.qmd` files, and reports useful statistics
to help you understand the structure and content of your textbook or technical book.
✨ Tracked Stats (per file):
- 🧱 Chapters, Sections, Subsections
- 📝 Word Count
- 🖼️ Figures, 📊 Tables, 💻 Code Blocks
- 📚 Citations, 🦶 Footnotes, 📦 Callouts
- 🚧 TODOs and FIXMEs
- ❌ Figures/Tables without captions
Usage:
python generate_stats.py path/to/project
"""
import re
from pathlib import Path
from collections import defaultdict
def strip_code_blocks(content):
    """Return ``content`` with every fenced code block deleted.

    A fenced block is matched lazily from an opening triple backtick
    (plus whatever follows it up to the first newline) through the next
    triple backtick. An opening fence with no closing fence matches
    nothing, so that text is returned unchanged.
    """
    # DOTALL lets '.' cross newlines so one match can span a multi-line block.
    fenced_block = re.compile(r"```.*?\n.*?```", flags=re.DOTALL)
    return fenced_block.sub("", content)
def collect_stats_from_qmd(file_path):
    """Collect content statistics for a single ``.qmd`` file.

    The file is read as UTF-8 text. Headings, citations, footnotes and
    callouts are counted on the text with fenced code blocks removed;
    words, figure/table labels, code blocks and TODO markers are counted
    on the full text.

    Args:
        file_path: Path of the file to analyse (anything ``open`` accepts).

    Returns:
        A ``defaultdict(int)`` with the keys ``chapters``, ``sections``,
        ``subsections``, ``words``, ``figures``, ``tables``,
        ``figs_no_caption``, ``tables_no_caption``, ``code_blocks``,
        ``citations``, ``footnote_defs``, ``footnote_refs``, ``footnotes``,
        ``callouts`` and ``todos``, inserted in that order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    stats = defaultdict(int)
    with open(file_path, "r", encoding="utf-8") as f:
        full_content = f.read()

    # Headings are counted without code blocks so that '#' comment lines
    # inside code are not mistaken for headings.
    content = strip_code_blocks(full_content)
    stripped_lines = [line.strip() for line in content.splitlines()]

    # Structure: heading level is decided by the exact "#"-prefix of the line.
    heading_prefixes = (
        ("chapters", "# "),
        ("sections", "## "),
        ("subsections", "### "),
    )
    for key, prefix in heading_prefixes:
        stats[key] += sum(1 for line in stripped_lines if line.startswith(prefix))

    # Word count (including code and comments).
    stats['words'] += len(re.findall(r'\b\w+\b', full_content))

    # Figures and tables: only labeled ones are counted, via "#fig-"/"#tbl-"
    # labels or "#| label:" cell options. Sets collapse repeated labels.
    fig_labels = set(re.findall(r'#fig-[\w-]+', full_content))
    fig_labels.update(re.findall(r'#\|\s*label:\s*fig-[\w-]+', full_content))
    tbl_labels = set(re.findall(r'#tbl-[\w-]+', full_content))
    tbl_labels.update(re.findall(r'#\|\s*label:\s*tbl-[\w-]+', full_content))
    stats['figures'] += len(fig_labels)
    stats['tables'] += len(tbl_labels)

    # Caption checks are not implemented: unlabeled figures/tables are
    # ignored above, so these are always reported as zero.
    stats['figs_no_caption'] = 0
    stats['tables_no_caption'] = 0

    # Code blocks: each fenced block has an opening AND a closing fence line,
    # so counting fence lines directly would report every block twice.
    # Pair them up; a trailing unclosed fence still counts as one block.
    fence_lines = len(re.findall(r'^```', full_content, re.MULTILINE))
    stats['code_blocks'] += (fence_lines + 1) // 2

    # Citations.
    # NOTE(review): this pattern matches any "@token", so strings such as
    # "@fig-foo" or the domain part of an e-mail address are counted too.
    stats['citations'] += len(re.findall(r'@[\w:.-]+', content))

    # Footnotes: definitions end with "]:"; references are the same marker
    # not followed by a colon.
    footnote_defs = re.findall(r'\[\^fn-[^]]+\]:', content)
    footnote_refs = re.findall(r'\[\^fn-[^]]+\](?!:)', content)
    stats['footnote_defs'] += len(footnote_defs)
    stats['footnote_refs'] += len(footnote_refs)
    stats['footnotes'] += len(footnote_defs)  # Keep backward compatibility

    # Callouts.
    stats['callouts'] += len(re.findall(r':::\s*\{\.callout-', content))

    # TODOs and FIXMEs (case-insensitive, anywhere in the file).
    stats['todos'] += len(re.findall(r'TODO|FIXME', full_content, re.IGNORECASE))

    return stats
def summarize_stats(stats_by_file):
    """Print a per-file table followed by project-wide totals.

    Args:
        stats_by_file: Mapping of file path objects (their ``.name`` is
            shown in the first column) to per-file stats mappings.

    The table shows only a subset of the collected stats; the totals
    section lists every key found in the per-file mappings.
    """
    # (column title, stats key, column width) in display order.
    columns = (
        ("Ch", "chapters", 3),
        ("Sec", "sections", 4),
        ("Words", "words", 7),
        ("Fig", "figures", 5),
        ("Tbl", "tables", 5),
        ("Code", "code_blocks", 5),
        ("Cite", "citations", 5),
        ("FnDef", "footnote_defs", 5),
        ("FnRef", "footnote_refs", 5),
        ("Call", "callouts", 5),
        ("TODO", "todos", 5),
    )
    separator = " | "

    header_cells = [f"{'File':35}"]
    header_cells.extend(f"{title:>{width}}" for title, _, width in columns)
    header = separator.join(header_cells)
    print(header)
    print("-" * len(header))

    total = defaultdict(int)
    for file, stats in stats_by_file.items():
        row_cells = [f"{file.name:35}"]
        for _, key, width in columns:
            row_cells.append(f"{stats[key]:>{width}}")
        print(separator.join(row_cells))
        # Accumulate every key of this file, not only the displayed columns.
        for key in stats:
            total[key] += stats[key]

    print("\n📊 Total Summary:")
    emoji_label = {
        "chapters": "🧱 Chapters",
        "sections": "🧱 Sections",
        "subsections": "🧱 Subsections",
        "words": "📝 Words",
        "figures": "🎨 Figures",
        "tables": "📊 Tables",
        "code_blocks": "💻 Code Blocks",
        "citations": "📚 Citations",
        "footnotes": "🦶 Footnotes (Total)",
        "footnote_defs": "📖 Footnote Definitions",
        "footnote_refs": "🔗 Footnote References",
        "callouts": "📦 Callouts",
        "todos": "🚧 TODOs",
        "figs_no_caption": "❌ Figures w/o Caption",
        "tables_no_caption": "❌ Tables w/o Caption"
    }
    for key, value in total.items():
        # Keys without a pretty label are printed under their raw name.
        label = emoji_label.get(key, key)
        print(f"{label:<25} : {value}")
def collect_project_stats(path):
    """Gather stats for every ``.qmd`` file under ``path`` and print a summary.

    The directory tree is searched recursively. When no ``.qmd`` file is
    found a warning is printed and nothing else happens.
    """
    qmd_paths = list(Path(path).rglob("*.qmd"))
    if qmd_paths:
        per_file = {
            qmd_path: collect_stats_from_qmd(qmd_path) for qmd_path in qmd_paths
        }
        summarize_stats(per_file)
    else:
        print("⚠️ No QMD files found in the specified path.")
if __name__ == "__main__":
    # Command-line entry point: the only argument is the project root.
    import argparse

    cli = argparse.ArgumentParser(description="📘 Collect Quarto textbook stats.")
    cli.add_argument("path", help="Path to the root of the Quarto project")
    collect_project_stats(cli.parse_args().path)

View File

@@ -1,48 +0,0 @@
#!/bin/bash
# Comprehensive Source Citation Standardization Script
# This script standardizes all source citations in QMD files.
#
# It rewrites every *.qmd file under ./contents IN PLACE, so run it from the
# directory that contains "contents" and commit or back up your work first.

# Fail early instead of printing a success banner when there is nothing to process.
if [ ! -d contents ]; then
    echo "Error: 'contents' directory not found; run this script from the project root." >&2
    exit 1
fi

# In-place editing differs between sed flavors: BSD/macOS sed needs an explicit
# (possibly empty) backup suffix after -i, whereas GNU sed would take a separate
# '' argument as the script itself. Only GNU sed accepts --version.
if sed --version >/dev/null 2>&1; then
    SED_INPLACE=(-i)
else
    SED_INPLACE=(-i '')
fi

# Apply one sed expression ($1) in place to every .qmd file under contents/.
apply_to_sources() {
    find contents -name "*.qmd" -exec sed "${SED_INPLACE[@]}" "$1" {} \;
}

echo "🔧 Starting source citation standardization..."

# 1. Convert asterisk-wrapped sources with academic citations
echo "Converting *Source: @citation* to Source: [@citation]."
apply_to_sources 's/\*Source: @\([^*]*\)\*/Source: [@\1]./g'

# 2. Convert asterisk-wrapped sources with links
echo "Converting *Source: [text](url)* to Source: [text](url)."
apply_to_sources 's/\*Source: \(\[[^]]*\]([^)]*)\)\*/Source: \1./g'

# 3. Convert asterisk-wrapped sources with plain text
echo "Converting *Source: text* to Source: text."
apply_to_sources 's/\*Source: \([^*]*\)\*/Source: \1./g'

# 4. Standardize academic citations without brackets to include brackets
echo "Converting Source: @citation to Source: [@citation]."
apply_to_sources 's/Source: @\([a-zA-Z0-9][^.]*\)\./Source: [@\1]./g'

# 5. Add periods to sources that are missing them (company names, etc.)
echo "Adding periods to sources missing punctuation..."
apply_to_sources 's/Source: \([^.@\[]*[^.]\)$/Source: \1./g'

# 6. Clean up table sources in curly braces
echo "Standardizing table source citations..."
apply_to_sources 's/{Source: \([^}]*\)};/Source: \1./g'

# 7. Clean up any double periods
echo "Cleaning up double periods..."
apply_to_sources 's/Source: \([^.]*\)\.\./Source: \1./g'

# 8. Fix any remaining formatting issues
echo "Final cleanup..."
apply_to_sources 's/Source: \[\[@/Source: [@/g'

echo "✅ Source citation standardization complete!"
echo ""
echo "📊 Summary of standard formats applied:"
echo " • Academic citations: Source: [@citation]."
echo " • Company sources: Source: Company Name."
echo " • Link sources: Source: [Text](URL)."
echo ""
echo "🔍 To verify results, run:"
echo " grep -r 'Source:' contents --include='*.qmd' | head -20"