mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-03-11 17:49:25 -05:00
chore: remove deprecated build scripts directory
Removes the tools/scripts/build/ directory, which contained README.md, generate_stats.py, and standardize_sources.sh. These scripts appear to have been deprecated or relocated as part of the repository reorganization. The clean.sh script has been moved to tools/setup/clean.sh.
This commit is contained in:
@@ -648,7 +648,7 @@ Once everything is set up, you'll be able to:
|
||||
### Community
|
||||
- **[GitHub Discussions](https://github.com/harvard-edge/cs249r_book/discussions)** - Ask questions and share knowledge
|
||||
- **[GitHub Issues](https://github.com/harvard-edge/cs249r_book/issues)** - Report bugs and request features
|
||||
- **[MLSysBook.org](https://mlsysbook.org)** - Main website and learning platform
|
||||
- **[MLSysBook.ai](https://mlsysbook.ai)** - Main website and learning platform
|
||||
|
||||
### Tools and Scripts
|
||||
The `tools/scripts/` directory contains various utilities:
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
# Build Scripts
|
||||
|
||||
Scripts for building, cleaning, and development workflows.
|
||||
|
||||
## Scripts
|
||||
|
||||
- **`clean.sh`** - Comprehensive cleanup script (build artifacts, caches, temp files)
|
||||
- **`standardize_sources.sh`** - Standardize source file formatting
|
||||
- **`generate_stats.py`** - Generate statistics about the Quarto project
|
||||
|
||||
## Quick Usage
|
||||
|
||||
```bash
|
||||
# Clean all build artifacts
|
||||
./clean.sh
|
||||
|
||||
# Deep clean including caches and virtual environments
|
||||
./clean.sh --deep
|
||||
|
||||
# Preview what would be cleaned
|
||||
./clean.sh --dry-run
|
||||
|
||||
# Generate project statistics
|
||||
python generate_stats.py path/to/project
|
||||
```
|
||||
@@ -1,145 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
📘 Quarto Project Stats Collector
|
||||
|
||||
This script scans a Quarto project directory, parses `.qmd` files, and reports useful statistics
|
||||
to help you understand the structure and content of your textbook or technical book.
|
||||
|
||||
✨ Tracked Stats (per file):
|
||||
- 🧱 Chapters, Sections, Subsections
|
||||
- 📝 Word Count
|
||||
- 🖼️ Figures, 📊 Tables, 💻 Code Blocks
|
||||
- 📚 Citations, 🦶 Footnotes, 📦 Callouts
|
||||
- 🚧 TODOs and FIXMEs
|
||||
- ❌ Figures/Tables without captions
|
||||
|
||||
Usage:
|
||||
python generate_stats.py path/to/project
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
def strip_code_blocks(content):
    """Return ``content`` with fenced code blocks removed.

    A block is matched from a triple-backtick marker, through the rest of
    that line (the optional language / info string), up to and including the
    next triple-backtick marker. Matching is lazy, so consecutive blocks are
    removed one at a time rather than as a single span.

    Args:
        content: Text of a document that may contain fenced code blocks.

    Returns:
        The text with every matched block replaced by the empty string. A
        fence that is never closed is left in place because the pattern
        requires a closing marker.
    """
    # DOTALL lets `.` cross newlines so a block body may span many lines.
    return re.sub(r"```.*?\n.*?```", "", content, flags=re.DOTALL)
|
||||
|
||||
def collect_stats_from_qmd(file_path):
    """Collect content statistics for a single ``.qmd`` file.

    Headings, citations, footnotes and callouts are counted on the text with
    fenced code blocks removed, so that e.g. a ``# comment`` inside a code
    block is not mistaken for a chapter heading. Word count, figure/table
    labels, code blocks and TODO/FIXME markers are counted on the full text.

    Args:
        file_path: Path of the file to read (opened as UTF-8).

    Returns:
        A ``defaultdict(int)`` with the keys ``chapters``, ``sections``,
        ``subsections``, ``words``, ``figures``, ``tables``,
        ``figs_no_caption``, ``tables_no_caption``, ``code_blocks``,
        ``citations``, ``footnote_defs``, ``footnote_refs``, ``footnotes``,
        ``callouts`` and ``todos``.
    """
    stats = defaultdict(int)
    with open(file_path, "r", encoding="utf-8") as f:
        full_content = f.read()

    # Strip fenced code blocks before structural analysis
    content = strip_code_blocks(full_content)
    lines = content.splitlines()

    # 🧱 Structure: the trailing space in each prefix keeps "# " from also
    # matching "## " / "### " lines.
    stats['chapters'] += sum(1 for line in lines if line.strip().startswith("# "))
    stats['sections'] += sum(1 for line in lines if line.strip().startswith("## "))
    stats['subsections'] += sum(1 for line in lines if line.strip().startswith("### "))

    # 📝 Word Count (including code and comments)
    stats['words'] += len(re.findall(r'\b\w+\b', full_content))

    # 🎨 Figures and 📊 Tables (only labeled ones using #fig- and #tbl-).
    # Sets de-duplicate a label string that appears more than once.
    fig_labels = set(
        re.findall(r'#fig-[\w-]+', full_content)
        + re.findall(r'#\|\s*label:\s*fig-[\w-]+', full_content)
    )
    tbl_labels = set(
        re.findall(r'#tbl-[\w-]+', full_content)
        + re.findall(r'#\|\s*label:\s*tbl-[\w-]+', full_content)
    )

    # Count valid figures and tables (only labeled)
    stats['figures'] += len(fig_labels)
    stats['tables'] += len(tbl_labels)

    # ❌ Figures and Tables Without Captions (set to zero since unlabeled are ignored)
    stats['figs_no_caption'] = 0
    stats['tables_no_caption'] = 0

    # 💻 Code blocks: every block contributes an opening AND a closing fence
    # line, so counting fence lines directly would double the result. Pair
    # them up, rounding up so an unterminated final fence still counts once.
    fence_lines = len(re.findall(r'^```', full_content, re.MULTILINE))
    stats['code_blocks'] += (fence_lines + 1) // 2

    # 📚 Citations
    stats['citations'] += len(re.findall(r'@[\w:.-]+', content))

    # 🦶 Footnotes - count definitions and references separately.
    # Only ids of the form [^fn-...] are recognised; a trailing ':' marks a
    # definition, its absence marks a reference.
    footnote_defs = re.findall(r'\[\^fn-[^]]+\]:', content)
    footnote_refs = re.findall(r'\[\^fn-[^]]+\](?!:)', content)
    stats['footnote_defs'] += len(footnote_defs)
    stats['footnote_refs'] += len(footnote_refs)
    stats['footnotes'] += len(footnote_defs)  # Keep backward compatibility

    # 📦 Callouts
    stats['callouts'] += len(re.findall(r':::\s*\{\.callout-', content))

    # 🚧 TODOs and FIXMEs
    stats['todos'] += len(re.findall(r'TODO|FIXME', full_content, re.IGNORECASE))

    return stats
|
||||
|
||||
|
||||
def summarize_stats(stats_by_file):
    """Print a per-file statistics table followed by project-wide totals.

    Args:
        stats_by_file: Mapping of file path (``Path`` or ``str``) to that
            file's stats mapping. Each stats mapping is indexed by the
            column keys below, so it should be a ``defaultdict(int)`` (or
            contain every key) to avoid ``KeyError``.

    Returns:
        None. All output goes to stdout.

    Note:
        ``subsections`` has no table column; it only shows up in the
        totals section, which prints every key seen in the per-file stats.
    """
    total = defaultdict(int)
    header = (
        f"{'File':35} | {'Ch':>3} | {'Sec':>4} | {'Words':>7} | "
        f"{'Fig':>5} | {'Tbl':>5} | {'Code':>5} | {'Cite':>5} | "
        f"{'FnDef':>5} | {'FnRef':>5} | {'Call':>5} | {'TODO':>5}"
    )

    print(header)
    print("-" * len(header))

    for file, stats in stats_by_file.items():
        # Path(...) accepts both Path and plain-string keys; only the file
        # name (not the directory) is shown in the table.
        file_name = Path(file).name
        print(f"{file_name:35} | {stats['chapters']:>3} | {stats['sections']:>4} | {stats['words']:>7} | "
              f"{stats['figures']:>5} | {stats['tables']:>5} | {stats['code_blocks']:>5} | {stats['citations']:>5} | "
              f"{stats['footnote_defs']:>5} | {stats['footnote_refs']:>5} | {stats['callouts']:>5} | {stats['todos']:>5}")

        # Accumulate inside the loop so totals cover every file, not just
        # the last one iterated.
        for key in stats:
            total[key] += stats[key]

    print("\n📊 Total Summary:")
    emoji_label = {
        "chapters": "🧱 Chapters",
        "sections": "🧱 Sections",
        "subsections": "🧱 Subsections",
        "words": "📝 Words",
        "figures": "🎨 Figures",
        "tables": "📊 Tables",
        "code_blocks": "💻 Code Blocks",
        "citations": "📚 Citations",
        "footnotes": "🦶 Footnotes (Total)",
        "footnote_defs": "📖 Footnote Definitions",
        "footnote_refs": "🔗 Footnote References",
        "callouts": "📦 Callouts",
        "todos": "🚧 TODOs",
        "figs_no_caption": "❌ Figures w/o Caption",
        "tables_no_caption": "❌ Tables w/o Caption"
    }

    for key, value in total.items():
        # Keys without a pretty label fall back to the raw key name.
        label = emoji_label.get(key, key)
        print(f"{label:<25} : {value}")
|
||||
|
||||
def collect_project_stats(path):
    """Walk through all .qmd files under ``path`` and print their stats.

    Args:
        path: Root directory to search recursively (``str`` or ``Path``).

    Returns:
        None. Prints a warning and returns early when no ``.qmd`` file is
        found; otherwise prints the report produced by ``summarize_stats``.
    """
    root = Path(path)
    # rglob yields entries in filesystem order, which varies between
    # machines; sorting keeps the report rows stable from run to run.
    qmd_files = sorted(root.rglob("*.qmd"))
    if not qmd_files:
        print("⚠️ No QMD files found in the specified path.")
        return

    stats_by_file = {}
    for qmd_file in qmd_files:
        stats_by_file[qmd_file] = collect_stats_from_qmd(qmd_file)
    summarize_stats(stats_by_file)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: takes one positional argument, the project root,
    # and prints the statistics report for every .qmd file beneath it.
    # argparse is imported here because it is only needed when the module
    # is executed directly, not when it is imported.
    import argparse

    parser = argparse.ArgumentParser(description="📘 Collect Quarto textbook stats.")
    parser.add_argument("path", help="Path to the root of the Quarto project")
    args = parser.parse_args()
    collect_project_stats(args.path)
|
||||
@@ -1,48 +0,0 @@
|
||||
#!/bin/bash

# Comprehensive Source Citation Standardization Script
# This script standardizes all source citations in QMD files
#
# Usage: ./standardize_sources.sh [CONTENT_DIR]
#   CONTENT_DIR  Directory searched recursively for *.qmd files.
#                Defaults to "contents", relative to the current directory.
#
# WARNING: files are rewritten in place. Run on a clean working tree so the
# result can be reviewed (and reverted) with version control.

# Stop on the first failing command instead of reporting success regardless.
set -euo pipefail

CONTENT_DIR="${1:-contents}"

if [ ! -d "$CONTENT_DIR" ]; then
    echo "❌ Directory not found: $CONTENT_DIR" >&2
    exit 1
fi

# Suffix for the temporary backups sed creates; specific enough that cleanup
# cannot delete unrelated *.bak files the user may have.
BACKUP_SUFFIX=".stdsrc.bak"

# Apply one sed expression ($1) in place to every .qmd file under CONTENT_DIR.
# `sed -i ''` (detached empty suffix) only works with BSD/macOS sed; GNU sed
# rejects it. An attached suffix (`-i.suffix`) is accepted by both, so edit
# with a backup suffix and delete the backups afterwards.
# `-exec ... {} +` batches files into as few sed invocations as possible.
apply_to_qmd() {
    find "$CONTENT_DIR" -name "*.qmd" -exec sed "-i${BACKUP_SUFFIX}" -e "$1" {} +
    find "$CONTENT_DIR" -name "*.qmd${BACKUP_SUFFIX}" -exec rm -f {} +
}

echo "🔧 Starting source citation standardization..."

# The steps below are order-dependent: the specific asterisk-wrapped forms
# (1, 2) must run before the generic one (3), and the cleanup passes (7, 8)
# repair artifacts the earlier passes can leave behind.

# 1. Convert asterisk-wrapped sources with academic citations
echo "Converting *Source: @citation* to Source: [@citation]."
apply_to_qmd 's/\*Source: @\([^*]*\)\*/Source: [@\1]./g'

# 2. Convert asterisk-wrapped sources with links
echo "Converting *Source: [text](url)* to Source: [text](url)."
apply_to_qmd 's/\*Source: \(\[[^]]*\]([^)]*)\)\*/Source: \1./g'

# 3. Convert asterisk-wrapped sources with plain text
echo "Converting *Source: text* to Source: text."
apply_to_qmd 's/\*Source: \([^*]*\)\*/Source: \1./g'

# 4. Standardize academic citations without brackets to include brackets
echo "Converting Source: @citation to Source: [@citation]."
apply_to_qmd 's/Source: @\([a-zA-Z0-9][^.]*\)\./Source: [@\1]./g'

# 5. Add periods to sources that are missing them (company names, etc.)
echo "Adding periods to sources missing punctuation..."
apply_to_qmd 's/Source: \([^.@\[]*[^.]\)$/Source: \1./g'

# 6. Clean up table sources in curly braces
echo "Standardizing table source citations..."
apply_to_qmd 's/{Source: \([^}]*\)};/Source: \1./g'

# 7. Clean up any double periods
echo "Cleaning up double periods..."
apply_to_qmd 's/Source: \([^.]*\)\.\./Source: \1./g'

# 8. Fix any remaining formatting issues
echo "Final cleanup..."
apply_to_qmd 's/Source: \[\[@/Source: [@/g'

echo "✅ Source citation standardization complete!"
echo ""
echo "📊 Summary of standard formats applied:"
echo " • Academic citations: Source: [@citation]."
echo " • Company sources: Source: Company Name."
echo " • Link sources: Source: [Text](URL)."
echo ""
echo "🔍 To verify results, run:"
echo " grep -r 'Source:' $CONTENT_DIR --include='*.qmd' | head -20"
|
||||
Reference in New Issue
Block a user