mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-05 17:18:48 -05:00
🗂️ Major reorganization: - Organized scripts into logical categories: build/, content/, maintenance/, testing/, utilities/, docs/ - Moved 25+ scripts from flat structure to categorized directories - Added category-specific README files with usage examples 📝 Standardized script naming: - Fixed abbreviations: fn_cnt.sh → count_footnotes.sh, fixbib.py → fix_bibliography.py - Applied consistent verb_noun patterns: section_id_manager.py → manage_section_ids.py - Improved clarity: ascii_checker.py → check_ascii.py, fixtitle.py → fix_titles.py - Distinguished similar tools: footnote_cnt.sh → analyze_footnotes.sh 📚 Updated documentation: - Comprehensive main README with usage examples for all categories - Individual README files for each script category - Updated DEVELOPMENT.md with new scripts organization section - Synchronized all script references in documentation 🔗 Integration updates: - Updated Makefile to use new script paths - Fixed pre-commit hook references to moved scripts - Maintained all existing functionality while improving organization The scripts directory now follows professional standards with intuitive categorization, consistent naming conventions, and comprehensive documentation.
98 lines
2.7 KiB
Python
98 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
extract_headers.py
|
||
|
||
Extracts section headers (e.g., #, ##, ###) from .qmd files.
|
||
Supports either a single file (-f) or all .qmd files in a directory (-d).
|
||
Outputs a neatly formatted table of:
|
||
- Filename
|
||
- Header Level
|
||
- Header Text
|
||
|
||
Usage:
|
||
python extract_headers.py -f path/to/file.qmd
|
||
python extract_headers.py -d path/to/dir/
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
import argparse
|
||
from pathlib import Path
|
||
|
||
def extract_headers_from_file(file_path):
|
||
"""
|
||
Reads a .qmd file and extracts all markdown-style headers.
|
||
|
||
Args:
|
||
file_path (str or Path): Path to the .qmd file.
|
||
|
||
Returns:
|
||
list of tuples: Each tuple is (header level, header text).
|
||
"""
|
||
headers = []
|
||
with open(file_path, 'r', encoding='utf-8') as f:
|
||
for line in f:
|
||
# Match lines starting with 1–6 '#' followed by space and text
|
||
match = re.match(r'^(#{1,6})\s+(.*)', line)
|
||
if match:
|
||
level = match.group(1) # e.g., '##'
|
||
text = match.group(2).strip()
|
||
headers.append((level, text))
|
||
return headers
|
||
|
||
def process_files(files):
|
||
"""
|
||
Processes a list of files and extracts headers from each.
|
||
|
||
Args:
|
||
files (list of Path): List of .qmd files to process.
|
||
|
||
Returns:
|
||
list of tuples: Each tuple is (relative path, header level, header text).
|
||
"""
|
||
results = []
|
||
for file_path in files:
|
||
headers = extract_headers_from_file(file_path)
|
||
rel_path = os.path.relpath(file_path)
|
||
for level, text in headers:
|
||
results.append((rel_path, level, text))
|
||
return results
|
||
|
||
def find_qmd_files(directory):
|
||
"""
|
||
Recursively finds all .qmd files under the given directory.
|
||
|
||
Args:
|
||
directory (str): Directory path.
|
||
|
||
Returns:
|
||
list of Path: All matching .qmd files.
|
||
"""
|
||
return list(Path(directory).rglob("*.qmd"))
|
||
|
||
def main():
|
||
"""
|
||
Entry point. Parses arguments and runs header extraction.
|
||
"""
|
||
parser = argparse.ArgumentParser(description="Extract section headers from .qmd files.")
|
||
group = parser.add_mutually_exclusive_group(required=True)
|
||
group.add_argument('-f', '--file', help='Path to a single .qmd file')
|
||
group.add_argument('-d', '--directory', help='Directory containing .qmd files recursively')
|
||
args = parser.parse_args()
|
||
|
||
if args.file:
|
||
files = [Path(args.file)]
|
||
else:
|
||
files = find_qmd_files(args.directory)
|
||
|
||
headers = process_files(files)
|
||
|
||
# Print formatted output table
|
||
print(f"{'Filename':<30} | {'Level':<5} | Header")
|
||
print(f"{'-'*30}-|{'-'*6}-|{'-'*40}")
|
||
for filename, level, header in headers:
|
||
print(f"{filename:<30} | {level:<5} | {header}")
|
||
|
||
if __name__ == "__main__":
|
||
main()
|