Files
cs249r_book/scripts/extract_headers.py
Vijay Janapa Reddi b7395d942c Adds script to extract headers from .qmd files
Creates a script that extracts section headers from .qmd files,
outputting them in a formatted table showing filename, header level,
and header text. It supports processing either a single file or all
.qmd files within a directory.
2025-07-11 18:09:22 -04:00

98 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
extract_headers.py
Extracts section headers (e.g., #, ##, ###) from .qmd files.
Supports either a single file (-f) or all .qmd files in a directory (-d).
Outputs a neatly formatted table of:
- Filename
- Header Level
- Header Text
Usage:
python extract_headers.py -f path/to/file.qmd
python extract_headers.py -d path/to/dir/
"""
import os
import re
import argparse
from pathlib import Path
def extract_headers_from_file(file_path):
    """
    Reads a .qmd file and extracts all markdown-style headers.

    Lines inside fenced code blocks (delimited by three or more backticks
    or tildes) are skipped, so a comment such as `# load data` inside a
    code chunk is not reported as a header.

    Args:
        file_path (str or Path): Path to the .qmd file.

    Returns:
        list of tuples: Each tuple is (header level, header text). The level
        is the run of '#' characters (e.g., '##'); the text has surrounding
        whitespace stripped.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 1-6 '#' at the start of the line, followed by whitespace and the text.
    header_re = re.compile(r'^(#{1,6})\s+(.*)')
    # A fence line: up to three spaces of indent, then 3+ backticks or tildes.
    fence_re = re.compile(r'^ {0,3}(`{3,}|~{3,})')

    headers = []
    open_fence = None  # marker that opened the current code block, if any
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            fence = fence_re.match(line)
            if fence:
                marker = fence.group(1)
                rest = line[fence.end():].strip()
                if open_fence is None:
                    # A backtick run followed by more backticks on the same
                    # line is inline code, not the start of a fenced block.
                    if not (marker[0] == '`' and '`' in rest):
                        open_fence = marker
                elif (marker[0] == open_fence[0]
                        and len(marker) >= len(open_fence)
                        and not rest):
                    # A closing fence uses the same character, is at least
                    # as long as the opener, and carries no info string.
                    open_fence = None
                # A fence line never starts with '#', so it is not a header.
                continue
            if open_fence is not None:
                continue
            match = header_re.match(line)
            if match:
                level = match.group(1)  # e.g., '##'
                text = match.group(2).strip()
                headers.append((level, text))
    return headers
def process_files(files):
    """
    Collects the headers of several files into one flat list of rows.

    Args:
        files (list of Path): The .qmd files to scan, in output order.

    Returns:
        list of tuples: One (relative path, header level, header text) row
        per header, grouped by file in the order the files were given. The
        path is relative to the current working directory.
    """
    rows = []
    for qmd in files:
        found = extract_headers_from_file(qmd)
        shown_path = os.path.relpath(qmd)
        rows.extend((shown_path, level, text) for level, text in found)
    return rows
def find_qmd_files(directory):
    """
    Recursively finds all .qmd files under the given directory.

    Only regular files are returned, and the result is sorted so that
    repeated runs over the same tree list files in the same order.

    Args:
        directory (str or Path): Directory path.

    Returns:
        list of Path: All matching .qmd files, in sorted order.
    """
    # rglob also yields directories whose name ends in ".qmd"; they cannot be
    # opened as files, so they are filtered out here.
    return sorted(
        path for path in Path(directory).rglob("*.qmd") if path.is_file()
    )
def main():
    """
    Command-line entry point.

    Requires exactly one of -f/--file or -d/--directory, gathers the headers
    of the selected .qmd file(s), and prints them to stdout as a table with
    the columns Filename, Level and Header.
    """
    cli = argparse.ArgumentParser(description="Extract section headers from .qmd files.")
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument('-f', '--file', help='Path to a single .qmd file')
    source.add_argument('-d', '--directory', help='Directory containing .qmd files recursively')
    options = cli.parse_args()

    # A single file is wrapped in a list so both modes share one code path.
    targets = [Path(options.file)] if options.file else find_qmd_files(options.directory)
    rows = process_files(targets)

    # Table heading and separator, followed by one line per header found.
    print(f"{'Filename':<30} | {'Level':<5} | Header")
    print(f"{'-'*30}-|{'-'*6}-|{'-'*40}")
    for filename, level, header in rows:
        print(f"{filename:<30} | {level:<5} | {header}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()