# File: cs249r_book/scripts/find_acronyms.py
# Last commit: 3061c8ea4b "acronym fixes" by Vijay Janapa Reddi
# Commit date: 2025-03-28 11:32:49 -04:00
# Size: 63 lines, 2.3 KiB (Python)

import re
import argparse
import os
from collections import defaultdict
# Matches an acronym wrapped in parentheses, e.g. "(CNN)" or "(GPUs)":
# two or more uppercase ASCII letters, optionally followed by a lowercase
# plural "s". Group 1 captures the acronym without the parentheses.
pattern = re.compile(r"\(([A-Z]{2,}s?)\)")


def process_file(file_path, keyword_lines):
    """Record the line numbers of every parenthesized acronym in a file.

    Args:
        file_path: Path of the text file to scan; it is decoded as UTF-8.
        keyword_lines: Mapping from acronym to a list of line numbers,
            updated in place. It must create missing entries on lookup
            (e.g. ``defaultdict(list)``). One entry is appended per match,
            so a line number repeats when an acronym occurs more than once
            on that line.

    Files that cannot be opened or decoded are reported on stdout and
    skipped; matches recorded before the failure are kept.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            for line_num, line in enumerate(file, start=1):
                for match in pattern.findall(line):
                    keyword_lines[match].append(line_num)
    except (OSError, UnicodeDecodeError) as e:
        # Only I/O and decoding problems are expected here; anything else is
        # a programming error and should surface instead of being swallowed.
        print(f"Error reading {file_path}: {e}")
def process_directory(directory):
    """Scan every ``.qmd`` file under *directory* for parenthesized acronyms.

    The tree is walked recursively with directories and files visited in
    sorted name order, so the order of keys and line numbers in the result
    is reproducible regardless of the order the filesystem lists entries.

    Args:
        directory: Root directory to search.

    Returns:
        A ``defaultdict(list)`` mapping each acronym to the line numbers
        where it was found. Line numbers from all files share one list per
        acronym; the originating file is not recorded.
    """
    keyword_lines = defaultdict(list)
    for root, dirs, files in os.walk(directory):
        # os.walk (top-down) honours in-place edits of ``dirs``; sorting it
        # fixes the order in which subdirectories are descended into.
        dirs.sort()
        for file in sorted(files):
            if file.endswith(".qmd"):
                process_file(os.path.join(root, file), keyword_lines)
    return keyword_lines
def print_results(keyword_lines):
    """Print each acronym with its line numbers, most frequent first.

    Args:
        keyword_lines: Mapping from acronym to the list of line numbers
            where it occurs.

    One line is written to stdout per acronym, formatted as
    ``KEYWORD: n1, n2, ...``. Acronyms with the same number of occurrences
    are listed alphabetically so the output does not depend on the order
    in which they were first encountered.
    """
    ranked = sorted(
        keyword_lines.items(),
        key=lambda item: (-len(item[1]), item[0]),
    )
    for keyword, lines in ranked:
        print(f"{keyword}: {', '.join(map(str, lines))}")
def main():
    """Parse the command line and report acronyms for a file or a directory.

    Exactly one of ``-f/--file`` or ``-d/--directory`` must be supplied.
    argparse enforces this: when neither or both are given it prints a usage
    message to stderr and exits with status 2, so misuse is visible to
    calling scripts instead of looking like a successful empty run.
    """
    parser = argparse.ArgumentParser(description="Extract uppercase keywords in parentheses and their line numbers from .qmd files.")
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("-f", "--file", help="Path to a single .qmd file")
    source.add_argument("-d", "--directory", help="Path to a directory containing .qmd files (processed recursively)")
    args = parser.parse_args()

    if args.file is not None:
        keyword_lines = defaultdict(list)
        process_file(args.file, keyword_lines)
    else:
        # The group is required, so a missing --file implies --directory.
        keyword_lines = process_directory(args.directory)
    print_results(keyword_lines)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()