Files
cs249r_book/utils/find_fig_references.py
Vijay Janapa Reddi 6dab6eb5e4 (Non-) ASCII checker scripts + fixes
When working with PDF builds, one of the biggest problems has been Unicode errors, so the scripts in this push help find all non-ASCII characters so that I can fix them manually. Another recurring issue is broken figure reference labels; the find_fig_references script detects those, prints a warning for each one, and exits with a non-zero status.
2024-02-19 16:57:54 -05:00

69 lines
2.9 KiB
Python

import os
import re
import argparse
import sys
def find_fig_references(file_or_directory):
    """
    Check Quarto Markdown sources for figure labels that contain underscores.

    Both label definitions (``{#fig-...}``) and citations (``@fig-...``) are
    inspected. A file path is handed to ``process_file``; a directory path is
    handed to ``process_directory``.

    Args:
        file_or_directory: Path to a single file or to a directory to scan.

    Returns:
        None when the scan completes without reporting any reference.

    Raises:
        SystemExit: With status 1 when at least one offending reference was
            reported, and with status 2 when the path is neither an existing
            file nor an existing directory.
    """
    # Captures everything after "fig-" up to the first "}" or whitespace.
    # The trailing optional group is redundant (the first character class
    # already admits underscores) but is kept so matching stays unchanged.
    fig_ref_pattern = re.compile(r'(?:\{#|@)fig-([^}\s]+(?:_[^}\s]+)?)')

    if os.path.isfile(file_or_directory):
        found_warnings = process_file(file_or_directory, fig_ref_pattern)
    elif os.path.isdir(file_or_directory):
        found_warnings = process_directory(file_or_directory, fig_ref_pattern)
    else:
        # A mistyped path must not look like a clean run: report it on stderr
        # and fail, using a status distinct from the "warnings found" one.
        print("Error: The specified file or directory does not exist.",
              file=sys.stderr)
        sys.exit(2)

    if found_warnings:
        sys.exit(1)  # Exit with non-zero error code if warnings were found
def process_file(file_path, fig_ref_pattern):
    """
    Scan one file and report every figure reference containing an underscore.

    The file is read as UTF-8. Each line is searched with ``fig_ref_pattern``;
    any captured reference that contains ``_`` is printed together with its
    1-based line number.

    Args:
        file_path: Path of the file to scan.
        fig_ref_pattern: Compiled regular expression whose ``findall`` yields
            the reference text to inspect.

    Returns:
        True if at least one reference with an underscore was reported,
        otherwise False.
    """
    print(f"Processing file: {file_path}")
    with open(file_path, 'r', encoding='utf-8') as handle:
        content = handle.readlines()

    # Lazily pair each offending reference with the line it was found on.
    offenders = (
        (line_no, ref)
        for line_no, text in enumerate(content, start=1)
        for ref in fig_ref_pattern.findall(text)
        if '_' in ref
    )

    flagged = False
    for line_no, ref in offenders:
        print(f"Warning: Underscore in fig reference detected "
              f"at line {line_no} in file {file_path}")
        print(f" Reference: {ref}")
        flagged = True
    return flagged
def process_directory(directory, fig_ref_pattern):
    """
    Walk a directory tree and scan every ``.qmd`` file found in it.

    Each matching file is passed to ``process_file``; all files are scanned
    even after a warning has already been found.

    Args:
        directory: Root directory to walk.
        fig_ref_pattern: Compiled regular expression forwarded unchanged to
            ``process_file``.

    Returns:
        True if any scanned file reported a warning, otherwise False.
    """
    print(f"Searching for invalid fig references in directory: {directory}")
    # Build the full list first so that no file is skipped; any() on a
    # generator would stop at the first file that reports a warning.
    outcomes = [
        process_file(os.path.join(parent, name), fig_ref_pattern)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(".qmd")
    ]
    return any(outcomes)
def main():
    """
    Command-line entry point.

    Parses a single positional argument, ``file_or_directory``, from the
    command line and passes it to ``find_fig_references``.
    """
    summary = "Find and warn about invalid fig references in Quarto Markdown files."
    details = (
        "The script searches for references to figures in Quarto Markdown files. "
        "It looks for references inside {#fig-} or @fig- that contain underscores. "
        "If such references are found, a warning is printed, and the script exits "
        "with a non-zero error code."
    )

    cli = argparse.ArgumentParser(
        description=summary,
        epilog=details,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "file_or_directory",
        help="QMD file or directory to search for Quarto Markdown files.",
    )

    target = cli.parse_args().file_or_directory
    find_fig_references(target)


if __name__ == "__main__":
    main()