Files
cs249r_book/utils/ascii_checker.py
Vijay Janapa Reddi 6dab6eb5e4 (Non-) ASCII checker scripts + fixes
When working with PDF builds, one of the big issues has been that I get into Unicode error issues, and so the scripts in this particular push help find all the non-ASCII Unicodes so that I can fix them manually. Also, another issue that shows up is with figure reference labels, which can be broken. And so, the fig_references script detects those and raises errors.
2024-02-19 16:57:54 -05:00

97 lines
3.4 KiB
Python

import os
import re
import argparse
import sys
def find_issues(file_path, file_extension):
"""Find potential issues in a file.
Args:
file_path (str): The path to the file to be analyzed.
file_extension (str): The extension of the file to determine the type of analysis.
Returns:
list: A list of issues identified in the file.
"""
issues = []
# Open the file and read its contents
with open(file_path, 'r', encoding='utf-8') as file:
lines = file.readlines()
# Check for issues based on file extension
if os.path.splitext(file_path)[1] == file_extension:
# Check for potential Markdown issues
for i, line in enumerate(lines, start=1):
# Check for non-ASCII Unicode characters
non_ascii_chars = re.findall(r'[^\x00-\x7F]', line)
if non_ascii_chars:
for char in non_ascii_chars:
char_idx = line.index(char)
context = line[max(0, char_idx - 10):min(len(line), char_idx + 10)]
issues.append(f"Non-ASCII Unicode character '{char}' found at line {i}: '{context}'")
return issues
def generate_error_report(directory, file_extension):
"""Generate an error report for files with a specific extension in a directory.
Args:
directory (str): The directory to search for files.
file_extension (str): The file extension to check (e.g., .tex, .md).
Returns:
dict: A dictionary containing file paths mapped to lists of issues found in those files.
"""
error_report = {}
# Variable to track whether any issues have been found
has_issues = False
# Recursively iterate through the directory
for root, _, files in os.walk(directory):
for file in files:
# Check if the file has the specified extension
file_path = os.path.join(root, file)
if os.path.splitext(file_path)[1] == file_extension:
print(f"Processing file: {file_path}")
issues = find_issues(file_path, file_extension)
if issues:
has_issues = True
error_report[file_path] = issues
return has_issues, error_report
def main():
# Command line argument parser
parser = argparse.ArgumentParser(description="Check files for potential issues.")
parser.add_argument("directory", help="The directory to search for files.")
parser.add_argument("file_extension", help="The file extension to check (e.g., .tex, .md)")
# Adding help option for argument parsing
parser.add_argument(
"-help",
action="help",
help="Show this help message and exit."
)
args = parser.parse_args()
# Generate error report
has_issues, error_report = generate_error_report(args.directory, args.file_extension)
# Print error report
if error_report:
print("\nError Report:")
for file_path, issues in error_report.items():
print(f"\nFile: {file_path}")
for issue in issues:
print(f"- {issue}")
# Check if any issues were found
if has_issues:
sys.exit(1) # Exit with non-zero exit code if issues were found
else:
print(f"\nNo potential issues found in {args.file_extension} files.")
sys.exit(0) # Exit with zero exit code if no issues were found
if __name__ == "__main__":
main()