mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 17:20:21 -05:00
308 lines
11 KiB
Python
Executable File
308 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
SVG Cleanup Script - Cross-Platform Version
|
|
============================================
|
|
Removes control characters from SVG files that can cause rendering issues
|
|
in browsers and other tools. Control characters are often introduced by
|
|
LaTeX -> SVG conversion tools.
|
|
|
|
This is a cross-platform Python replacement for clean-svgs.sh that works
|
|
on Windows, macOS, and Linux.
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
import shutil
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import List, Optional, Tuple
|
|
import re
|
|
|
|
|
|
class Colors:
|
|
"""ANSI color codes for terminal output."""
|
|
|
|
def __init__(self):
|
|
# Only use colors if stdout is a terminal
|
|
if sys.stdout.isatty():
|
|
self.RED = '\033[0;31m'
|
|
self.GREEN = '\033[0;32m'
|
|
self.YELLOW = '\033[1;33m'
|
|
self.BLUE = '\033[0;34m'
|
|
self.PURPLE = '\033[0;35m'
|
|
self.CYAN = '\033[0;36m'
|
|
self.NC = '\033[0m' # No Color
|
|
else:
|
|
self.RED = self.GREEN = self.YELLOW = ''
|
|
self.BLUE = self.PURPLE = self.CYAN = self.NC = ''
|
|
|
|
|
|
class SVGCleaner:
|
|
"""Cross-platform SVG file cleaner that removes control characters."""
|
|
|
|
def __init__(self, debug_mode: bool = False):
|
|
self.debug_mode = debug_mode
|
|
self.colors = Colors()
|
|
self.script_name = Path(__file__).name
|
|
|
|
# Control characters to remove (same as bash version)
|
|
# \000-\010: NULL through BACKSPACE
|
|
# \013: VERTICAL TAB
|
|
# \014: FORM FEED
|
|
# \016-\037: SHIFT OUT through UNIT SEPARATOR
|
|
self.control_chars_pattern = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F]')
|
|
|
|
def log_info(self, message: str) -> None:
|
|
"""Log info message."""
|
|
print(f"{self.colors.CYAN}[INFO]{self.colors.NC} {self.script_name}: {message}")
|
|
|
|
def log_success(self, message: str) -> None:
|
|
"""Log success message."""
|
|
print(f"{self.colors.GREEN}[SUCCESS]{self.colors.NC} {self.script_name}: {message}")
|
|
|
|
def log_warning(self, message: str) -> None:
|
|
"""Log warning message."""
|
|
print(f"{self.colors.YELLOW}[WARNING]{self.colors.NC} {self.script_name}: {message}")
|
|
|
|
def log_error(self, message: str) -> None:
|
|
"""Log error message."""
|
|
print(f"{self.colors.RED}[ERROR]{self.colors.NC} {self.script_name}: {message}")
|
|
|
|
def log_file(self, message: str) -> None:
|
|
"""Log file-specific message."""
|
|
print(f"{self.colors.PURPLE} → {self.colors.NC}{message}")
|
|
|
|
def log_debug(self, message: str) -> None:
|
|
"""Log debug message if debug mode is enabled."""
|
|
if self.debug_mode:
|
|
print(f"{self.colors.BLUE}[DEBUG]{self.colors.NC} {self.script_name}: {message}")
|
|
|
|
def find_svg_files(self, build_dir: Path) -> List[Path]:
|
|
"""Find all SVG files in the build directory."""
|
|
svg_files = []
|
|
try:
|
|
# Use pathlib's glob for cross-platform file finding
|
|
svg_files = list(build_dir.rglob('*.svg'))
|
|
self.log_debug(f"Found {len(svg_files)} SVG files")
|
|
except Exception as e:
|
|
self.log_error(f"Error finding SVG files: {e}")
|
|
|
|
return svg_files
|
|
|
|
def has_control_characters(self, file_path: Path) -> bool:
|
|
"""Check if file contains control characters."""
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
|
content = f.read()
|
|
return bool(self.control_chars_pattern.search(content))
|
|
except Exception as e:
|
|
self.log_debug(f"Error reading file {file_path}: {e}")
|
|
return False
|
|
|
|
def clean_svg_file(self, svg_file: Path) -> str:
|
|
"""
|
|
Clean control characters from an SVG file.
|
|
|
|
Returns:
|
|
str: 'cleaned' if file was cleaned, 'no_action' if no cleaning needed, 'error' if failed
|
|
"""
|
|
self.log_debug(f"Processing: {svg_file}")
|
|
|
|
# Check if file contains control characters
|
|
if not self.has_control_characters(svg_file):
|
|
self.log_debug(f"No control characters found in: {svg_file}")
|
|
return 'no_action'
|
|
|
|
self.log_file(f"Cleaning: {svg_file}")
|
|
self.log_debug(f"Control characters detected in: {svg_file}")
|
|
|
|
# Check write permissions
|
|
if not os.access(svg_file, os.W_OK):
|
|
self.log_error(f"No write permission for: {svg_file}")
|
|
return 'error'
|
|
|
|
# Create temporary file for safe processing
|
|
try:
|
|
with tempfile.NamedTemporaryFile(mode='w', delete=False,
|
|
suffix='.svg.tmp',
|
|
dir=svg_file.parent) as temp_file:
|
|
temp_path = Path(temp_file.name)
|
|
|
|
self.log_debug(f"Created temporary file: {temp_path}")
|
|
|
|
# Read original file and clean control characters
|
|
with open(svg_file, 'r', encoding='utf-8', errors='ignore') as input_file:
|
|
content = input_file.read()
|
|
|
|
# Remove control characters
|
|
cleaned_content = self.control_chars_pattern.sub('', content)
|
|
|
|
# Write cleaned content to temporary file
|
|
with open(temp_path, 'w', encoding='utf-8') as output_file:
|
|
output_file.write(cleaned_content)
|
|
|
|
# Create backup of original file
|
|
backup_path = svg_file.with_suffix(f'{svg_file.suffix}.bak.{os.getpid()}')
|
|
self.log_debug(f"Creating backup: {backup_path}")
|
|
|
|
shutil.copy2(svg_file, backup_path)
|
|
|
|
# Replace original with cleaned version
|
|
shutil.move(str(temp_path), str(svg_file))
|
|
|
|
# Remove backup on success
|
|
backup_path.unlink()
|
|
self.log_debug(f"Successfully cleaned: {svg_file}")
|
|
self.log_debug(f"Removed backup: {backup_path}")
|
|
|
|
return 'cleaned'
|
|
|
|
except Exception as e:
|
|
self.log_error(f"Failed to clean {svg_file}: {e}")
|
|
|
|
# Clean up temporary file if it exists
|
|
if 'temp_path' in locals() and temp_path.exists():
|
|
try:
|
|
temp_path.unlink()
|
|
except:
|
|
pass
|
|
|
|
# Restore from backup if it exists
|
|
if 'backup_path' in locals() and backup_path.exists():
|
|
try:
|
|
shutil.move(str(backup_path), str(svg_file))
|
|
self.log_info("Restored original file from backup")
|
|
except Exception as restore_error:
|
|
self.log_error(f"Failed to restore backup for {svg_file}: {restore_error}")
|
|
|
|
return 'error'
|
|
|
|
def clean_directory(self, build_dir: str) -> Tuple[int, int]:
|
|
"""
|
|
Clean all SVG files in the specified directory.
|
|
|
|
Returns:
|
|
Tuple[int, int]: (cleaned_count, error_count)
|
|
"""
|
|
build_path = Path(build_dir)
|
|
|
|
self.log_info(f"Starting SVG cleanup in directory: {build_path}")
|
|
|
|
if self.debug_mode:
|
|
self.log_debug("Debug mode enabled")
|
|
quarto_output_dir = os.environ.get('QUARTO_PROJECT_OUTPUT_DIR')
|
|
if quarto_output_dir:
|
|
self.log_debug("Running as Quarto post-render script")
|
|
self.log_debug(f"QUARTO_PROJECT_OUTPUT_DIR={quarto_output_dir}")
|
|
quarto_render_all = os.environ.get('QUARTO_PROJECT_RENDER_ALL')
|
|
if quarto_render_all:
|
|
self.log_debug(f"Full project render detected (QUARTO_PROJECT_RENDER_ALL={quarto_render_all})")
|
|
else:
|
|
self.log_debug("Incremental or preview render")
|
|
else:
|
|
self.log_debug("Running as standalone script")
|
|
|
|
# Check if build directory exists
|
|
if not build_path.exists():
|
|
self.log_warning(f"Build directory '{build_path}' does not exist")
|
|
self.log_info("Script completed (nothing to clean)")
|
|
return 0, 0
|
|
|
|
if not build_path.is_dir():
|
|
self.log_error(f"'{build_path}' is not a directory")
|
|
return 0, 1
|
|
|
|
# Find all SVG files
|
|
svg_files = self.find_svg_files(build_path)
|
|
|
|
if not svg_files:
|
|
self.log_info(f"No SVG files found in {build_path}")
|
|
self.log_info("Script completed (nothing to clean)")
|
|
return 0, 0
|
|
|
|
# Process each SVG file
|
|
cleaned_count = 0
|
|
error_count = 0
|
|
|
|
for svg_file in svg_files:
|
|
result = self.clean_svg_file(svg_file)
|
|
if result == 'cleaned':
|
|
cleaned_count += 1
|
|
elif result == 'error':
|
|
error_count += 1
|
|
# 'no_action' doesn't increment any counter
|
|
|
|
return cleaned_count, error_count
|
|
|
|
|
|
def parse_arguments() -> argparse.Namespace:
|
|
"""Parse command line arguments."""
|
|
parser = argparse.ArgumentParser(
|
|
description="Clean control characters from SVG files",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
This script can be run in two modes:
|
|
|
|
1. Quarto Post-Render Script (automatic):
|
|
When run by Quarto, uses QUARTO_PROJECT_OUTPUT_DIR environment variable
|
|
Usage: %(prog)s [--debug]
|
|
|
|
2. Standalone Script:
|
|
Usage: %(prog)s [BUILD_DIR] [--debug]
|
|
|
|
Examples:
|
|
%(prog)s # Clean SVGs in _build directory (or Quarto output dir)
|
|
%(prog)s --debug # Enable debug mode (when run by Quarto)
|
|
%(prog)s output # Clean SVGs in output directory (standalone)
|
|
%(prog)s _build --debug # Clean with debug output enabled (standalone)
|
|
"""
|
|
)
|
|
|
|
parser.add_argument(
|
|
'build_dir',
|
|
nargs='?',
|
|
help='Directory to clean SVG files in (default: _build or QUARTO_PROJECT_OUTPUT_DIR)'
|
|
)
|
|
|
|
parser.add_argument(
|
|
'--debug',
|
|
action='store_true',
|
|
help='Enable debug output'
|
|
)
|
|
|
|
return parser.parse_args()
|
|
|
|
|
|
def main() -> int:
|
|
"""Main entry point."""
|
|
args = parse_arguments()
|
|
|
|
# Determine build directory
|
|
if args.build_dir:
|
|
build_dir = args.build_dir
|
|
else:
|
|
# Use Quarto's output directory if available, otherwise default
|
|
build_dir = os.environ.get('QUARTO_PROJECT_OUTPUT_DIR', '_build')
|
|
|
|
# Create cleaner and process files
|
|
cleaner = SVGCleaner(debug_mode=args.debug)
|
|
cleaned_count, error_count = cleaner.clean_directory(build_dir)
|
|
|
|
# Show summary
|
|
if cleaned_count > 0:
|
|
cleaner.log_success(f"Cleaned {cleaned_count} SVG file(s)")
|
|
|
|
if error_count > 0:
|
|
cleaner.log_warning(f"Failed to clean {error_count} SVG file(s) - see error messages above")
|
|
cleaner.log_info("Script completed with some errors but continuing")
|
|
return 1
|
|
else:
|
|
cleaner.log_info("Script completed successfully")
|
|
return 0
|
|
|
|
|
|
if __name__ == '__main__':
|
|
sys.exit(main())
|