mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-05 09:09:13 -05:00
- Implemented 'Logic First' pedagogical flow: Demand -> Supply -> Service -> Hardening - Part I: Foundations of Scale (Logic) - Part II: Building the Fleet (Physics) - split Networking and Orchestration - Part III: Deployment at Scale (Service) - added Optimization at Scale - Part IV: Production Concerns (Hardening) - consolidated Security, Robustness, Sustainability, Frontiers - Eliminated redundancy across chapters (e.g., Young's formula moved to Storage) - Updated transitions and cross-references for coherent flow - Updated README.md and Quarto configuration to reflect new structure
280 lines
11 KiB
Python
280 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
check_images.py
|
|
|
|
Validates image files by inspecting their actual content.
|
|
Supports .png, .jpg, .jpeg, .gif, .svg, .webp formats.
|
|
|
|
Usage:
|
|
- Single file: python check_images.py -f image.png
|
|
- Directory scan: python check_images.py -d ./assets
|
|
- CI hooks: python check_images.py image1.png image2.jpg
|
|
- Auto-fix: python check_images.py -d ./assets --fix
|
|
- Show progress: python check_images.py -d ./assets --verbose
|
|
|
|
By default, only shows summary. Use --verbose (-v) to see progress
|
|
for each file with ✅/❌ indicators. Use --debug for detailed info.
|
|
|
|
Validation methods:
|
|
- Raster formats (PNG, JPEG, GIF, WebP): Uses Pillow to verify format
|
|
- Vector formats (SVG): Validates XML structure and SVG namespace
|
|
|
|
Returns:
|
|
- Exit code 1 if invalid image files are found.
|
|
- Exit code 2 if files are unreadable.
|
|
- Exit code 0 if all images are valid or fixed.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import xml.etree.ElementTree as ET
|
|
from PIL import Image, UnidentifiedImageError
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
# (extension, expected format name) pairs for every file type this script
# validates. Extensions are lower-case and keep their leading dot, which is the
# form os.path.splitext(...)[1].lower() produces.
_EXTENSION_FORMAT_PAIRS = (
    ('.png', 'PNG'),
    ('.jpg', 'JPEG'),
    ('.jpeg', 'JPEG'),
    ('.gif', 'GIF'),
    ('.svg', 'SVG'),
    ('.webp', 'WEBP'),  # Modern web format
)

# Lookup table: file extension -> format name the file content must have.
VALID_EXTENSIONS = dict(_EXTENSION_FORMAT_PAIRS)
|
|
|
|
# Shared rich Console instance that the reporting functions in this file
# print their progress lines, summary and tables through.
console = Console()
|
|
|
|
def is_valid_svg(filepath):
    """Check that *filepath* is well-formed XML whose root element is ``<svg>``.

    Args:
        filepath: Path of the file to parse.

    Returns:
        A 2-tuple; callers distinguish the cases by the type of the first item:

        - ``(True, 'SVG')`` when the root element's local name is ``svg``
          (compared case-insensitively, with or without a namespace).
        - ``(False, "Not valid SVG (root: <tag>)")`` when the XML parses but
          the root is some other element.
        - ``("Invalid XML: <error>", None)`` when the XML cannot be parsed.
        - ``("Unreadable: <error>", None)`` for any other failure, such as a
          missing file.
    """
    try:
        root = ET.parse(filepath).getroot()
    except ET.ParseError as e:
        return f"Invalid XML: {e}", None
    except Exception as e:
        return f"Unreadable: {e}", None

    # ElementTree spells a namespaced tag as "{namespace-uri}local". Compare
    # only the local part: a plain substring test would also match the SVG
    # namespace URI itself (so a root like <g xmlns=".../svg"> would pass) and
    # unrelated names such as <svgfoo>.
    local_name = root.tag.rsplit('}', 1)[-1]
    if local_name.lower() == 'svg':
        return True, 'SVG'
    return False, f"Not valid SVG (root: {root.tag})"
|
|
|
|
def is_valid_image(filepath, expected_format):
    """Compare the format detected in a file's content with *expected_format*.

    SVG files are delegated to ``is_valid_svg``; every other format is opened
    with Pillow and the format Pillow reports is compared to the expected one.

    Returns:
        ``(matches, detected_format)`` where ``matches`` is a bool, or
        ``("Unreadable: <error>", None)`` when the file cannot be opened.
    """
    if expected_format == 'SVG':
        return is_valid_svg(filepath)

    try:
        with Image.open(filepath) as opened:
            detected = opened.format.upper()
    except Exception as err:
        return f"Unreadable: {err}", None

    return detected == expected_format, detected
|
|
|
|
def fix_image(filepath, expected_format):
    """Re-encode the image at *filepath* in place so its content is *expected_format*.

    SVG files cannot be auto-fixed; a warning is printed and ``False`` returned.

    Args:
        filepath: Path of the image to rewrite; the file is overwritten.
        expected_format: Format name to save as (a value of ``VALID_EXTENSIONS``).

    Returns:
        ``True`` when the file was rewritten, ``False`` when it is an SVG or
        when opening, converting or saving failed (the error is printed).
    """
    if expected_format == 'SVG':
        console.print(f"⚠️ [yellow]Cannot fix SVG files:[/yellow] {filepath}")
        return False

    # PNG and WebP can both store an alpha channel, so keep it for them;
    # flattening WebP to RGB would silently drop transparency. The remaining
    # formats are saved from RGB as before.
    target_mode = 'RGBA' if expected_format in ('PNG', 'WEBP') else 'RGB'

    try:
        # convert() returns a new, fully loaded image, so the source file can
        # be closed before the same path is opened again for writing.
        with Image.open(filepath) as source:
            converted = source.convert(target_mode)
        converted.save(filepath, format=expected_format)
    except Exception as e:
        console.print(f"❌ [red]Failed to fix:[/red] {filepath} ({e})")
        return False

    console.print(f"🔧 [blue]Fixed:[/blue] {filepath} → {expected_format}")
    return True
|
|
|
|
def check_file(filepath, strict=False, verbose=False, fix=False, show_progress=True):
    """Validate a single file against the format implied by its extension.

    Args:
        filepath: Path of the file to check.
        strict: Report files whose extension is not in ``VALID_EXTENSIONS``
            as invalid instead of skipping them.
        verbose: Print successful files even when ``show_progress`` is off,
            and (together with ``show_progress``) print skipped files.
        fix: On a format mismatch, try to rewrite the file via ``fix_image``.
        show_progress: Print a one-line ✅/❌ result for the file.

    Returns:
        An empty list when the file is valid, skipped, or successfully fixed;
        otherwise a one-element list holding a
        ``(filepath, reason, actual_format, expected_format)`` tuple.
    """
    ext = os.path.splitext(filepath)[1].lower()
    expected_format = VALID_EXTENSIONS.get(ext)

    if not expected_format:
        if strict:
            if show_progress:
                console.print(f"⚠️ [yellow]{filepath}[/yellow] - Unsupported extension")
            # ext already carries its leading dot, so it is not prefixed with
            # another one; files without any extension are reported as "none".
            msg = f"Unsupported extension ({ext or 'none'})"
            return [(filepath, msg, None, None)]
        if verbose and show_progress:
            console.print(f"⚠️ [dim]Skip:[/dim] {filepath} (unsupported extension)")
        return []

    result, actual_format = is_valid_image(filepath, expected_format)

    # result is True: content matches the extension.
    if result is True:
        if show_progress:
            console.print(f"✅ [green]{filepath}[/green] ({actual_format})")
        elif verbose:
            console.print(f"✅ [green]{filepath}[/green]: valid ({actual_format})")
        return []

    # result is a string: the validator could not read or parse the file and
    # the string is the reason.
    if isinstance(result, str):
        if show_progress:
            console.print(f"❌ [red]{filepath}[/red] - {result}")
        return [(filepath, result, None, expected_format)]

    # Otherwise the file was readable but its content is a different format.
    if fix:
        fixed = fix_image(filepath, expected_format)
        if fixed:
            return []
        if show_progress:
            console.print(f"❌ [red]{filepath}[/red] - Fix failed")
        return [(filepath, "Fix failed", actual_format, expected_format)]

    if show_progress:
        console.print(f"❌ [red]{filepath}[/red] - Format mismatch ({actual_format} != {expected_format})")
    return [(filepath, "Format mismatch", actual_format, expected_format)]
|
|
|
|
def check_directory(root_dir, strict=False, verbose=False, fix=False, show_progress=True):
    """Walk *root_dir* recursively and validate every file with a known image extension.

    Files whose extension is not in ``VALID_EXTENSIONS`` are counted but not
    checked. The keyword options are passed through to ``check_file``.

    Returns:
        ``(invalid_files, total_files, image_files, format_stats)`` where
        ``invalid_files`` is the concatenation of the ``check_file`` results,
        ``total_files`` counts every file seen, ``image_files`` counts the
        files that were checked, and ``format_stats`` maps each expected
        format to a ``{'total', 'valid', 'invalid'}`` counter dict.
    """
    if show_progress:
        console.print(f"\n🔍 [bold cyan]Scanning directory:[/bold cyan] {root_dir}")
        console.print()

    problems = []
    seen_count = 0
    image_count = 0
    per_format = {}  # expected format -> counters

    for folder, _subdirs, names in os.walk(root_dir):
        for name in names:
            seen_count += 1
            suffix = os.path.splitext(name)[1].lower()

            # Anything that is not a known image type is only counted.
            if suffix not in VALID_EXTENSIONS:
                continue

            image_count += 1
            fmt = VALID_EXTENSIONS[suffix]
            counters = per_format.setdefault(fmt, {'total': 0, 'valid': 0, 'invalid': 0})
            counters['total'] += 1

            findings = check_file(
                os.path.join(folder, name),
                strict=strict,
                verbose=verbose,
                fix=fix,
                show_progress=show_progress,
            )
            if findings:
                counters['invalid'] += 1
                problems.extend(findings)
            else:
                counters['valid'] += 1

    return problems, seen_count, image_count, per_format
|
|
|
|
def print_invalid_files(invalid):
    """Print the collected validation failures as a four-column rich table.

    Each entry of *invalid* is expected to be a 4-item list or tuple
    ``(file, reason, actual_format, expected_format)``; a missing format is
    shown as an em dash. Entries of any other shape get a row flagging an
    internal script error instead of raising.
    """
    report = Table(title="❌ Invalid Image Files", show_lines=True)
    report.add_column("File", style="cyan", overflow="fold")
    for heading, colour in (
        ("Reason", "red"),
        ("Actual Format", "yellow"),
        ("Expected Format", "red"),
    ):
        report.add_column(heading, style=colour)

    for entry in invalid:
        well_formed = isinstance(entry, (list, tuple)) and len(entry) == 4
        if not well_formed:
            report.add_row(str(entry), "Internal script error: invalid item format", "—", "—")
            continue
        path, why, found, wanted = entry
        report.add_row(path, why, str(found or "—"), str(wanted or "—"))

    console.print(report)
|
|
|
|
def main():
    """Parse the command line, validate the requested images, print a summary and exit.

    Input modes, in order of precedence: ``-f/--file`` (one file), ``-d/--dir``
    (recursive scan), then positional file paths. When none is given the help
    text is printed and the process exits with status 0.

    Exit status:
        0 when no file is reported invalid, 2 when any reported reason
        contains "Unreadable", 1 for any other failure.

    NOTE: in directory mode ``check_directory`` only hands files with a known
    image extension to ``check_file``, so ``--strict`` has no effect there.
    """
    parser = argparse.ArgumentParser(description="Validate image files by checking actual format using Pillow.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-f', '--file', type=str, help="Path to a single image file")
    group.add_argument('-d', '--dir', type=str, help="Directory to scan recursively")
    parser.add_argument('files', nargs='*', help="Files passed directly (e.g., via pre-commit)")
    parser.add_argument('--strict', action='store_true', help="Fail on unsupported file extensions")
    parser.add_argument('--verbose', '-v', action='store_true', help="Show progress for each file checked")
    parser.add_argument('--debug', action='store_true', help="Show detailed debug information")
    parser.add_argument('--fix', action='store_true', help="Attempt to fix format mismatches in place")

    args = parser.parse_args()
    invalid = []
    total_files = 0
    image_files = 0
    format_stats = {}
    show_progress = args.verbose
    debug_mode = args.debug

    if args.file:
        image_files = 1
        total_files = 1
        if show_progress:
            console.print(f"\n🔍 [bold cyan]Checking single file:[/bold cyan] {args.file}")
            console.print()
        invalid = check_file(args.file, strict=args.strict, verbose=debug_mode, fix=args.fix, show_progress=show_progress)

        # Track format stats for single file
        ext = os.path.splitext(args.file)[1].lower()
        if ext in VALID_EXTENSIONS:
            expected_format = VALID_EXTENSIONS[ext]
            format_stats[expected_format] = {'total': 1, 'valid': 0 if invalid else 1, 'invalid': len(invalid)}

    elif args.dir:
        invalid, total_files, image_files, format_stats = check_directory(
            args.dir, strict=args.strict, verbose=debug_mode, fix=args.fix, show_progress=show_progress
        )

    elif args.files:
        if show_progress:
            console.print(f"\n🔍 [bold cyan]Checking {len(args.files)} files...[/bold cyan]")
            console.print()
        image_files = len(args.files)
        total_files = len(args.files)

        for fpath in args.files:
            # Every path goes through check_file so that --strict can report
            # unsupported extensions; without --strict check_file returns []
            # for them, which keeps them silently skipped.
            file_invalid = check_file(fpath, strict=args.strict, verbose=debug_mode, fix=args.fix, show_progress=show_progress)
            invalid.extend(file_invalid)

            ext = os.path.splitext(fpath)[1].lower()
            expected_format = VALID_EXTENSIONS.get(ext)
            if expected_format is None:
                # Not an image type: nothing to record in the format breakdown.
                continue

            stats = format_stats.setdefault(expected_format, {'total': 0, 'valid': 0, 'invalid': 0})
            stats['total'] += 1
            stats['invalid' if file_invalid else 'valid'] += 1
    else:
        parser.print_help()
        sys.exit(0)

    # Print summary
    console.print()
    console.print("[bold]📊 Summary:[/bold]")
    if args.dir:
        console.print(f" Total files scanned: [cyan]{total_files}[/cyan]")
    console.print(f" Image files found: [cyan]{image_files}[/cyan]")
    console.print(f" Valid images: [green]{image_files - len(invalid)}[/green]")
    console.print(f" Invalid images: [red]{len(invalid)}[/red]")

    # Show format breakdown
    if format_stats:
        console.print()
        console.print("[bold]📋 Format Breakdown:[/bold]")

        # Sort by total count (descending)
        sorted_formats = sorted(format_stats.items(), key=lambda x: x[1]['total'], reverse=True)

        for format_name, stats in sorted_formats:
            fmt_total = stats['total']
            fmt_valid = stats['valid']
            fmt_invalid = stats['invalid']

            # green: nothing invalid, yellow: some invalid, red: all invalid
            status_color = "green" if fmt_invalid == 0 else "yellow" if fmt_invalid < fmt_total else "red"
            console.print(f" {format_name}: [cyan]{fmt_total}[/cyan] total ([{status_color}]{fmt_valid} valid, {fmt_invalid} invalid[/{status_color}])")

    if invalid:
        console.print()
        print_invalid_files(invalid)
        # Unreadable files take precedence over plain mismatches in the exit code.
        unreadable = any("Unreadable" in reason for _, reason, _, _ in invalid)
        sys.exit(2 if unreadable else 1)
    else:
        console.print("\n[bold green]✅ All image files are valid[/bold green]")
        sys.exit(0)
|
|
|
|
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|