#!/usr/bin/env python3
"""Generate redirect HTML stubs (and a Netlify _redirects file) from the
shared redirect-map.

Why this script exists
----------------------
GitHub Pages doesn't honor server-side redirects. To preserve SEO juice from
the legacy mlsysbook.ai URLs after the staged rollout, we emit one tiny HTML
file per legacy path:

    <link rel="canonical" href="NEW_URL">
    <meta name="robots" content="noindex">
    <meta http-equiv="refresh" content="0; url=NEW_URL">
    <a href="NEW_URL">NEW_URL</a>

Crawlers treat the canonical as authoritative, drop the legacy URL on
recrawl (the noindex), and follow the link graph through to the new
location. Real users hit the meta-refresh and arrive in <100ms.

The same map ALSO produces a Netlify-format `_redirects` file so that if we
ever move off GitHub Pages to a host that supports real 301s, the existing
redirect map drives that day-one without a second source of truth.

Usage
-----
    build-redirects.py --map shared/config/redirect-map.json \\
                       --out gh-pages-staging/ \\
                       [--base-url https://mlsysbook.ai] \\
                       [--check]

    --check    Validates the JSON without writing any files (CI-friendly).
"""

from __future__ import annotations

import argparse
import html
import json
import sys
from pathlib import Path
from typing import Any

DEFAULT_BASE_URL = "https://mlsysbook.ai"

# HTTP status codes accepted in the optional 'status' field of an entry.
VALID_STATUSES = (301, 302, 307, 308)

# Placeholders are filled by write_html_stub() with HTML-escaped values:
#   {canonical} -> URL advertised to crawlers as the authoritative location
#   {dest}      -> URL the visitor is sent to (meta-refresh + fallback link)
STUB_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Redirecting…</title>
<link rel="canonical" href="{canonical}">
<meta name="robots" content="noindex">
<meta http-equiv="refresh" content="0; url={dest}">
</head>
<body>
<p>This page has moved to <a href="{dest}">{dest}</a>.</p>
</body>
</html>
"""


def resolve_dest(to: str, base_url: str) -> str:
    """Return an absolute URL for `to`.

    If `to` is already absolute (http:// or https://), pass it through
    unchanged. Otherwise treat it as a path and resolve it against
    `base_url`, inserting the leading '/' when it is missing.
    """
    if to.startswith(("http://", "https://")):
        return to
    if not to.startswith("/"):
        to = "/" + to
    return base_url.rstrip("/") + to


def validate_entry(i: int, entry: dict[str, Any]) -> list[str]:
    """Return a list of validation errors for one entry. Empty list = OK.

    `i` is the entry's index in the 'redirects' list and is only used to
    label the error messages. Malformed input (a non-object entry, or
    non-string 'from'/'to' values) is reported as an error instead of
    raising, so one bad entry cannot abort validation of the whole map.
    """
    where = f"redirects[{i}]"
    if not isinstance(entry, dict):
        return [f"{where}: entry must be an object (got {type(entry).__name__})"]

    errs: list[str] = []
    for required in ("from", "to"):
        if required not in entry:
            errs.append(f"{where}: missing required field '{required}'")
        elif not isinstance(entry[required], str):
            errs.append(
                f"{where}: '{required}' must be a string "
                f"(got {entry[required]!r})"
            )

    src = entry.get("from")
    if isinstance(src, str):
        # An empty 'from' also fails this check; it would otherwise emit a
        # stub at the output root.
        if not src.startswith("/"):
            errs.append(f"{where}: 'from' must start with '/' (got {src!r})")
        # 'from' becomes a filesystem path under the output directory, so a
        # '..' segment would let a stub be written outside of it.
        if ".." in src.split("/"):
            errs.append(
                f"{where}: 'from' must not contain '..' segments (got {src!r})"
            )
        # Wildcard handling: only allowed as a single, final '*' segment
        # for now.
        if "*" in src and (src.count("*") != 1 or not src.endswith("/*")):
            errs.append(
                f"{where}: wildcard '*' currently only supported as the trailing "
                f"path segment (e.g. '/foo/*'); got {src!r}"
            )

    status = entry.get("status", 301)
    # The isinstance check rejects values such as 301.0 that compare equal
    # to a valid code but would be written verbatim into `_redirects`.
    if not isinstance(status, int) or status not in VALID_STATUSES:
        errs.append(f"{where}: 'status' should be 301/302/307/308 (got {status!r})")

    return errs


def write_html_stub(out_root: Path, src: str, dest_url: str) -> Path:
    """Materialize the redirect at out_root/<src>/index.html (or at the
    `.html`/`.htm` file itself if `src` already names one).

    `dest_url` is HTML-escaped before being substituted into STUB_TEMPLATE,
    so URLs containing '&' or quotes produce valid markup. Parent
    directories are created as needed. Returns the path that was written.
    """
    rel = src.lstrip("/")
    if rel.endswith((".html", ".htm")):
        target = out_root / rel
    else:
        # Root ('/'), directory-style ('/foo/') and extensionless ('/foo')
        # paths are all served from an index.html inside that directory.
        target = out_root / rel / "index.html"
    target.parent.mkdir(parents=True, exist_ok=True)
    safe_url = html.escape(dest_url, quote=True)
    target.write_text(
        STUB_TEMPLATE.format(dest=safe_url, canonical=safe_url),
        encoding="utf-8",
    )
    return target


def write_netlify_file(out_root: Path, lines: list[str]) -> Path:
    """Emit a Netlify-compatible `_redirects` file alongside the stubs.

    `lines` are pre-formatted "<from> <to> <status>" rules; they are written
    after a two-line generated-file header. Returns the path written.
    """
    target = out_root / "_redirects"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(
        "# Generated by shared/scripts/build-redirects.py — do not edit by hand.\n"
        "# Source of truth: shared/config/redirect-map.json\n"
        + "\n".join(lines)
        + "\n",
        encoding="utf-8",
    )
    return target


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    `argv` is the argument list to parse; None (the default) means
    sys.argv[1:].

    Returns the process exit code: 0 on success, 1 when the map fails
    validation, 2 when the map cannot be loaded or has the wrong shape.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--map", required=True, help="Path to redirect-map.json")
    ap.add_argument(
        "--out",
        required=False,
        help="Directory to emit redirect stubs into (typically the staging "
        "copy of gh-pages). Required unless --check is passed.",
    )
    ap.add_argument(
        "--base-url",
        default=DEFAULT_BASE_URL,
        help=f"Base URL for relative 'to' values (default: {DEFAULT_BASE_URL})",
    )
    ap.add_argument(
        "--check",
        action="store_true",
        help="Validate the map only; do not write files.",
    )
    args = ap.parse_args(argv)

    if not args.check and not args.out:
        ap.error("--out is required unless --check is passed")

    map_path = Path(args.map)
    try:
        data = json.loads(map_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ Could not load {map_path}: {e}", file=sys.stderr)
        return 2

    if not isinstance(data, dict):
        print("❌ Top-level JSON value must be an object", file=sys.stderr)
        return 2

    redirects = data.get("redirects", [])
    if not isinstance(redirects, list):
        print("❌ 'redirects' must be a list", file=sys.stderr)
        return 2

    errors: list[str] = []
    for i, entry in enumerate(redirects):
        errors.extend(validate_entry(i, entry))

    if errors:
        print("❌ Validation errors:", file=sys.stderr)
        for e in errors:
            print(f" - {e}", file=sys.stderr)
        return 1

    print(f"✅ Validated {len(redirects)} redirect entries from {map_path}")

    if args.check:
        return 0

    out_root = Path(args.out)
    out_root.mkdir(parents=True, exist_ok=True)

    netlify_lines: list[str] = []
    written = 0
    for entry in redirects:
        src = entry["from"]
        dest = resolve_dest(entry["to"], args.base_url)
        status = entry.get("status", 301)
        netlify_lines.append(f"{src} {dest} {status}")

        if "*" in src:
            # We cannot statically expand wildcards — that requires walking
            # the deployed tree. Skip stub emission and rely on the Netlify
            # _redirects line for hosts that support it.
            continue

        target = write_html_stub(out_root, src, dest)
        written += 1
        print(f" → {src} ⇒ {dest} ({target.relative_to(out_root)})")

    write_netlify_file(out_root, netlify_lines)
    print(f"✅ Wrote {written} HTML stub(s) and 1 _redirects file to {out_root}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())