#!/usr/bin/env python3
"""Generate redirect HTML stubs (and a Netlify _redirects file) from the
shared redirect-map.
Why this script exists
----------------------
GitHub Pages doesn't honor server-side redirects. To preserve SEO juice
from the legacy mlsysbook.ai URLs after the staged rollout, we emit one
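tiny HTML file per legacy path, containing (roughly):

    <link rel="canonical" href="NEW_URL">
    <meta name="robots" content="noindex">
    <meta http-equiv="refresh" content="0; url=NEW_URL">
    <a href="NEW_URL">NEW_URL</a>
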
Crawlers treat the canonical as authoritative, drop the legacy URL on
recrawl (the noindex), and follow the link graph through to the new
location. Real users hit the meta-refresh and arrive in <100ms.
The same map ALSO produces a Netlify-format `_redirects` file, so that if
we ever move off GitHub Pages to a host that supports real 301s, the
existing redirect map can drive those redirects from day one, with no
second source of truth.
Usage
-----
build-redirects.py --map shared/config/redirect-map.json \
--out gh-pages-staging/ \
[--base-url https://mlsysbook.ai] \
[--check]
--check Validates the JSON without writing any files (CI-friendly).
"""
from __future__ import annotations

import argparse
import html
import json
import sys
from pathlib import Path
from typing import Any
# Base URL that relative 'to' values are resolved against when --base-url
# is not given on the command line.
DEFAULT_BASE_URL = "https://mlsysbook.ai"

# HTML written for every legacy path. It is filled in with str.format(), so
# `{dest}` and `{canonical}` are the only placeholders and any literal brace
# added later must be doubled ("{{" / "}}").
#   - rel="canonical" names the new location as the authoritative URL.
#   - robots noindex asks crawlers to drop the legacy URL on recrawl.
#   - the meta refresh forwards browsers immediately.
#   - the visible link keeps the link graph intact and serves as a manual
#     fallback if the refresh is blocked.
STUB_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Redirecting…</title>
<link rel="canonical" href="{canonical}">
<meta name="robots" content="noindex">
<meta http-equiv="refresh" content="0; url={dest}">
</head>
<body>
<p>This page has moved to <a href="{dest}">{dest}</a>.</p>
</body>
</html>
"""
def resolve_dest(to: str, base_url: str) -> str:
    """Turn a redirect target into an absolute URL.

    Targets that already begin with ``http://`` or ``https://`` are returned
    untouched. Anything else is treated as a site path: it gets a leading
    slash if it lacks one and is appended to `base_url` with that URL's
    trailing slashes removed.
    """
    already_absolute = to.startswith("http://") or to.startswith("https://")
    if already_absolute:
        return to
    path = to if to.startswith("/") else f"/{to}"
    origin = base_url.rstrip("/")
    return f"{origin}{path}"
def validate_entry(i: int, entry: dict[str, Any]) -> list[str]:
    """Return a list of validation errors for one entry. Empty list = OK.

    Never raises on malformed input: a non-object entry or a non-string
    field is reported as an error message instead.

    Args:
        i: Position of the entry in the `redirects` list; used only to
            label the error messages.
        entry: One decoded item of the `redirects` list.

    Returns:
        Human-readable error strings, each prefixed with ``redirects[i]``.
    """
    where = f"redirects[{i}]"
    if not isinstance(entry, dict):
        return [
            f"{where}: entry must be a JSON object "
            f"(got {type(entry).__name__})"
        ]

    errs: list[str] = []
    for required in ("from", "to"):
        if required not in entry:
            errs.append(f"{where}: missing required field '{required}'")
            continue
        value = entry[required]
        # An empty 'from' would resolve to the site root and overwrite its
        # index.html; an empty 'to' is never a meaningful target.
        if not isinstance(value, str) or not value:
            errs.append(
                f"{where}: '{required}' must be a non-empty string "
                f"(got {value!r})"
            )

    # Path-level checks only make sense once 'from' is known to be a string.
    raw_src = entry.get("from")
    src = raw_src if isinstance(raw_src, str) else ""

    if src and not src.startswith("/"):
        errs.append(f"{where}: 'from' must start with '/' (got {src!r})")

    status = entry.get("status", 301)
    # bool is a subclass of int and 301.0 == 301, so check the type too.
    if (
        isinstance(status, bool)
        or not isinstance(status, int)
        or status not in (301, 302, 307, 308)
    ):
        errs.append(f"{where}: 'status' should be 301/302/307/308 (got {status!r})")

    # Wildcard handling: only allowed as a final '*' segment for now, which
    # also means exactly one '*' in the whole path.
    if "*" in src and (src.count("*") > 1 or not src.endswith("/*")):
        errs.append(
            f"{where}: wildcard '*' currently only supported as the trailing "
            f"path segment (e.g. '/foo/*'); got {src!r}"
        )

    # A '..' segment would make the generated stub land outside the output
    # directory.
    if ".." in src.split("/"):
        errs.append(f"{where}: 'from' must not contain '..' segments (got {src!r})")

    return errs
def write_html_stub(out_root: Path, src: str, dest_url: str) -> Path:
    """Materialize the redirect stub for `src` under `out_root`.

    A `src` that already names an `.html`/`.htm` file is written to that
    exact relative path. Every other path (the site root, a path with a
    trailing slash, or an extensionless path) becomes `<src>/index.html`.
    Parent directories are created as needed.

    Args:
        out_root: Directory the stub tree is written into.
        src: Legacy site path, e.g. "/old/page/" or "/old/page.html".
        dest_url: URL the stub should point at; substituted for both the
            `dest` and `canonical` template fields.

    Returns:
        Path of the file that was written.
    """
    rel = src.lstrip("/")
    if rel.endswith((".html", ".htm")):
        target = out_root / rel
    else:
        # Directory-style URL: serve the stub as that directory's index.
        target = out_root / rel / "index.html"
    target.parent.mkdir(parents=True, exist_ok=True)

    # The URL is substituted into HTML markup, so escape &, <, > and quotes
    # to keep a query string or stray quote from breaking the page.
    safe_url = html.escape(dest_url, quote=True)
    target.write_text(
        STUB_TEMPLATE.format(dest=safe_url, canonical=safe_url),
        encoding="utf-8",
    )
    return target
def write_netlify_file(out_root: Path, lines: list[str]) -> Path:
    """Write the `_redirects` file into `out_root` and return its path.

    The file starts with a two-line generated-file banner, followed by the
    given rule lines joined with newlines and a final trailing newline.
    """
    banner = (
        "# Generated by shared/scripts/build-redirects.py — do not edit by hand.\n"
        "# Source of truth: shared/config/redirect-map.json\n"
    )
    rules = "\n".join(lines)
    redirects_path = out_root / "_redirects"
    redirects_path.write_text(f"{banner}{rules}\n", encoding="utf-8")
    return redirects_path
def main() -> int:
    """Command-line entry point.

    Loads and validates the redirect map. Unless --check is given, it then
    writes one HTML stub per non-wildcard entry plus a `_redirects` file
    into the --out directory.

    Returns:
        0 on success, 1 when the map fails validation, 2 when the map
        cannot be loaded or does not have the expected top-level shape.
        (Argument errors exit through argparse instead of returning.)
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Preserve the module docstring's line layout (usage block) in --help
        # instead of letting argparse reflow it into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--map", required=True, help="Path to redirect-map.json")
    ap.add_argument(
        "--out",
        required=False,
        help="Directory to emit redirect stubs into (typically the staging "
        "copy of gh-pages). Required unless --check is passed.",
    )
    ap.add_argument(
        "--base-url",
        default=DEFAULT_BASE_URL,
        help=f"Base URL for relative 'to' values (default: {DEFAULT_BASE_URL})",
    )
    ap.add_argument(
        "--check",
        action="store_true",
        help="Validate the map only; do not write files.",
    )
    args = ap.parse_args()
    if not args.check and not args.out:
        ap.error("--out is required unless --check is passed")

    map_path = Path(args.map)
    try:
        data = json.loads(map_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # OSError: missing file, directory, permission problem.
        # ValueError: json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ Could not load {map_path}: {e}", file=sys.stderr)
        return 2

    if not isinstance(data, dict):
        print("❌ Top-level JSON value must be an object", file=sys.stderr)
        return 2
    redirects = data.get("redirects", [])
    if not isinstance(redirects, list):
        print("❌ 'redirects' must be a list", file=sys.stderr)
        return 2

    errors: list[str] = []
    for i, entry in enumerate(redirects):
        if not isinstance(entry, dict):
            # Report malformed entries instead of crashing in validation.
            errors.append(
                f"redirects[{i}]: entry must be a JSON object "
                f"(got {type(entry).__name__})"
            )
            continue
        errors.extend(validate_entry(i, entry))
    if errors:
        print("❌ Validation errors:", file=sys.stderr)
        for e in errors:
            print(f" - {e}", file=sys.stderr)
        return 1
    print(f"✅ Validated {len(redirects)} redirect entries from {map_path}")
    if args.check:
        return 0

    out_root = Path(args.out)
    out_root.mkdir(parents=True, exist_ok=True)
    netlify_lines: list[str] = []
    written = 0
    for entry in redirects:
        src = entry["from"]
        dest = resolve_dest(entry["to"], args.base_url)
        status = entry.get("status", 301)
        netlify_lines.append(f"{src} {dest} {status}")
        if "*" in src:
            # We cannot statically expand wildcards — that requires walking
            # the deployed tree. Skip stub emission and rely on the Netlify
            # _redirects line for hosts that support it.
            continue
        target = write_html_stub(out_root, src, dest)
        written += 1
        print(f" → {src} ⇒ {dest} ({target.relative_to(out_root)})")
    write_netlify_file(out_root, netlify_lines)
    print(f"✅ Wrote {written} HTML stub(s) and 1 _redirects file to {out_root}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())