chore(staffml): remove chapter-url manifest and link-probe infrastructure

Removes the last active coupling between StaffML questions and the
mlsysbook.ai site:

Deleted files
=============
- interviews/staffml/src/data/chapter-urls.json
  27-entry chapter-id → relative-path map. All 27 URLs currently 404
  against production because the live site serves /contents/core/...
  while the manifest uses /contents/vol1|vol2/... paths.
- interviews/staffml/scripts/check-deep-dive-links.py
  Weekly URL-health probe that walked chapter-urls.json. Nothing else
  consumed it; its sole SOURCE_PATH was the manifest above.
- .github/workflows/staffml-link-check.yml
  Scheduled CI (cron '0 9 * * 1') + PR-comment + auto-issue-filing
  pipeline for the probe. With the probe gone, the workflow had no
  job left. Grep confirmed no other workflow depends on its
  'staffml-link-report' artifact name.

Modified
========
- interviews/staffml/scripts/DEPRECATED.md
  Drop the 'check-deep-dive-links.py' row (script no longer exists
  so the replacement pointer is no longer meaningful).
- interviews/staffml/.gitignore
  Drop the '_deep_dive_link_report.json' ignore (the file that
  produced it is gone).

What replaces this
==================
Nothing yet. Per the resources-list model adopted in the preceding
commits, per-question book links are an author-curated editorial
act — authors add { name, url } entries to Details.resources when
book URLs stabilize (mlsysbook.ai/vol1 still moving). Until then,
StaffML is deliberately self-contained for book-linking purposes.

Ecosystem-level cross-linking to the book remains via Nav.tsx's
existing 'MLSysBook.ai' header link (stable, points at homepage);
a more prominent affordance is planned for a follow-up commit.
This commit is contained in:
Vijay Janapa Reddi
2026-04-16 18:27:58 -04:00
parent 6e3ef2aa6f
commit 409d58c57b
5 changed files with 0 additions and 553 deletions

View File

@@ -1,250 +0,0 @@
name: '🎯 StaffML · 🔗 Link Check'

# =============================================================================
# StaffML — Chapter-URL manifest health check
# =============================================================================
#
# Probes every unique URL in src/data/chapter-urls.json (joined with
# mlsysbook.ai) via curl and uploads a structured JSON health report. Runs on
# a weekly schedule and on manual dispatch. Optionally fails the workflow if
# any new URLs are dead.
#
# Flow:
#   1. CHECKOUT — minimal clone, no deps
#   2. PROBE    — python3 scripts/check-deep-dive-links.py
#   3. UPLOAD   — _deep_dive_link_report.json as workflow artifact
#   4. NOTIFY   — open an issue if broken-count regresses week-over-week
#
# Triggers:
#   - schedule: every Monday 09:00 UTC
#   - workflow_dispatch: manual run
#   - pull_request: only when chapter-urls.json, refs.ts, or the
#     probe itself is touched
#
# Related:
#   - interviews/staffml/scripts/check-deep-dive-links.py
#   - interviews/staffml/src/lib/refs.ts (consumer of chapter-urls.json)
#
# =============================================================================

on:
  schedule:
    # Mondays at 09:00 UTC — early enough to catch the weekend's regressions
    - cron: '0 9 * * 1'
  workflow_dispatch:
    inputs:
      fail_on_broken:
        description: 'Fail the workflow if any URL is dead'
        required: false
        default: 'false'
        type: choice
        options: ['true', 'false']
  pull_request:
    paths:
      - 'interviews/staffml/src/data/chapter-urls.json'
      - 'interviews/staffml/src/lib/refs.ts'
      - 'interviews/staffml/scripts/check-deep-dive-links.py'
      - '.github/workflows/staffml-link-check.yml'

permissions:
  contents: read
  issues: write  # for the scheduled regression-notify step (opens/updates an issue)
  pull-requests: write  # for the PR-comment step on pull_request events

concurrency:
  group: staffml-link-check
  cancel-in-progress: false  # let scheduled runs always finish

jobs:
  check-links:
    name: '🔗 Probe corpus deep_dive_urls'
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - name: 📥 Checkout
        uses: actions/checkout@v6
        with:
          # Minimal clone — we only need the corpus and the script
          fetch-depth: 1

      - name: 🐍 Verify Python
        run: python3 --version

      - name: 🔍 Verify curl
        run: curl --version

      - name: 🌐 Run link checker
        id: probe
        working-directory: interviews/staffml
        run: |
          # Probe is best-effort: broken links must not kill the step before
          # we can parse the report, so tolerate a non-zero exit here and
          # gate on the report contents below.
          set +e
          python3 scripts/check-deep-dive-links.py --quiet | tee /tmp/link-check.log
          set -e
          if [ ! -f scripts/_deep_dive_link_report.json ]; then
            echo "❌ Link checker did not produce a report"
            exit 1
          fi
          # Extract summary metrics for downstream steps
          eval "$(python3 - <<'PY'
          import json
          with open('scripts/_deep_dive_link_report.json') as f:
              r = json.load(f)
          total = r.get('total_links', 0)
          unique = r.get('unique_urls', 0)
          broken = r.get('broken_count', 0)
          healthy = unique - broken
          pct = (healthy / unique * 100) if unique else 0
          print(f"TOTAL_LINKS={total}")
          print(f"UNIQUE_URLS={unique}")
          print(f"BROKEN_COUNT={broken}")
          print(f"HEALTHY_COUNT={healthy}")
          print(f"HEALTH_PCT={pct:.1f}")
          PY
          )"
          # Publish to step outputs for downstream steps
          {
            echo "total_links=$TOTAL_LINKS"
            echo "unique_urls=$UNIQUE_URLS"
            echo "broken_count=$BROKEN_COUNT"
            echo "healthy_count=$HEALTHY_COUNT"
            echo "health_pct=$HEALTH_PCT"
          } >> "$GITHUB_OUTPUT"
          echo ""
          echo "─── Link health summary ───"
          echo "Total references : $TOTAL_LINKS"
          echo "Unique URLs : $UNIQUE_URLS"
          echo "Healthy : $HEALTHY_COUNT ($HEALTH_PCT%)"
          echo "Broken : $BROKEN_COUNT"
          # Honor the manual fail-on-broken input.
          # FIX: this previously tested `rc` captured after the tee pipeline,
          # which (a) held tee's exit status, not the probe's, and (b) was
          # always 0 anyway because the probe runs without --fail-on-broken.
          # Gate on the parsed broken count instead.
          if [ "${{ github.event.inputs.fail_on_broken }}" = "true" ] && [ "$BROKEN_COUNT" -gt 0 ]; then
            echo "❌ Manual fail-on-broken requested and broken URLs found"
            exit 1
          fi

      - name: 📤 Upload report artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: staffml-link-report
          path: interviews/staffml/scripts/_deep_dive_link_report.json
          retention-days: 90

      - name: 📊 PR comment with link health diff
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(
              fs.readFileSync('interviews/staffml/scripts/_deep_dive_link_report.json', 'utf8')
            );
            const total = report.total_links;
            const unique = report.unique_urls;
            const broken = report.broken_count;
            const healthy = unique - broken;
            const pct = unique ? ((healthy / unique) * 100).toFixed(1) : '0.0';
            // Top 5 broken-by-impact, surfaced inline
            const top = (report.broken || [])
              .slice(0, 5)
              .map(b => `| ${b.status} | ${b.occurrences} | \`${b.url.slice(0, 80)}\` |`)
              .join('\n');
            const body = [
              '## 🔗 StaffML link-health report',
              '',
              `- **Total references**: ${total}`,
              `- **Unique URLs**: ${unique}`,
              `- **Healthy (2xx/3xx)**: ${healthy} (${pct}%)`,
              `- **Broken**: ${broken}`,
              '',
              top ? '### Top 5 broken URLs by user-impact' : '',
              top ? '| Status | Occurrences | URL |' : '',
              top ? '|---|---|---|' : '',
              top,
              '',
              '> Full report uploaded as workflow artifact `staffml-link-report`.',
            ].filter(Boolean).join('\n');
            // FIX: await the API call so a failure surfaces as a step failure
            // instead of being silently dropped when the script returns.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body,
            });

      - name: 🚨 Open issue on regression
        # Only on scheduled runs — we want a single source of weekly truth
        if: github.event_name == 'schedule'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(
              fs.readFileSync('interviews/staffml/scripts/_deep_dive_link_report.json', 'utf8')
            );
            const broken = report.broken_count || 0;
            const unique = report.unique_urls || 0;
            const healthy = unique - broken;
            const pct = unique ? ((healthy / unique) * 100).toFixed(1) : '0.0';
            // Only file an issue if health < 60% — adjust as the corpus heals
            const HEALTH_THRESHOLD = 60.0;
            if (parseFloat(pct) >= HEALTH_THRESHOLD) {
              console.log(`Health ${pct}% >= threshold ${HEALTH_THRESHOLD}%, no issue filed.`);
              return;
            }
            const top = (report.broken || [])
              .slice(0, 10)
              .map(b => `- [${b.status}] x${b.occurrences} \`${b.url}\``)
              .join('\n');
            const title = `[StaffML] Link health ${pct}% (${broken}/${unique} broken)`;
            const body = [
              `## Weekly StaffML link-health report`,
              '',
              `- **Healthy**: ${healthy}/${unique} (${pct}%)`,
              `- **Broken**: ${broken}`,
              `- **Threshold**: ${HEALTH_THRESHOLD}%`,
              '',
              `### Top 10 broken URLs by user-impact`,
              top,
              '',
              `Full report attached as artifact \`staffml-link-report\` on the [workflow run](${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}).`,
              '',
              `_Auto-filed by \`.github/workflows/staffml-link-check.yml\`._`,
            ].join('\n');
            // Avoid duplicates: look for an open issue with the same title prefix
            const existing = await github.rest.issues.listForRepo({
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'open',
              labels: 'staffml,link-health',
            });
            const dup = existing.data.find(i => i.title.startsWith('[StaffML] Link health'));
            if (dup) {
              console.log(`Updating existing issue #${dup.number}`);
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: dup.number,
                body,
              });
            } else {
              await github.rest.issues.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title,
                body,
                labels: ['staffml', 'link-health'],
              });
            }

View File

@@ -5,9 +5,6 @@ out/
# Dependencies
node_modules/
# Link-checker report (generated by scripts/check-deep-dive-links.py)
scripts/_deep_dive_link_report.json
# Cloudflare Wrangler cache (worker dev/deploy)
worker/.wrangler/
worker/wrangler.toml.bak

View File

@@ -12,7 +12,6 @@ artifact) or pushed data into `src/data/corpus.json` (now emitted by
| `sync-vault.py` | Copied vault/corpus.json → src/data/ with filter | `vault build --legacy-json` emits site-compatible JSON directly |
| `generate-manifest.py` | Built src/data/vault-manifest.json | Built by `vault publish` as a release artifact |
| `validate-vault.py` | Sanity check on corpus shape | Covered by `vault check --strict` invariants |
| `check-deep-dive-links.py` | URL reachability | `vault check --tier slow` (nightly) |
| `format-napkin-math.py` | One-shot formatter | Obsolete |
| `sync-periodic-table.mjs` | Unrelated (periodic-table site feature) | Still active — NOT deprecated |

View File

@@ -1,270 +0,0 @@
#!/usr/bin/env python3
"""
Link checker for the StaffML → textbook chapter-URL manifest.
Walks src/data/chapter-urls.json (the 27-entry chapter-id → relative-path map
consumed by src/lib/refs.ts), prefixes each path with mlsysbook.ai, probes
each URL once via curl over a small concurrent worker pool, and emits a
structured JSON report at scripts/_deep_dive_link_report.json plus a
human-readable summary on stdout.
Background:
The per-question `deep_dive_url` field was removed during the vault
migration (Phase 1). StaffML now links to textbook chapters via this
manifest. Topic-granular linking is a separate, deferred design
(see interviews/vault/BOOK_LINKING_PLAN.md). Until that ships, the
chapter-URL manifest IS the user-facing link surface — probing it keeps
us honest about chapter-level link health.
Usage:
python3 scripts/check-deep-dive-links.py # full check
python3 scripts/check-deep-dive-links.py --hosts mlsysbook.ai
python3 scripts/check-deep-dive-links.py --fail-on-broken # exit 1 if any URL is dead
Output report shape (keys stable for the workflow to parse):
{
"checked_at": "2026-04-16T18:42:00Z",
"total_links": 27,
"unique_urls": 27,
"by_status": { "200": 27, "404": 0, ... },
"by_host": { "mlsysbook.ai": { "200": 27 }, ... },
"broken": [ { "url": "...", "status": 404, "occurrences": 1 }, ... ]
}
"""
from __future__ import annotations

import argparse
import concurrent.futures
import json
import os
import sys
import time
from collections import Counter, defaultdict
from pathlib import Path
from urllib.parse import urlparse
import subprocess
import shutil

# Hard prerequisite: URL probing shells out to the curl binary, so fail fast
# with a distinct exit code (2) if it is not on PATH rather than erroring
# later on every probe.
if shutil.which("curl") is None:
    print("FATAL: curl is required (sudo apt install curl / brew install curl)", file=sys.stderr)
    sys.exit(2)
# ───────────────────────── Config ──────────────────────────
TIMEOUT_SECONDS = 6   # per-URL budget handed to curl via --max-time
MAX_WORKERS = 8       # thread-pool width used by main()'s probe loop
USER_AGENT = "StaffML-LinkChecker/1.0 (+https://staffml.ai)"
BASE_URL = "https://mlsysbook.ai"
SOURCE_PATH = Path(__file__).resolve().parent.parent / "src" / "data" / "chapter-urls.json"
REPORT_PATH = Path(__file__).resolve().parent / "_deep_dive_link_report.json"

# Hosts we know are broken (mark in report but don't even try to probe to save time)
KNOWN_DEAD_HOSTS = {
    "harvard-edge.github.io",
}


# ───────────────────── Probing logic ───────────────────────
def probe_url(url: str) -> dict:
    """Return {"status": ..., "host": ...} for a single URL probed via curl.

    Issues a single GET (``curl -sL``, body discarded to os.devnull) and
    follows redirects; the previous docstring claimed a HEAD request, which
    the command line never performed.

    Returns:
        dict with "host" (URL hostname or "") and "status", which is either
        the final HTTP status code as an int, or one of the sentinel strings:
        "known-dead", "invalid-scheme", "timeout", "dns", "tls",
        "curl-fail-<code>", "no-status", or "error: <ExceptionName>".
    """
    parsed = urlparse(url)
    host = parsed.hostname or ""
    # Skip hosts we already know are dead — recorded in the report, not probed.
    if host in KNOWN_DEAD_HOSTS:
        return {"status": "known-dead", "host": host}
    if parsed.scheme not in ("http", "https"):
        return {"status": "invalid-scheme", "host": host}
    try:
        result = subprocess.run(
            [
                "curl",
                "-sL",  # silent + follow redirects
                "-o", os.devnull,  # discard the response body
                "-A", USER_AGENT,
                "--max-time", str(TIMEOUT_SECONDS),
                "-w", "%{http_code}",  # emit only the final status code on stdout
                url,
            ],
            capture_output=True,
            text=True,
            # Slightly larger than curl's own cap so curl gets to time out first.
            timeout=TIMEOUT_SECONDS + 2,
        )
    except subprocess.TimeoutExpired:
        return {"status": "timeout", "host": host}
    except Exception as e:
        return {"status": f"error: {type(e).__name__}", "host": host}
    if result.returncode != 0:
        # curl error code -> sentinel
        # https://curl.se/libcurl/c/libcurl-errors.html
        stderr_lower = (result.stderr or "").lower()
        if result.returncode == 6 or "could not resolve" in stderr_lower:
            return {"status": "dns", "host": host}
        if result.returncode == 28:
            return {"status": "timeout", "host": host}
        if result.returncode in (35, 60):
            return {"status": "tls", "host": host}
        return {"status": f"curl-fail-{result.returncode}", "host": host}
    code_str = (result.stdout or "").strip()
    if not code_str.isdigit():
        return {"status": "no-status", "host": host}
    return {"status": int(code_str), "host": host}
# ───────────────────── Manifest walking ────────────────────
def collect_urls(source_path: Path) -> dict[str, int]:
    """Return {url: occurrence_count} from the chapter-url manifest.

    chapter-urls.json is a flat {chapter_id: relative_path} mapping. Each
    entry is one user-facing destination, so every URL carries an occurrence
    count of 1. Relative paths are joined onto BASE_URL to form the probe
    targets. Raises SystemExit if the manifest is not a flat dict.
    """
    with source_path.open() as handle:
        manifest = json.load(handle)
    if not isinstance(manifest, dict):
        raise SystemExit(
            f"Expected a flat dict in {source_path}, got {type(manifest).__name__}"
        )
    tally: Counter[str] = Counter()
    for rel_path in manifest.values():
        # Skip non-string / empty entries; everything else is a destination.
        if isinstance(rel_path, str) and rel_path:
            # Manifest paths are site-root absolute ('/...'), so a plain
            # concatenation with BASE_URL produces the full probe URL.
            tally[BASE_URL.rstrip("/") + "/" + rel_path.lstrip("/")] += 1
    return dict(tally)
# ─────────────────────── Main flow ─────────────────────────
def main(argv: list[str]) -> int:
    """CLI entry point.

    Loads the chapter-url manifest, probes each (optionally host-filtered)
    unique URL over a thread pool, writes the JSON report to REPORT_PATH,
    and prints a human-readable summary to stdout.

    Returns a process exit code: 0 on success, 1 when --fail-on-broken is
    set and any URL is dead, 2 when the manifest file is missing.
    """
    parser = argparse.ArgumentParser(description="Check StaffML corpus deep_dive_url health.")
    parser.add_argument("--hosts", nargs="*", default=None,
                        help="Only probe URLs whose host is in this allowlist.")
    parser.add_argument("--fail-on-broken", action="store_true",
                        help="Exit with code 1 if any URL is dead (status >= 400 or sentinel).")
    parser.add_argument("--quiet", action="store_true", help="Suppress per-URL progress.")
    args = parser.parse_args(argv)

    if not SOURCE_PATH.exists():
        print(f"FATAL: chapter-url manifest not found at {SOURCE_PATH}", file=sys.stderr)
        return 2

    print(f"Loading chapter-url manifest from {SOURCE_PATH}")
    occurrences = collect_urls(SOURCE_PATH)
    total_links = sum(occurrences.values())
    unique_urls = list(occurrences.keys())

    # Optional host allowlist narrows the probe set; manifest totals above
    # are still reported in full.
    if args.hosts:
        allow = set(args.hosts)
        unique_urls = [u for u in unique_urls if (urlparse(u).hostname or "") in allow]
        print(f"Filtered by hosts {sorted(allow)}: {len(unique_urls)} URLs to probe.")

    print(f"Found {total_links} manifest entries → {len(occurrences)} unique URLs")
    if args.hosts:
        print(f"Probing {len(unique_urls)} after host filter")
    else:
        # NOTE(review): probe_url issues a GET only; "HEAD with GET fallback"
        # in this message overstates the mechanism — confirm before relying
        # on it.
        print(f"Probing {len(unique_urls)} unique URLs (HEAD with GET fallback, "
              f"timeout {TIMEOUT_SECONDS}s, {MAX_WORKERS} workers)")

    # Probe concurrently; results maps url -> {"status": ..., "host": ...}.
    started = time.time()
    results: dict[str, dict] = {}
    completed = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
        future_to_url = {ex.submit(probe_url, u): u for u in unique_urls}
        for fut in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[fut]
            try:
                results[url] = fut.result()
            except Exception as e:
                # probe_url converts its own failures to sentinels; this
                # catches anything unexpected escaping the worker.
                results[url] = {"status": f"exception: {type(e).__name__}", "host": urlparse(url).hostname or ""}
            completed += 1
            if not args.quiet and completed % 25 == 0:
                print(f" ... {completed}/{len(unique_urls)} probed", file=sys.stderr)
    elapsed = time.time() - started

    # ────────── Aggregation ──────────
    by_status: Counter[str] = Counter()
    by_host: dict[str, Counter[str]] = defaultdict(Counter)
    broken = []
    SUCCESS_CODES = {200, 201, 204, 301, 302, 303, 307, 308}
    for url, info in results.items():
        status = info.get("status")
        host = info.get("host", "")
        status_str = str(status)
        by_status[status_str] += 1
        by_host[host][status_str] += 1
        # Broken = anything that isn't a 2xx/3xx success code.
        # Sentinel strings (timeout/dns/tls/known-dead/...) all count as broken.
        is_success = isinstance(status, int) and status in SUCCESS_CODES
        if not is_success:
            broken.append({
                "url": url,
                "status": status,
                "host": host,
                "occurrences": occurrences.get(url, 0),
            })
    # Highest user-impact (most-referenced) URLs first.
    broken.sort(key=lambda r: -r["occurrences"])

    # Report keys are a stable contract: the CI workflow parses total_links,
    # unique_urls, and broken_count out of this file.
    report = {
        "checked_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "elapsed_seconds": round(elapsed, 1),
        "total_links": total_links,
        "unique_urls": len(occurrences),
        "probed": len(unique_urls),
        "by_status": dict(by_status),
        "by_host": {h: dict(c) for h, c in sorted(by_host.items())},
        "broken_count": len(broken),
        "broken": broken,
    }
    REPORT_PATH.write_text(json.dumps(report, indent=2))
    print(f"\nReport written to {REPORT_PATH}")

    # ────────── Human summary ──────────
    print(f"\n=== Summary ({elapsed:.1f}s) ===")
    print(f"Manifest entries: {total_links}")
    print(f"Unique URLs: {len(occurrences)}")
    print(f"Probed: {len(unique_urls)}")
    print(f"\nBy status:")
    for s, n in sorted(by_status.items(), key=lambda kv: -kv[1]):
        print(f" {s:>14} {n}")
    print(f"\nTop 10 broken hosts (by unique URL count):")
    # NOTE(review): this display-only filter counts 201/204 as broken even
    # though SUCCESS_CODES above accepts them — confirm whether the asymmetry
    # is intentional (the JSON report itself uses SUCCESS_CODES).
    host_broken = sorted(
        [(h, sum(n for s, n in cs.items() if s not in ("200", "301", "302", "303", "307", "308"))) for h, cs in by_host.items()],
        key=lambda kv: -kv[1],
    )[:10]
    for h, n in host_broken:
        if n:
            print(f" {h:>40} {n} broken")
    print(f"\nTop 10 broken URLs (by user-impact = occurrence count):")
    for b in broken[:10]:
        print(f" [{b['status']}] x{b['occurrences']:<4} {b['url'][:90]}")

    if args.fail_on_broken and broken:
        print(f"\n{len(broken)} broken URLs — exiting 1", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

View File

@@ -1,29 +0,0 @@
{
"vol1_benchmarking": "/contents/vol1/benchmarking/benchmarking.html",
"vol1_data_engineering": "/contents/vol1/data_engineering/data_engineering.html",
"vol1_frameworks": "/contents/vol1/frameworks/frameworks.html",
"vol1_hw_acceleration": "/contents/vol1/hw_acceleration/hw_acceleration.html",
"vol1_ml_ops": "/contents/vol1/ml_ops/ml_ops.html",
"vol1_ml_systems": "/contents/vol1/ml_systems/ml_systems.html",
"vol1_ml_workflow": "/contents/vol1/ml_workflow/ml_workflow.html",
"vol1_model_serving": "/contents/vol1/model_serving/model_serving.html",
"vol1_nn_architectures": "/contents/vol1/nn_architectures/nn_architectures.html",
"vol1_nn_computation": "/contents/vol1/nn_computation/nn_computation.html",
"vol1_responsible_engr": "/contents/vol1/responsible_engr/responsible_engr.html",
"vol1_training": "/contents/vol1/training/training.html",
"vol2_collective_communication": "/contents/vol2/collective_communication/collective_communication.html",
"vol2_compute_infrastructure": "/contents/vol2/compute_infrastructure/compute_infrastructure.html",
"vol2_data_storage": "/contents/vol2/data_storage/data_storage.html",
"vol2_distributed_training": "/contents/vol2/distributed_training/distributed_training.html",
"vol2_edge_intelligence": "/contents/vol2/edge_intelligence/edge_intelligence.html",
"vol2_fault_tolerance": "/contents/vol2/fault_tolerance/fault_tolerance.html",
"vol2_fleet_orchestration": "/contents/vol2/fleet_orchestration/fleet_orchestration.html",
"vol2_inference": "/contents/vol2/inference/inference.html",
"vol2_network_fabrics": "/contents/vol2/network_fabrics/network_fabrics.html",
"vol2_ops_scale": "/contents/vol2/ops_scale/ops_scale.html",
"vol2_performance_engineering": "/contents/vol2/performance_engineering/performance_engineering.html",
"vol2_responsible_ai": "/contents/vol2/responsible_ai/responsible_ai.html",
"vol2_robust_ai": "/contents/vol2/robust_ai/robust_ai.html",
"vol2_security_privacy": "/contents/vol2/security_privacy/security_privacy.html",
"vol2_sustainable_ai": "/contents/vol2/sustainable_ai/sustainable_ai.html"
}