KohakuHub/scripts/dev/seed_demo_data.py

#!/usr/bin/env python3
"""Create deterministic local demo data through KohakuHub's API surface."""

from __future__ import annotations

import asyncio
import base64
import csv
import hashlib
import io
import json
import sys
import textwrap
from contextlib import AsyncExitStack
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
from urllib.parse import urlsplit

import httpx
from PIL import Image, ImageDraw, ImageFont

# Locate the repository root relative to this file (the directory two levels
# above the folder containing the script) and make ``<root>/src`` importable
# so the ``kohakuhub`` imports that follow can resolve from the checkout.
ROOT_DIR = Path(__file__).resolve().parents[2]
SRC_DIR = ROOT_DIR / "src"
if str(SRC_DIR) not in sys.path:
    # Inserted at index 0 so this directory is searched before other entries.
    sys.path.insert(0, str(SRC_DIR))

from kohakuhub.config import cfg
from kohakuhub.main import app
from kohakuhub.utils.s3 import init_storage

# Identifier for this revision of the demo data set.
SEED_VERSION = "local-dev-demo-v1"
# Shared password literal; presumably applied to every seeded account --
# confirm against the account-creation code further down the file.
DEFAULT_PASSWORD = "KohakuDev123!"
# Same username as the first entry of ACCOUNTS.
PRIMARY_USERNAME = "mai_lin"
# JSON manifest location under <root>/hub-meta/dev/.
MANIFEST_PATH = ROOT_DIR / "hub-meta" / "dev" / "demo-seed-manifest.json"
# Resolution order: ``cfg.app.internal_base_url`` when that attribute exists
# and is truthy (``getattr`` tolerates configs without it), then
# ``cfg.app.base_url``, then a hard-coded loopback address.
INTERNAL_BASE_URL = (
    getattr(cfg.app, "internal_base_url", None)
    or cfg.app.base_url
    or "http://127.0.0.1:48888"
)


class SeedError(RuntimeError):
    """Raised when demo data creation fails.

    Subclasses ``RuntimeError`` without adding behavior, so seeding failures
    can be told apart from other runtime errors by type.
    """


@dataclass(frozen=True)
class AccountSeed:
    """Immutable description of one demo user account (see ``ACCOUNTS``)."""

    username: str  # login name, e.g. "mai_lin"
    email: str
    full_name: str  # display name
    bio: str  # free-text profile blurb
    website: str  # profile URL
    social_media: dict[str, str]  # platform key -> handle, e.g. "github": "ivy-ops"
    avatar_bg: str  # hex color string such as "#183153"
    avatar_accent: str  # hex color string; presumably drawn over avatar_bg -- confirm


@dataclass(frozen=True)
class OrganizationSeed:
    """Immutable description of one demo organization (see ``ORGANIZATIONS``)."""

    name: str  # organization name, e.g. "aurora-labs"
    description: str  # short summary text
    bio: str  # longer profile blurb
    website: str  # organization URL
    social_media: dict[str, str]  # platform key -> handle
    avatar_bg: str  # hex color string
    avatar_accent: str  # hex color string; presumably drawn over avatar_bg -- confirm
    members: tuple[tuple[str, str], ...]  # (username, role) pairs, e.g. ("leo_park", "admin")


@dataclass(frozen=True)
class CommitSeed:
    """Immutable description of one commit to create in a seeded repository."""

    summary: str  # short commit title
    description: str  # longer commit message body
    files: tuple[tuple[str, bytes], ...]  # (repo-relative path, file content) pairs


@dataclass(frozen=True)
class RepoSeed:
    """Immutable description of one demo repository and its history.

    The optional fields default to "nothing extra": no branch, no tag, and no
    download activity.
    """

    actor: str  # username associated with the repo's seeding; may differ from namespace
    repo_type: str  # seeds in this file use "model", "dataset" and "space"
    namespace: str  # owning user or organization name
    name: str  # repository name within the namespace
    private: bool
    commits: tuple[CommitSeed, ...]  # presumably applied in the listed order -- confirm
    branch: str | None = None  # optional extra branch name; presumably created after the commits
    tag: str | None = None  # optional tag name; presumably created after the commits
    download_path: str | None = None  # repo-relative file path; presumably fetched to simulate downloads
    download_sessions: int = 0  # presumably how many download sessions to simulate -- confirm


# Demo user accounts. The first entry ("mai_lin") is the account named by
# PRIMARY_USERNAME, and the usernames listed in ORGANIZATIONS member tuples
# all refer to entries declared here.
ACCOUNTS: tuple[AccountSeed, ...] = (
    AccountSeed(
        username="mai_lin",
        email="mai.lin@kohakuhub.dev",
        full_name="Mai Lin",
        bio=(
            "Product-minded ML engineer focused on reproducible dataset QA, "
            "small-model packaging, and local debugging workflows."
        ),
        website="https://kohakuhub.local/mai-lin",
        social_media={
            "github": "mai-lin-labs",
            "huggingface": "mai-lin-labs",
            "twitter_x": "mai_lin_ops",
        },
        avatar_bg="#183153",
        avatar_accent="#f59e0b",
    ),
    AccountSeed(
        username="leo_park",
        email="leo.park@kohakuhub.dev",
        full_name="Leo Park",
        bio=(
            "Frontend-heavy engineer who keeps repo demos honest with browser "
            "smoke tests and hand-curated example data."
        ),
        website="https://kohakuhub.local/leo-park",
        social_media={
            "github": "leo-park-dev",
            "threads": "leo.park.dev",
        },
        avatar_bg="#0f766e",
        avatar_accent="#f8fafc",
    ),
    AccountSeed(
        username="sara_chen",
        email="sara.chen@kohakuhub.dev",
        full_name="Sara Chen",
        bio=(
            "Annotation lead for invoice, receipt, and layout-heavy datasets. "
            "Prefers clean schemas over magical post-processing."
        ),
        website="https://kohakuhub.local/sara-chen",
        social_media={
            "github": "sara-chen-data",
            "huggingface": "sara-chen-data",
        },
        avatar_bg="#7c2d12",
        avatar_accent="#fde68a",
    ),
    AccountSeed(
        username="noah_kim",
        email="noah.kim@kohakuhub.dev",
        full_name="Noah Kim",
        bio=(
            "Ships compact vision models for harbor monitoring, segmentation, "
            "and camera-side smoke testing."
        ),
        website="https://kohakuhub.local/noah-kim",
        social_media={
            "github": "noah-kim-vision",
            "twitter_x": "noahkimvision",
        },
        avatar_bg="#1d4ed8",
        avatar_accent="#dbeafe",
    ),
    AccountSeed(
        username="ivy_ops",
        email="ivy.ops@kohakuhub.dev",
        full_name="Ivy Ops",
        bio=(
            "Release and infra support. Uses stable, boring fixtures so bug "
            "reports stay reproducible."
        ),
        website="https://kohakuhub.local/ivy-ops",
        social_media={
            "github": "ivy-ops",
        },
        avatar_bg="#3f3f46",
        avatar_accent="#f4f4f5",
    ),
)

# Demo organizations. Each ``members`` entry is a (username, role) pair whose
# username appears in ACCOUNTS; the role strings used below are
# "super-admin", "admin", "member" and "visitor".
ORGANIZATIONS: tuple[OrganizationSeed, ...] = (
    OrganizationSeed(
        name="aurora-labs",
        description=(
            "Applied document intelligence team building OCR-friendly models, "
            "datasets, and lightweight internal tooling."
        ),
        bio=(
            "Aurora Labs curates multilingual OCR assets for receipts, forms, "
            "and customer-service automation."
        ),
        website="https://aurora-labs.kohakuhub.local",
        social_media={
            "github": "aurora-labs",
            "huggingface": "aurora-labs",
        },
        avatar_bg="#312e81",
        avatar_accent="#e0e7ff",
        members=(
            ("mai_lin", "super-admin"),
            ("leo_park", "admin"),
            ("sara_chen", "member"),
            ("ivy_ops", "visitor"),
        ),
    ),
    OrganizationSeed(
        name="harbor-vision",
        description=(
            "Small computer-vision team for coastal monitoring, dock safety, "
            "and camera-ready deployment checks."
        ),
        bio=(
            "Harbor Vision maintains compact segmentation and inspection models "
            "for edge-friendly marine operations."
        ),
        website="https://harbor-vision.kohakuhub.local",
        social_media={
            "github": "harbor-vision",
            "twitter_x": "harborvision",
        },
        avatar_bg="#0f766e",
        avatar_accent="#ccfbf1",
        members=(
            ("mai_lin", "super-admin"),
            ("noah_kim", "super-admin"),
            ("leo_park", "visitor"),
        ),
    ),
)


def text_bytes(body: str) -> bytes:
    """Turn an indented multi-line literal into UTF-8 file content.

    The common leading indentation is removed, surrounding whitespace is
    trimmed, and exactly one trailing newline is appended.
    """
    dedented = textwrap.dedent(body)
    trimmed = dedented.strip()
    return f"{trimmed}\n".encode("utf-8")


def json_bytes(payload: dict | list) -> bytes:
    """Render ``payload`` as stable, pretty-printed JSON bytes.

    Keys are sorted and a two-space indent is used so repeated runs yield
    identical content; the result ends with a single newline.
    """
    rendered = json.dumps(payload, sort_keys=True, indent=2)
    return rendered.encode("utf-8") + b"\n"


def csv_bytes(rows: Iterable[Iterable[str]]) -> bytes:
    """Serialize ``rows`` as UTF-8 CSV text with LF line endings.

    Fields are written through the standard ``csv`` writer, so a value that
    contains a comma, a double quote, or a line break is quoted and escaped
    instead of silently corrupting the column layout. Values that need no
    quoting are emitted unchanged.

    Args:
        rows: Iterable of rows, each an iterable of field strings.

    Returns:
        The encoded CSV document, one line per row.
    """
    buffer = io.StringIO()
    csv.writer(buffer, lineterminator="\n").writerows(rows)
    # An empty ``rows`` has always produced a lone newline; keep that output.
    return (buffer.getvalue() or "\n").encode("utf-8")


def jsonl_bytes(rows: Iterable[dict]) -> bytes:
    """Encode ``rows`` as JSON Lines: one key-sorted object per line.

    Lines are separated by a newline and the document ends with one; an
    empty ``rows`` therefore yields a single newline.
    """
    serialized = [json.dumps(record, sort_keys=True) for record in rows]
    document = "\n".join(serialized)
    return f"{document}\n".encode("utf-8")


def profile_space_files(title: str, summary: str, accent: str) -> tuple[tuple[str, bytes], ...]:
    """Build the file set for a minimal Gradio showcase space.

    Args:
        title: Space title; used in the README front matter and heading and
            as the Gradio interface title.
        summary: One-line description; used in the README body and as the
            Gradio interface description.
        accent: Value passed as ``primary_hue`` to the Gradio theme.

    Returns:
        ``(path, content)`` pairs for ``README.md``, ``app.py`` and
        ``requirements.txt``, in that order.

    ``title`` and ``summary`` are written verbatim into the README (including
    its YAML front matter), so they should be plain single-line text.
    """
    # Emit the values as escaped, double-quoted literals before splicing them
    # into the generated Python source, so a quote, backslash, or line break
    # in an argument cannot produce an invalid app.py. For plain text this
    # renders exactly the same characters as wrapping the value in quotes.
    prefix_literal = json.dumps(f"{title}: ", ensure_ascii=False)
    title_literal = json.dumps(title, ensure_ascii=False)
    summary_literal = json.dumps(summary, ensure_ascii=False)
    accent_literal = json.dumps(accent, ensure_ascii=False)
    return (
        (
            "README.md",
            text_bytes(
                f"""
                ---
                title: {title}
                emoji: "\u2605"
                colorFrom: indigo
                colorTo: amber
                sdk: gradio
                sdk_version: "4.44.0"
                ---

                # {title}

                {summary}

                This space exists so local profile pages render with realistic content
                instead of an empty placeholder repository.
                """
            ),
        ),
        (
            "app.py",
            text_bytes(
                f"""
                import gradio as gr

                demo = gr.Interface(
                    fn=lambda text: {prefix_literal} + text.strip(),
                    inputs=gr.Textbox(label="Prompt"),
                    outputs=gr.Textbox(label="Response"),
                    title={title_literal},
                    description={summary_literal},
                    theme=gr.themes.Soft(primary_hue={accent_literal}),
                )

                if __name__ == "__main__":
                    demo.launch()
                """
            ),
        ),
        ("requirements.txt", text_bytes("gradio>=4.44.0")),
    )


def lfs_blob(label: str) -> bytes:
    """Return a deterministic placeholder binary payload tagged with ``label``.

    The payload is a ``SEED-LFS::<label>`` header line followed by 1024 bytes
    of repeating hexadecimal digits.
    """
    filler = b"0123456789abcdef" * 64
    return b"".join((f"SEED-LFS::{label}\n".encode("utf-8"), filler))


def build_repo_seeds() -> tuple[RepoSeed, ...]:
    return (
        RepoSeed(
            actor="mai_lin",
            repo_type="model",
            namespace="mai_lin",
            name="lineart-caption-base",
            private=False,
            commits=(
                CommitSeed(
                    summary="Bootstrap base caption model",
                    description=(
                        "Create the public demo model repo with a realistic README, "
                        "lightweight config, and a small LFS-tracked checkpoint."
                    ),
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: mit
                                library_name: transformers
                                pipeline_tag: image-to-text
                                tags:
                                  - captioning
                                  - line-art
                                  - document-vision
                                ---

                                # lineart-caption-base

                                A compact caption model tuned for monochrome line art,
                                icon-heavy diagrams, and OCR-adjacent illustrations.

                                ## Intended use

                                - draft captions for internal QA dashboards
                                - generate quick prompts for reviewers
                                - validate frontend metadata rendering
                                """
                            ),
                        ),
                        (
                            "config.json",
                            json_bytes(
                                {
                                    "architectures": ["VisionEncoderDecoderModel"],
                                    "decoder_layers": 6,
                                    "encoder_layers": 12,
                                    "image_size": 448,
                                    "model_type": "lineart-caption-base",
                                    "vocab_size": 32000,
                                }
                            ),
                        ),
                        (
                            "tokenizer.json",
                            json_bytes(
                                {
                                    "added_tokens": [],
                                    "normalizer": {"type": "NFKC"},
                                    "pre_tokenizer": {"type": "Whitespace"},
                                    "version": "1.0",
                                }
                            ),
                        ),
                        ("examples/prompt.txt", text_bytes("Describe the icon, layout, and visible text.")),
                        (
                            "checkpoints/lineart-caption-base.safetensors",
                            lfs_blob("lineart-caption-base"),
                        ),
                    ),
                ),
                CommitSeed(
                    summary="Add eval notes and release metrics",
                    description="Follow-up commit so commit history and file updates are visible in local UI.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: mit
                                library_name: transformers
                                pipeline_tag: image-to-text
                                tags:
                                  - captioning
                                  - line-art
                                  - document-vision
                                ---

                                # lineart-caption-base

                                A compact caption model tuned for monochrome line art,
                                icon-heavy diagrams, and OCR-adjacent illustrations.

                                ## Current release

                                - validation CIDEr: 1.38
                                - latency target: <120 ms on local A10G
                                - known gap: dense legends still need manual review
                                """
                            ),
                        ),
                        (
                            "eval/metrics.json",
                            json_bytes(
                                {
                                    "cider": 1.38,
                                    "clip_score": 0.284,
                                    "latency_ms_p50": 87,
                                    "latency_ms_p95": 114,
                                }
                            ),
                        ),
                        (
                            "docs/training-notes.md",
                            text_bytes(
                                """
                                # Training Notes

                                - Base corpus: 82k internal line-art render pairs
                                - Additional hard negatives: 4k cluttered signage crops
                                - Checkpoint exported for small-batch browser smoke tests
                                """
                            ),
                        ),
                    ),
                ),
            ),
            branch="ablation-notes",
            tag="v0.2.1",
            download_path="checkpoints/lineart-caption-base.safetensors",
            download_sessions=4,
        ),
        RepoSeed(
            actor="mai_lin",
            repo_type="dataset",
            namespace="mai_lin",
            name="street-sign-zh-en",
            private=False,
            commits=(
                CommitSeed(
                    summary="Import bilingual street sign dataset",
                    description="Seed a CSV-backed dataset that exercises dataset preview and tree views.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: cc-by-4.0
                                task_categories:
                                  - image-text-to-text
                                language:
                                  - zh
                                  - en
                                pretty_name: Street Sign ZH EN
                                ---

                                # street-sign-zh-en

                                A small bilingual dataset for OCR-friendly sign translation and
                                layout QA. Rows keep the original text, translation, and scene tag.
                                """
                            ),
                        ),
                        (
                            "data/train.csv",
                            csv_bytes(
                                (
                                    ("image", "text_zh", "text_en", "scene"),
                                    ("img_0001.png", "\u5317\u4eac\u7ad9", "Beijing Railway Station", "station"),
                                    ("img_0002.png", "\u5c0f\u5fc3\u53f0\u9636", "Watch Your Step", "retail"),
                                    ("img_0003.png", "\u7981\u6b62\u5438\u70df", "No Smoking", "hospital"),
                                    ("img_0004.png", "\u53f3\u8f6c\u8f66\u9053", "Right Turn Only", "road"),
                                )
                            ),
                        ),
                        (
                            "data/validation.csv",
                            csv_bytes(
                                (
                                    ("image", "text_zh", "text_en", "scene"),
                                    ("val_0001.png", "\u51fa\u53e3", "Exit", "mall"),
                                    ("val_0002.png", "\u670d\u52a1\u53f0", "Service Desk", "airport"),
                                )
                            ),
                        ),
                        (
                            "metadata/features.json",
                            json_bytes(
                                {
                                    "image": "string",
                                    "text_zh": "string",
                                    "text_en": "string",
                                    "scene": "string",
                                }
                            ),
                        ),
                    ),
                ),
                CommitSeed(
                    summary="Add preview samples for dataset viewer",
                    description="Include JSONL samples and notebook notes for local bug reproduction.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: cc-by-4.0
                                task_categories:
                                  - image-text-to-text
                                language:
                                  - zh
                                  - en
                                pretty_name: Street Sign ZH EN
                                ---

                                # street-sign-zh-en

                                A small bilingual dataset for OCR-friendly sign translation and
                                layout QA. Rows keep the original text, translation, and scene tag.

                                ## Notes

                                Validation rows intentionally mix transport, retail, and public
                                service scenarios so sorting and filtering bugs are easier to spot.
                                """
                            ),
                        ),
                        (
                            "previews/samples.jsonl",
                            jsonl_bytes(
                                (
                                    {
                                        "image": "img_0001.png",
                                        "text_zh": "\u5317\u4eac\u7ad9",
                                        "text_en": "Beijing Railway Station",
                                        "scene": "station",
                                    },
                                    {
                                        "image": "img_0002.png",
                                        "text_zh": "\u5c0f\u5fc3\u53f0\u9636",
                                        "text_en": "Watch Your Step",
                                        "scene": "retail",
                                    },
                                )
                            ),
                        ),
                        (
                            "notebooks/README.md",
                            text_bytes(
                                """
                                # Notebook Notes

                                This dataset is intentionally tiny in local dev. The point is to
                                exercise preview, pagination, and schema rendering without waiting
                                on a large bootstrap import.
                                """
                            ),
                        ),
                    ),
                ),
            ),
            branch="qa-pass",
            tag="2026-04-demo",
            download_path="data/train.csv",
            download_sessions=8,
        ),
        RepoSeed(
            actor="mai_lin",
            repo_type="space",
            namespace="mai_lin",
            name="mai_lin",
            private=False,
            commits=(
                CommitSeed(
                    summary="Create profile showcase space",
                    description="Provide a same-name space so local profile pages render a realistic card.",
                    files=profile_space_files(
                        "Mai Lin Workspace",
                        "Small utilities and pinned demos used for local reproduction.",
                        "amber",
                    ),
                ),
                CommitSeed(
                    summary="Add profile theme preset",
                    description="A second commit makes the space history non-empty for UI testing.",
                    files=(
                        (
                            "assets/theme.json",
                            json_bytes(
                                {
                                    "accent": "amber",
                                    "layout": "split",
                                    "panels": ["repos", "activity", "notes"],
                                }
                            ),
                        ),
                    ),
                ),
            ),
        ),
        RepoSeed(
            actor="mai_lin",
            repo_type="dataset",
            namespace="mai_lin",
            name="internal-evals",
            private=True,
            commits=(
                CommitSeed(
                    summary="Seed private eval artifacts",
                    description="Keep one private user-owned repo for auth and permission checks.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                # internal-evals

                                Private staging area for eval summaries and failure-case review.
                                This repo is intentionally private and only accessible to Mai.
                                """
                            ),
                        ),
                        (
                            "runs/2026-04-15-summary.json",
                            json_bytes(
                                {
                                    "caption_regressions": 7,
                                    "dataset": "street-sign-zh-en",
                                    "notes": "False positives cluster around mirrored storefront text.",
                                }
                            ),
                        ),
                        (
                            "data/failure_cases.jsonl",
                            jsonl_bytes(
                                (
                                    {
                                        "file": "eval_001.png",
                                        "issue": "mirror_text",
                                        "severity": "medium",
                                    },
                                    {
                                        "file": "eval_002.png",
                                        "issue": "crowded_legend",
                                        "severity": "high",
                                    },
                                )
                            ),
                        ),
                    ),
                ),
                CommitSeed(
                    summary="Add reviewer checklist",
                    description="Second commit for commit-history coverage on a private repo.",
                    files=(
                        (
                            "notes/reviewer-checklist.md",
                            text_bytes(
                                """
                                # Reviewer Checklist

                                - confirm sample renders in dataset viewer
                                - compare translated text against bilingual CSV rows
                                - log UI regressions with the seeded repo name
                                """
                            ),
                        ),
                    ),
                ),
            ),
            download_path="runs/2026-04-15-summary.json",
            download_sessions=1,
        ),
        RepoSeed(
            actor="mai_lin",
            repo_type="space",
            namespace="aurora-labs",
            name="aurora-labs",
            private=False,
            commits=(
                CommitSeed(
                    summary="Create org showcase space",
                    description="Same-name org space keeps organization profile pages representative.",
                    files=profile_space_files(
                        "Aurora Labs Demo Portal",
                        "Landing page for OCR demos, pinned datasets, and release notes.",
                        "indigo",
                    ),
                ),
                CommitSeed(
                    summary="Add roadmap note",
                    description="A lightweight follow-up commit for org space history.",
                    files=(
                        (
                            "docs/roadmap.md",
                            text_bytes(
                                """
                                # Local Demo Roadmap

                                - tighten OCR-lite benchmark reporting
                                - keep receipt-layout-bench labels stable for bug repro
                                - mirror one private support model for permission testing
                                """
                            ),
                        ),
                    ),
                ),
            ),
        ),
        RepoSeed(
            actor="mai_lin",
            repo_type="model",
            namespace="aurora-labs",
            name="aurora-ocr-lite",
            private=False,
            commits=(
                CommitSeed(
                    summary="Publish OCR-lite baseline",
                    description="Public model repo with LFS checkpoint and readable metadata.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: apache-2.0
                                library_name: transformers
                                pipeline_tag: image-to-text
                                tags:
                                  - ocr
                                  - receipts
                                  - multilingual
                                ---

                                # aurora-ocr-lite

                                An OCR-focused checkpoint for receipt snippets, payment slips,
                                and service counter paperwork.
                                """
                            ),
                        ),
                        (
                            "config.json",
                            json_bytes(
                                {
                                    "backbone": "vit-small-patch16-384",
                                    "decoder": "bart-base",
                                    "max_position_embeddings": 512,
                                    "torch_dtype": "float16",
                                }
                            ),
                        ),
                        (
                            "vocab.txt",
                            text_bytes(
                                """
                                [PAD]
                                [UNK]
                                total
                                subtotal
                                tax
                                cashier
                                paid
                                """
                            ),
                        ),
                        (
                            "checkpoints/aurora-ocr-lite.safetensors",
                            lfs_blob("aurora-ocr-lite"),
                        ),
                    ),
                ),
                CommitSeed(
                    summary="Add benchmark export and release notes",
                    description="Keep one public org model slightly more active for trending and history views.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: apache-2.0
                                library_name: transformers
                                pipeline_tag: image-to-text
                                tags:
                                  - ocr
                                  - receipts
                                  - multilingual
                                ---

                                # aurora-ocr-lite

                                An OCR-focused checkpoint for receipt snippets, payment slips,
                                and service counter paperwork.

                                ## Release notes

                                - reduced hallucinated currency markers on narrow receipt crops
                                - added benchmark export used by the admin dashboard smoke tests
                                """
                            ),
                        ),
                        (
                            "eval/benchmark.json",
                            json_bytes(
                                {
                                    "cer": 0.081,
                                    "wer": 0.119,
                                    "latency_ms_p50": 64,
                                    "latency_ms_p95": 92,
                                }
                            ),
                        ),
                        (
                            "scripts/export_notes.md",
                            text_bytes(
                                """
                                # Export Notes

                                Checkpoint is intentionally small and fake. It only exists so local
                                flows hit LFS, quota, and file-tree code paths.
                                """
                            ),
                        ),
                    ),
                ),
            ),
            branch="benchmark-v2",
            tag="v0.3.0",
            download_path="checkpoints/aurora-ocr-lite.safetensors",
            download_sessions=12,
        ),
        RepoSeed(
            actor="leo_park",
            repo_type="dataset",
            namespace="aurora-labs",
            name="receipt-layout-bench",
            private=False,
            commits=(
                CommitSeed(
                    summary="Create receipt layout benchmark",
                    description="Public dataset repo with JSONL splits for dataset preview coverage.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: cc-by-4.0
                                pretty_name: Receipt Layout Bench
                                task_categories:
                                  - token-classification
                                ---

                                # receipt-layout-bench

                                Annotation benchmark for merchant, total, tax, and timestamp spans.
                                """
                            ),
                        ),
                        (
                            "splits/train.jsonl",
                            jsonl_bytes(
                                (
                                    {
                                        "image": "train_0001.png",
                                        "merchant": "North Pier Cafe",
                                        "total": "18.40",
                                        "currency": "USD",
                                    },
                                    {
                                        "image": "train_0002.png",
                                        "merchant": "River Town Mart",
                                        "total": "42.15",
                                        "currency": "USD",
                                    },
                                )
                            ),
                        ),
                        (
                            "splits/test.jsonl",
                            jsonl_bytes(
                                (
                                    {
                                        "image": "test_0001.png",
                                        "merchant": "Airport Bento",
                                        "total": "9.80",
                                        "currency": "USD",
                                    },
                                    {
                                        "image": "test_0002.png",
                                        "merchant": "Harbor Books",
                                        "total": "27.10",
                                        "currency": "USD",
                                    },
                                )
                            ),
                        ),
                        (
                            "schema/fields.json",
                            json_bytes(
                                {
                                    "merchant": "string",
                                    "total": "string",
                                    "currency": "string",
                                    "timestamp": "string",
                                }
                            ),
                        ),
                    ),
                ),
                CommitSeed(
                    summary="Add annotation guide",
                    description="Second dataset commit for history, tree diffing, and docs rendering.",
                    files=(
                        (
                            "docs/annotation-guide.md",
                            text_bytes(
                                """
                                # Annotation Guide

                                - mark printed totals, not handwritten notes
                                - keep currency in a dedicated field
                                - preserve merchant spelling from source image
                                """
                            ),
                        ),
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: cc-by-4.0
                                pretty_name: Receipt Layout Bench
                                task_categories:
                                  - token-classification
                                ---

                                # receipt-layout-bench

                                Annotation benchmark for merchant, total, tax, and timestamp spans.

                                The local seed intentionally mixes neat and messy receipts to cover
                                pagination, filters, and table previews.
                                """
                            ),
                        ),
                    ),
                ),
            ),
            branch="supplier-a-refresh",
            tag="v1.0.0",
            download_path="splits/test.jsonl",
            download_sessions=5,
        ),
        RepoSeed(
            actor="mai_lin",
            repo_type="model",
            namespace="aurora-labs",
            name="customer-support-rag",
            private=True,
            commits=(
                CommitSeed(
                    summary="Seed private support model workspace",
                    description="Private org repo for auth-only browsing and settings checks.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                # customer-support-rag

                                Internal-only retrieval and prompt assets for support workflows.
                                This repo is private and visible to Aurora Labs members only.
                                """
                            ),
                        ),
                        (
                            "prompt/system.txt",
                            text_bytes(
                                """
                                You are a cautious support assistant. Answer only with facts from
                                the indexed knowledge base, and cite the exact article title.
                                """
                            ),
                        ),
                        (
                            "retrieval/index-schema.json",
                            json_bytes(
                                {
                                    "article_id": "string",
                                    "channel": "string",
                                    "lang": "string",
                                    "text": "string",
                                }
                            ),
                        ),
                        (
                            "config.json",
                            json_bytes(
                                {
                                    "chunk_size": 384,
                                    "embedding_model": "bge-small-en-v1.5",
                                    "top_k": 6,
                                }
                            ),
                        ),
                    ),
                ),
                CommitSeed(
                    summary="Add ops runbook",
                    description="Keep a second private-org commit for local history inspection.",
                    files=(
                        (
                            "docs/runbook.md",
                            text_bytes(
                                """
                                # Runbook

                                - refresh embeddings weekly
                                - snapshot prompts before frontend demos
                                - record regressions against the fixed local seed data
                                """
                            ),
                        ),
                    ),
                ),
            ),
            download_path="prompt/system.txt",
            download_sessions=1,
        ),
        RepoSeed(
            actor="noah_kim",
            repo_type="model",
            namespace="harbor-vision",
            name="marine-seg-small",
            private=False,
            commits=(
                CommitSeed(
                    summary="Publish marine segmentation starter model",
                    description="Public vision model with another fake LFS checkpoint.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: apache-2.0
                                pipeline_tag: image-segmentation
                                tags:
                                  - segmentation
                                  - marine
                                  - edge
                                ---

                                # marine-seg-small

                                Compact segmentation model for harbor waterlines, safety zones,
                                and dock equipment outlines.
                                """
                            ),
                        ),
                        (
                            "config.json",
                            json_bytes(
                                {
                                    "backbone": "convnext-tiny",
                                    "classes": ["water", "dock", "vessel", "buoy"],
                                    "input_size": 512,
                                }
                            ),
                        ),
                        (
                            "labels.json",
                            json_bytes(
                                {
                                    "0": "water",
                                    "1": "dock",
                                    "2": "vessel",
                                    "3": "buoy",
                                }
                            ),
                        ),
                        (
                            "checkpoints/marine-seg-small.safetensors",
                            lfs_blob("marine-seg-small"),
                        ),
                    ),
                ),
                CommitSeed(
                    summary="Add harbor evaluation report",
                    description="Second model commit for history and stats coverage.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: apache-2.0
                                pipeline_tag: image-segmentation
                                tags:
                                  - segmentation
                                  - marine
                                  - edge
                                ---

                                # marine-seg-small

                                Compact segmentation model for harbor waterlines, safety zones,
                                and dock equipment outlines.

                                ## Eval highlights

                                - best IoU on waterline masks from overcast camera feeds
                                - weaker on stacked cargo edges during dusk
                                """
                            ),
                        ),
                        (
                            "eval/coastal-harbor.json",
                            json_bytes(
                                {
                                    "iou_dock": 0.84,
                                    "iou_vessel": 0.79,
                                    "iou_water": 0.91,
                                }
                            ),
                        ),
                    ),
                ),
            ),
            branch="saltwater-eval",
            tag="v1.1.0",
            download_path="checkpoints/marine-seg-small.safetensors",
            download_sessions=6,
        ),
        RepoSeed(
            actor="noah_kim",
            repo_type="space",
            namespace="harbor-vision",
            name="smoke-test-dashboard",
            private=True,
            commits=(
                CommitSeed(
                    summary="Create private smoke-test dashboard",
                    description="Private org space used for auth and space rendering checks.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                # smoke-test-dashboard

                                Private dashboard for camera ingest smoke tests and deployment sign-off.
                                """
                            ),
                        ),
                        (
                            "app.py",
                            text_bytes(
                                """
                                import gradio as gr

                                dashboard = gr.Interface(
                                    fn=lambda status: f"dashboard status: {status}",
                                    inputs=gr.Textbox(label="Input"),
                                    outputs=gr.Textbox(label="Output"),
                                    title="Smoke Test Dashboard",
                                )

                                if __name__ == "__main__":
                                    dashboard.launch()
                                """
                            ),
                        ),
                        ("requirements.txt", text_bytes("gradio>=4.44.0")),
                    ),
                ),
                CommitSeed(
                    summary="Add dashboard notes",
                    description="Second private-space commit for browsing stateful history locally.",
                    files=(
                        (
                            "dashboards/README.md",
                            text_bytes(
                                """
                                # Dashboard Notes

                                Fixed local fixtures are better than random telemetry when the goal
                                is to reproduce layout and auth bugs.
                                """
                            ),
                        ),
                    ),
                ),
            ),
            download_path="README.md",
            download_sessions=1,
        ),
        RepoSeed(
            actor="leo_park",
            repo_type="space",
            namespace="leo_park",
            name="formula-checker-lite",
            private=False,
            commits=(
                CommitSeed(
                    summary="Create public formula checker demo",
                    description="Lightweight public space for user profile and space listings.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                # formula-checker-lite

                                Small browser demo that validates spreadsheet-style formulas and
                                flags obviously broken references.
                                """
                            ),
                        ),
                        (
                            "app.py",
                            text_bytes(
                                """
                                import gradio as gr

                                def validate(expr: str) -> str:
                                    return "looks valid" if "=" in expr else "missing leading ="

                                demo = gr.Interface(
                                    fn=validate,
                                    inputs=gr.Textbox(label="Formula"),
                                    outputs=gr.Textbox(label="Status"),
                                    title="Formula Checker Lite",
                                )

                                if __name__ == "__main__":
                                    demo.launch()
                                """
                            ),
                        ),
                        ("requirements.txt", text_bytes("gradio>=4.44.0")),
                    ),
                ),
                CommitSeed(
                    summary="Add preset expressions",
                    description="Second commit keeps this user-owned space non-trivial.",
                    files=(
                        (
                            "assets/presets.json",
                            json_bytes(
                                {
                                    "valid": "=SUM(A1:A3)",
                                    "invalid": "SUM(A1:A3)",
                                    "cross_sheet": "=Sheet2!B4",
                                }
                            ),
                        ),
                    ),
                ),
            ),
            download_path="README.md",
            download_sessions=2,
        ),
        RepoSeed(
            actor="sara_chen",
            repo_type="dataset",
            namespace="sara_chen",
            name="invoice-entities-mini",
            private=False,
            commits=(
                CommitSeed(
                    summary="Seed invoice entity dataset",
                    description="Public user dataset so profile pages are not empty.",
                    files=(
                        (
                            "README.md",
                            text_bytes(
                                """
                                ---
                                license: cc-by-4.0
                                pretty_name: Invoice Entities Mini
                                task_categories:
                                  - token-classification
                                ---

                                # invoice-entities-mini

                                Tiny invoice entity dataset for local schema, preview, and table rendering checks.
                                """
                            ),
                        ),
                        (
                            "data/train.jsonl",
                            jsonl_bytes(
                                (
                                    {
                                        "invoice_id": "inv_1001",
                                        "vendor": "Blue Harbor Logistics",
                                        "amount": "1240.00",
                                    },
                                    {
                                        "invoice_id": "inv_1002",
                                        "vendor": "Northline Design",
                                        "amount": "315.50",
                                    },
                                )
                            ),
                        ),
                        (
                            "data/test.jsonl",
                            jsonl_bytes(
                                (
                                    {
                                        "invoice_id": "inv_2001",
                                        "vendor": "River Street Foods",
                                        "amount": "89.20",
                                    },
                                )
                            ),
                        ),
                        (
                            "schema.json",
                            json_bytes(
                                {
                                    "invoice_id": "string",
                                    "vendor": "string",
                                    "amount": "string",
                                }
                            ),
                        ),
                    ),
                ),
                CommitSeed(
                    summary="Add notebook notes",
                    description="Second public dataset commit for file tree and commit history coverage.",
                    files=(
                        (
                            "notebooks/README.md",
                            text_bytes(
                                """
                                # Notebook Notes

                                Keep the local seed tiny. If a preview bug shows up here, it is much
                                easier to reason about than a random large import.
                                """
                            ),
                        ),
                    ),
                ),
            ),
            download_path="data/train.jsonl",
            download_sessions=3,
        ),
    )


# Repo fixtures, built once when the module is imported.
REPO_SEEDS = build_repo_seeds()

# Likes to seed, one row per like. The last three columns line up with the
# (repo_type, namespace, name) parameters of like_repo().
# NOTE(review): the first column is presumably the username of the account
# that performs the like -- confirm against the code that consumes LIKES.
LIKES: tuple[tuple[str, str, str, str], ...] = (
    ("leo_park", "model", "mai_lin", "lineart-caption-base"),
    ("leo_park", "dataset", "mai_lin", "street-sign-zh-en"),
    ("leo_park", "model", "harbor-vision", "marine-seg-small"),
    ("sara_chen", "model", "mai_lin", "lineart-caption-base"),
    ("sara_chen", "model", "aurora-labs", "aurora-ocr-lite"),
    ("sara_chen", "dataset", "aurora-labs", "receipt-layout-bench"),
    ("noah_kim", "model", "aurora-labs", "aurora-ocr-lite"),
    ("noah_kim", "dataset", "mai_lin", "street-sign-zh-en"),
    ("noah_kim", "space", "leo_park", "formula-checker-lite"),
    ("ivy_ops", "model", "mai_lin", "lineart-caption-base"),
    ("ivy_ops", "model", "aurora-labs", "aurora-ocr-lite"),
    ("ivy_ops", "dataset", "sara_chen", "invoice-entities-mini"),
    ("mai_lin", "model", "harbor-vision", "marine-seg-small"),
    ("mai_lin", "space", "leo_park", "formula-checker-lite"),
    ("mai_lin", "dataset", "aurora-labs", "receipt-layout-bench"),
)


def account_index() -> dict[str, AccountSeed]:
    """Return the entries of ACCOUNTS keyed by their ``username``."""
    by_username = {}
    for seed in ACCOUNTS:
        by_username[seed.username] = seed
    return by_username


def repo_slug(repo: RepoSeed) -> str:
    """Build a slash-free identifier from the repo's type, namespace, and name.

    The three parts are joined with ``-`` and any ``/`` in the result is
    replaced with ``-`` as well.
    """
    joined = "{}-{}-{}".format(repo.repo_type, repo.namespace, repo.name)
    return joined.replace("/", "-")


def make_avatar_bytes(label: str, background: str, accent: str) -> bytes:
    """Render a 512x512 avatar and return it encoded as PNG bytes.

    The image is a ``background``-filled square with an ``accent`` rounded
    outline and an ``accent`` disc. Up to two initials are drawn over the disc
    in the ``background`` colour; they come from the first letters of the
    first two whitespace- or hyphen-separated words of ``label``.
    """
    side = 512
    canvas = Image.new("RGB", (side, side), background)
    pen = ImageDraw.Draw(canvas)

    # Frame first, then the disc that sits behind the initials.
    pen.rounded_rectangle((48, 48, 464, 464), radius=96, outline=accent, width=16)
    pen.ellipse((120, 120, 392, 392), fill=accent)

    words = label.replace("-", " ").split()
    initials = "".join(word[0].upper() for word in words[:2])

    # Centre the text using the bounding box measured at the origin.
    font = ImageFont.load_default()
    left, top, right, bottom = draw_box = pen.textbbox((0, 0), initials, font=font)
    origin = ((side - (right - left)) / 2, (side - (bottom - top)) / 2)
    pen.text(origin, initials, fill=background, font=font)

    sink = io.BytesIO()
    canvas.save(sink, format="PNG")
    return sink.getvalue()


def describe_error(response: httpx.Response) -> str:
    """Summarise a response as ``HTTP <status>: <body>``.

    The body is the decoded JSON payload when ``response.json()`` succeeds,
    otherwise the raw ``response.text``.
    """
    try:
        detail = response.json()
    except Exception:
        # Best effort: any decoding failure falls back to the raw body text.
        detail = response.text
    return "HTTP {}: {}".format(response.status_code, detail)


async def ensure_response(
    response: httpx.Response,
    action: str,
    allowed_statuses: tuple[int, ...] = (200,),
) -> httpx.Response:
    """Return ``response`` unchanged when its status code is allowed.

    Args:
        response: The response to check.
        action: Short description of the attempted step, used in the error.
        allowed_statuses: Status codes that count as success.

    Raises:
        SeedError: If the status code is not in ``allowed_statuses``.
    """
    if response.status_code in allowed_statuses:
        return response
    raise SeedError(f"{action} failed with {describe_error(response)}")


def url_to_internal_path(url: str) -> str:
    """Reduce a URL to its path plus query string.

    Scheme, host, and fragment are dropped. An empty path becomes ``/``, and
    the query is appended after ``?`` only when it is non-empty.
    """
    parts = urlsplit(url)
    base = parts.path if parts.path else "/"
    return f"{base}?{parts.query}" if parts.query else base


async def user_seed_exists(client: httpx.AsyncClient) -> bool:
    """Report whether the primary demo user is already known to the API.

    Returns ``False`` when the type lookup for ``PRIMARY_USERNAME`` answers
    404 and ``True`` when it answers 200.

    Raises:
        SeedError: For any other status code.
    """
    lookup = await client.get(
        f"/api/users/{PRIMARY_USERNAME}/type",
        params={"fallback": "false"},
    )
    if lookup.status_code != 404:
        await ensure_response(lookup, f"check existing seed for {PRIMARY_USERNAME}")
        return True
    return False


async def register_account(client: httpx.AsyncClient, account: AccountSeed) -> None:
    """Register ``account`` with the shared default password.

    A 200 response counts as success. A 400 response whose JSON body mentions
    "exists" or "conflicts" is also accepted (presumably the account was
    registered by an earlier run).

    Raises:
        SeedError: For any other response.
    """
    credentials = {
        "username": account.username,
        "email": account.email,
        "password": DEFAULT_PASSWORD,
    }
    reply = await client.post("/api/auth/register", json=credentials)
    status = reply.status_code

    if status == 200:
        return

    if status == 400:
        detail = str(reply.json())
        if any(marker in detail for marker in ("exists", "conflicts")):
            return

    raise SeedError(f"register {account.username} failed with {describe_error(reply)}")


async def login_account(client: httpx.AsyncClient, account: AccountSeed) -> None:
    """Log ``account`` in on ``client`` using the shared default password.

    Raises:
        SeedError: If the login request does not return 200, or if the client
            holds no ``session_id`` cookie afterwards.
    """
    credentials = {"username": account.username, "password": DEFAULT_PASSWORD}
    reply = await client.post("/api/auth/login", json=credentials)
    await ensure_response(reply, f"login {account.username}")

    if "session_id" in client.cookies:
        return
    raise SeedError(f"login {account.username} did not set a session cookie")


async def upload_avatar(
    client: httpx.AsyncClient,
    path: str,
    label: str,
    background: str,
    accent: str,
) -> None:
    """Generate an avatar PNG for ``label`` and POST it to ``path``.

    The image is sent as the multipart field ``file`` with the filename
    ``<label>.png`` and content type ``image/png``.

    Raises:
        SeedError: If the upload does not return 200.
    """
    png = make_avatar_bytes(label, background, accent)
    upload = {"file": (f"{label}.png", png, "image/png")}
    reply = await client.post(path, files=upload)
    await ensure_response(reply, f"upload avatar for {label}")


async def configure_user_profile(client: httpx.AsyncClient, account: AccountSeed) -> None:
    """Write the account's profile settings, then upload its avatar.

    Raises:
        SeedError: If the settings update or the avatar upload does not
            return 200.
    """
    username = account.username
    profile = {
        "email": account.email,
        "full_name": account.full_name,
        "bio": account.bio,
        "website": account.website,
        "social_media": account.social_media,
    }
    reply = await client.put(f"/api/users/{username}/settings", json=profile)
    await ensure_response(reply, f"update user settings for {username}")

    await upload_avatar(
        client,
        f"/api/users/{username}/avatar",
        username,
        account.avatar_bg,
        account.avatar_accent,
    )


async def create_organization(
    client: httpx.AsyncClient, organization: OrganizationSeed
) -> None:
    """Create the organization, tolerating one that already exists.

    A 200 response, or a 400 response whose JSON body contains
    "already exists", is treated as success.

    Raises:
        SeedError: For any other response.
    """
    body = {
        "name": organization.name,
        "description": organization.description,
    }
    reply = await client.post("/org/create", json=body)
    status = reply.status_code

    # The JSON body is only inspected for a 400, never for a 200.
    if status == 200 or (status == 400 and "already exists" in str(reply.json())):
        return

    raise SeedError(
        f"create organization {organization.name} failed with {describe_error(reply)}"
    )


async def ensure_org_member(
    client: httpx.AsyncClient,
    org_name: str,
    username: str,
    role: str,
) -> None:
    """Add ``username`` to ``org_name`` and then set their role explicitly.

    The add request may answer 200 or 400; both are accepted without looking
    at the body. The role is then written with a separate PUT.

    Raises:
        SeedError: If the add request returns anything other than 200/400, or
            if the role update does not return 200.
    """
    membership = {"username": username, "role": role}
    added = await client.post(f"/org/{org_name}/members", json=membership)
    if added.status_code not in (200, 400):
        raise SeedError(
            f"add {username} to {org_name} failed with {describe_error(added)}"
        )

    # Re-apply the role so it is deterministic even for a pre-existing member.
    updated = await client.put(
        f"/org/{org_name}/members/{username}",
        json={"role": role},
    )
    await ensure_response(updated, f"set role for {username} in {org_name}")


async def configure_organization(
    client: httpx.AsyncClient, organization: OrganizationSeed
) -> None:
    """Write the organization's settings, then upload its avatar.

    Raises:
        SeedError: If the settings update or the avatar upload does not
            return 200.
    """
    org_name = organization.name
    settings = {
        "description": organization.description,
        "bio": organization.bio,
        "website": organization.website,
        "social_media": organization.social_media,
    }
    reply = await client.put(f"/api/organizations/{org_name}/settings", json=settings)
    await ensure_response(reply, f"update organization settings for {org_name}")

    await upload_avatar(
        client,
        f"/api/organizations/{org_name}/avatar",
        org_name,
        organization.avatar_bg,
        organization.avatar_accent,
    )


async def create_repo(client: httpx.AsyncClient, repo: RepoSeed) -> None:
    """Create the repository, tolerating one that already exists.

    The ``organization`` field is sent only when the repo's namespace differs
    from the acting user. A 200 response, or a 400 response whose JSON body
    contains "already exists", is treated as success.

    Raises:
        SeedError: For any other response.
    """
    owner_field = {"organization": repo.namespace} if repo.namespace != repo.actor else {}
    body = {
        "type": repo.repo_type,
        "name": repo.name,
        "private": repo.private,
        **owner_field,
    }

    reply = await client.post("/api/repos/create", json=body)
    status = reply.status_code

    # The JSON body is only inspected for a 400, never for a 200.
    if status == 200 or (status == 400 and "already exists" in str(reply.json())):
        return

    raise SeedError(f"create repo {repo.namespace}/{repo.name} failed with {describe_error(reply)}")


async def upload_lfs_object(
    client: httpx.AsyncClient,
    repo: RepoSeed,
    content: bytes,
) -> tuple[str, int]:
    """Push ``content`` through the repo's LFS batch endpoint.

    Steps: request an upload batch for the object, PUT the bytes to the
    returned upload href with a separate network client, then POST to the
    verify action if one was returned. When the batch response carries no
    upload action, nothing is transferred (presumably the object is already
    stored).

    Returns:
        ``(oid, size)``: the SHA-256 hex digest of ``content`` and its length
        in bytes.

    Raises:
        SeedError: If the batch request fails, the batch entry reports an
            error, the upload does not return 200/201, or verification does
            not return 200.
    """
    repo_label = f"{repo.namespace}/{repo.name}"
    oid = hashlib.sha256(content).hexdigest()
    size = len(content)

    batch_request = {
        "operation": "upload",
        "transfers": ["basic"],
        "objects": [{"oid": oid, "size": size}],
        "hash_algo": "sha256",
        # Local dev exposes the frontend base_url publicly, so verify URLs are
        # rewritten below onto the in-process backend transport.
        "is_browser": True,
    }
    batch_response = await client.post(
        f"/{repo.repo_type}s/{repo.namespace}/{repo.name}.git/info/lfs/objects/batch",
        json=batch_request,
    )
    await ensure_response(batch_response, f"prepare LFS upload for {repo_label}")

    entry = batch_response.json()["objects"][0]
    if entry.get("error"):
        raise SeedError(f"LFS batch returned an error for {repo_label}: {entry['error']}")

    actions = entry.get("actions") or {}
    upload_action = actions.get("upload")
    if not upload_action:
        return oid, size

    # The upload href is fetched with a fresh client rather than ``client``.
    async with httpx.AsyncClient(follow_redirects=False, timeout=60.0) as network_client:
        upload_response = await network_client.put(
            upload_action["href"],
            content=content,
            headers=upload_action.get("header") or {},
        )

    if upload_response.status_code not in (200, 201):
        raise SeedError(
            f"LFS upload failed for {repo_label}: "
            f"HTTP {upload_response.status_code} {upload_response.text}"
        )

    verify_action = actions.get("verify")
    if verify_action:
        # Only path and query of the verify href are kept, so the request
        # goes through ``client``.
        verify_response = await client.post(
            url_to_internal_path(verify_action["href"]),
            json={"oid": oid, "size": size},
        )
        await ensure_response(verify_response, f"verify LFS upload for {repo_label}")

    return oid, size


async def commit_files(
    client: httpx.AsyncClient,
    repo: RepoSeed,
    commit: CommitSeed,
) -> None:
    """Commit every file of ``commit`` to the repo's ``main`` branch.

    Steps:
      1. POST path/size/sha256 metadata to the preupload endpoint to learn,
         per file, its upload mode and whether it should be ignored.
      2. For files whose mode is ``"lfs"``, upload the bytes through
         ``upload_lfs_object`` and reference them by oid in the commit.
      3. Send all other files inline as base64.
      4. POST the operations as NDJSON to the commit endpoint.

    Raises:
        SeedError: If preupload or commit does not return 200, if the
            preupload response has no entry for one of the files, or if an
            LFS upload fails.
    """
    repo_label = f"{repo.namespace}/{repo.name}"
    repo_api = f"/api/{repo.repo_type}s/{repo.namespace}/{repo.name}"

    metadata = [
        {
            "path": path,
            "size": len(content),
            "sha256": hashlib.sha256(content).hexdigest(),
        }
        for path, content in commit.files
    ]

    preupload_response = await client.post(
        f"{repo_api}/preupload/main",
        json={"files": metadata},
    )
    await ensure_response(preupload_response, f"preupload {repo_label}")
    preupload_results = {
        item["path"]: item for item in preupload_response.json().get("files", [])
    }

    # The first NDJSON line is the commit header; file operations follow.
    ndjson_lines = [
        {
            "key": "header",
            "value": {
                "summary": commit.summary,
                "description": commit.description,
            },
        }
    ]

    for path, content in commit.files:
        result = preupload_results.get(path)
        if result is None:
            # Report a missing entry as a seeding failure, not a bare KeyError.
            raise SeedError(
                f"preupload {repo_label} returned no entry for {path}"
            )

        if result["shouldIgnore"]:
            continue

        if result["uploadMode"] == "lfs":
            oid, size = await upload_lfs_object(client, repo, content)
            ndjson_lines.append(
                {
                    "key": "lfsFile",
                    "value": {
                        "path": path,
                        "oid": oid,
                        "size": size,
                        "algo": "sha256",
                    },
                }
            )
            continue

        ndjson_lines.append(
            {
                "key": "file",
                "value": {
                    "path": path,
                    "content": base64.b64encode(content).decode("ascii"),
                    "encoding": "base64",
                },
            }
        )

    ndjson_payload = "\n".join(json.dumps(line, sort_keys=True) for line in ndjson_lines)
    response = await client.post(
        f"{repo_api}/commit/main",
        content=ndjson_payload,
        headers={"Content-Type": "application/x-ndjson"},
    )
    await ensure_response(response, f"commit {repo_label}")


async def create_branch(client: httpx.AsyncClient, repo: RepoSeed) -> None:
    """Create ``repo.branch`` from ``main``, tolerating a branch that already exists.

    Does nothing when the seed declares no branch.

    Args:
        client: HTTP client the branch request is sent with.
        repo: Seed describing the target repository and the optional branch name.

    Raises:
        SeedError: If the API answers with anything other than 200 or an
            "already exists" rejection (status 400 or 409).
    """
    if not repo.branch:
        return

    response = await client.post(
        f"/api/{repo.repo_type}s/{repo.namespace}/{repo.name}/branch",
        json={"branch": repo.branch, "revision": "main"},
    )
    if response.status_code == 200:
        return

    if response.status_code in (400, 409):
        # An error body is not guaranteed to be JSON. Fall back to the raw text
        # so a decode failure ends up as the SeedError below instead of an
        # unrelated ValueError escaping the seeding run.
        try:
            detail = str(response.json())
        except ValueError:
            detail = response.text
        if "already exists" in detail:
            return

    raise SeedError(
        f"create branch {repo.branch} for {repo.namespace}/{repo.name} failed with "
        f"{describe_error(response)}"
    )


async def create_tag(client: httpx.AsyncClient, repo: RepoSeed) -> None:
    """Create ``repo.tag`` pointing at ``main``, tolerating a tag that already exists.

    Does nothing when the seed declares no tag.

    Args:
        client: HTTP client the tag request is sent with.
        repo: Seed describing the target repository and the optional tag name.

    Raises:
        SeedError: If the API answers with anything other than 200 or an
            "already exists" rejection (status 400 or 409).
    """
    if not repo.tag:
        return

    response = await client.post(
        f"/api/{repo.repo_type}s/{repo.namespace}/{repo.name}/tag",
        json={"tag": repo.tag, "revision": "main"},
    )
    if response.status_code == 200:
        return

    if response.status_code in (400, 409):
        # An error body is not guaranteed to be JSON. Fall back to the raw text
        # so a decode failure ends up as the SeedError below instead of an
        # unrelated ValueError escaping the seeding run.
        try:
            detail = str(response.json())
        except ValueError:
            detail = response.text
        if "already exists" in detail:
            return

    raise SeedError(
        f"create tag {repo.tag} for {repo.namespace}/{repo.name} failed with "
        f"{describe_error(response)}"
    )


async def like_repo(
    client: httpx.AsyncClient,
    repo_type: str,
    namespace: str,
    name: str,
) -> None:
    """Like a repository with the given client, tolerating an existing like.

    Args:
        client: HTTP client the like request is sent with.
        repo_type: Singular repository type; an ``s`` is appended to form the URL segment.
        namespace: Namespace segment of the repository path.
        name: Repository name.

    Raises:
        SeedError: If the API answers with anything other than 200 or a 400
            whose body mentions "already liked".
    """
    response = await client.post(f"/api/{repo_type}s/{namespace}/{name}/like")
    if response.status_code == 200:
        return

    if response.status_code == 400:
        # An error body is not guaranteed to be JSON. Fall back to the raw text
        # so a decode failure ends up as the SeedError below instead of an
        # unrelated ValueError escaping the seeding run.
        try:
            detail = str(response.json())
        except ValueError:
            detail = response.text
        if "already liked" in detail:
            return

    raise SeedError(
        f"like {repo_type}/{namespace}/{name} failed with {describe_error(response)}"
    )


async def trigger_download(
    client: httpx.AsyncClient,
    repo: RepoSeed,
    path: str,
    *,
    cookies: dict[str, str] | None = None,
) -> None:
    """Request a file from the ``resolve/main`` endpoint and require a redirect.

    Args:
        client: HTTP client the GET request is sent with.
        repo: Seed describing the repository that holds the file.
        path: File path inside the repository, appended to the resolve URL.
        cookies: Optional cookies forwarded with the request.

    Raises:
        SeedError: If the response status is neither 302 nor 307.
    """
    resolve_url = (
        f"/api/{repo.repo_type}s/{repo.namespace}/{repo.name}/resolve/main/{path}"
    )
    response = await client.get(resolve_url, cookies=cookies)

    # Only a redirect counts as success here.
    if response.status_code in (302, 307):
        return

    raise SeedError(
        f"download seed for {repo.namespace}/{repo.name}:{path} failed with "
        f"{describe_error(response)}"
    )


def build_manifest() -> dict:
    """Assemble the manifest describing the seeded accounts, organizations and repos.

    Returns:
        A dict with the seed version, URLs, login credentials, admin token,
        organization memberships and repository list.
    """
    # Every account except the primary login is listed as an additional user.
    additional_users = []
    for account in ACCOUNTS:
        if account.username == PRIMARY_USERNAME:
            continue
        additional_users.append(
            {
                "username": account.username,
                "password": DEFAULT_PASSWORD,
                "email": account.email,
            }
        )

    organizations = []
    for organization in ORGANIZATIONS:
        member_entries = [
            {"username": username, "role": role}
            for username, role in organization.members
        ]
        organizations.append({"name": organization.name, "members": member_entries})

    repositories = []
    for repo in REPO_SEEDS:
        repositories.append(
            {
                "type": repo.repo_type,
                "namespace": repo.namespace,
                "name": repo.name,
                "private": repo.private,
            }
        )

    main_login = {"username": PRIMARY_USERNAME, "password": DEFAULT_PASSWORD}
    admin_ui = {"url": "http://127.0.0.1:5174", "token": cfg.admin.secret_token}

    manifest = {
        "seed_version": SEED_VERSION,
        "manifest_path": str(MANIFEST_PATH),
        "main_ui_url": cfg.app.base_url,
        "backend_url": INTERNAL_BASE_URL,
        "main_login": main_login,
        "additional_users": additional_users,
        "admin_ui": admin_ui,
        "organizations": organizations,
        "repositories": repositories,
    }
    return manifest


def write_manifest() -> None:
    """Write the manifest as indented, key-sorted JSON to ``MANIFEST_PATH``.

    Missing parent directories are created first; the file is UTF-8 encoded and
    ends with a trailing newline.
    """
    manifest_dir = MANIFEST_PATH.parent
    manifest_dir.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(build_manifest(), indent=2, sort_keys=True)
    MANIFEST_PATH.write_text(serialized + "\n", encoding="utf-8")


def print_summary(seed_applied: bool) -> None:
    """Print the seed outcome and the local demo access details to stdout.

    Args:
        seed_applied: True when this run created the demo data, False when the
            seed was already present and nothing was created.
    """
    if seed_applied:
        state = "Seeded"
    else:
        state = "Seed already present"

    report_lines = (
        f"{state}: {SEED_VERSION}",
        f"Manifest: {MANIFEST_PATH}",
        f"Main UI: {cfg.app.base_url}",
        f"Backend: {INTERNAL_BASE_URL}",
        f"Login: {PRIMARY_USERNAME} / {DEFAULT_PASSWORD}",
        f"Admin UI token: {cfg.admin.secret_token}",
    )
    for line in report_lines:
        print(line)


async def seed_demo_data() -> None:
    """Create the demo accounts, organizations, repositories, likes and downloads.

    All requests go through ``httpx`` clients bound to an ASGI transport wrapping
    ``app``. When ``user_seed_exists`` reports the seed is already present, only
    the manifest is rewritten and the summary printed. Otherwise the full
    sequence runs and the manifest and summary are written at the end.

    Raises:
        SeedError: Propagated from the helper steps when a request fails.
    """
    init_storage()
    transport = httpx.ASGITransport(app=app)
    # NOTE(review): the result is not read anywhere below; the call is kept in
    # case account_index() has effects of its own (e.g. validation) - confirm.
    accounts_by_name = account_index()

    async with AsyncExitStack() as stack:

        async def open_client() -> httpx.AsyncClient:
            """Open a client on the shared transport, closed when the stack exits."""
            return await stack.enter_async_context(
                httpx.AsyncClient(
                    transport=transport,
                    base_url=INTERNAL_BASE_URL,
                    follow_redirects=False,
                )
            )

        seed_client = await open_client()

        # Already seeded: refresh the manifest and report, but create nothing.
        if await user_seed_exists(seed_client):
            write_manifest()
            print_summary(seed_applied=False)
            return

        # Register every account before any of them logs in.
        for account in ACCOUNTS:
            await register_account(seed_client, account)

        # One dedicated client per account, each logged in as that account.
        authed_clients: dict[str, httpx.AsyncClient] = {}
        for account in ACCOUNTS:
            account_client = await open_client()
            await login_account(account_client, account)
            await configure_user_profile(account_client, account)
            authed_clients[account.username] = account_client

        primary_client = authed_clients[PRIMARY_USERNAME]

        # The primary user creates each organization and adds the other members.
        for organization in ORGANIZATIONS:
            await create_organization(primary_client, organization)
            for username, role in organization.members:
                if username != PRIMARY_USERNAME:
                    await ensure_org_member(
                        primary_client, organization.name, username, role
                    )
            await configure_organization(primary_client, organization)

        # Each repository is created and populated by its declared actor.
        for repo in REPO_SEEDS:
            actor_client = authed_clients[repo.actor]
            await create_repo(actor_client, repo)
            for commit in repo.commits:
                await commit_files(actor_client, repo, commit)
            await create_branch(actor_client, repo)
            await create_tag(actor_client, repo)

        for liker, repo_type, namespace, name in LIKES:
            await like_repo(authed_clients[liker], repo_type, namespace, name)

        anon_client = await open_client()

        for repo in REPO_SEEDS:
            download_path = repo.download_path
            if not download_path:
                continue

            if repo.private:
                # Private repos get a single download by the primary user.
                await trigger_download(primary_client, repo, download_path)
            else:
                # Public repos get one anonymous download per session cookie.
                for session_number in range(repo.download_sessions):
                    session_cookie = {
                        "hf_download_session": f"seed-{repo_slug(repo)}-{session_number:02d}"
                    }
                    await trigger_download(
                        anon_client,
                        repo,
                        download_path,
                        cookies=session_cookie,
                    )

        # Download tracking happens in background tasks off the API response path.
        await asyncio.sleep(0.5)

    write_manifest()
    print_summary(seed_applied=True)


def main() -> int:
    """Run the seeding coroutine and map the outcome to a process exit code.

    Returns:
        0 when seeding completes, 1 when it raises ``SeedError`` (the message is
        written to stderr). Other exceptions propagate.
    """
    try:
        asyncio.run(seed_demo_data())
    except SeedError as failure:
        print(f"Seed failed: {failure}", file=sys.stderr)
        exit_code = 1
    else:
        exit_code = 0
    return exit_code


# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())