Merge pull request #3076 from vinta/chore/code-cleanup

chore: simplify website/ Python and polish sponsors section
Vinta Chen
2026-04-19 22:58:38 +08:00
committed by GitHub
7 changed files with 118 additions and 192 deletions

View File

@@ -4,20 +4,12 @@
import json
import re
import shutil
from datetime import datetime, timezone
from datetime import UTC, datetime
from pathlib import Path
from typing import TypedDict
from typing import Any
from jinja2 import Environment, FileSystemLoader
from readme_parser import parse_readme, parse_sponsors
class StarData(TypedDict):
stars: int
owner: str
last_commit_at: str
fetched_at: str
from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
@@ -46,7 +38,7 @@ def extract_github_repo(url: str) -> str | None:
return m.group(1) if m else None
def load_stars(path: Path) -> dict[str, StarData]:
def load_stars(path: Path) -> dict[str, dict]:
"""Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt."""
if path.exists():
try:
@@ -76,68 +68,55 @@ def sort_entries(entries: list[dict]) -> list[dict]:
def extract_entries(
categories: list[dict],
groups: list[dict],
categories: list[ParsedSection],
groups: list[ParsedGroup],
) -> list[dict]:
"""Flatten categories into individual library entries for table display.
Entries appearing in multiple categories are merged into a single entry
with lists of categories and groups.
"""
cat_to_group: dict[str, str] = {}
for group in groups:
for cat in group["categories"]:
cat_to_group[cat["name"]] = group["name"]
cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]}
seen: dict[tuple[str, str], dict] = {} # (url, name) -> entry
entries: list[dict] = []
seen: dict[tuple[str, str], dict[str, Any]] = {} # (url, name) -> entry
entries: list[dict[str, Any]] = []
for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other")
for entry in cat["entries"]:
url = entry["url"]
key = (url, entry["name"])
if key in seen:
existing = seen[key]
if cat["name"] not in existing["categories"]:
existing["categories"].append(cat["name"])
if group_name not in existing["groups"]:
existing["groups"].append(group_name)
subcat = entry["subcategory"]
if subcat:
scoped = f"{cat['name']} > {subcat}"
if not any(s["value"] == scoped for s in existing["subcategories"]):
existing["subcategories"].append({"name": subcat, "value": scoped})
else:
merged = {
key = (entry["url"], entry["name"])
existing: dict[str, Any] | None = seen.get(key)
if existing is None:
existing = {
"name": entry["name"],
"url": url,
"url": entry["url"],
"description": entry["description"],
"categories": [cat["name"]],
"groups": [group_name],
"subcategories": [{"name": entry["subcategory"], "value": f"{cat['name']} > {entry['subcategory']}"}] if entry["subcategory"] else [],
"categories": [],
"groups": [],
"subcategories": [],
"stars": None,
"owner": None,
"last_commit_at": None,
"source_type": detect_source_type(url),
"source_type": detect_source_type(entry["url"]),
"also_see": entry["also_see"],
}
seen[key] = merged
entries.append(merged)
seen[key] = existing
entries.append(existing)
if cat["name"] not in existing["categories"]:
existing["categories"].append(cat["name"])
if group_name not in existing["groups"]:
existing["groups"].append(group_name)
subcat = entry["subcategory"]
if subcat:
scoped = f"{cat['name']} > {subcat}"
if not any(s["value"] == scoped for s in existing["subcategories"]):
existing["subcategories"].append({"name": subcat, "value": scoped})
return entries
def format_stars_short(stars: int) -> str:
"""Format star count as compact string like '230k'."""
if stars >= 1000:
return f"{stars // 1000}k"
return str(stars)
def build(repo_root: str) -> None:
def build(repo_root: Path) -> None:
"""Main build: parse README, render single-page HTML via Jinja2 templates."""
repo = Path(repo_root)
website = repo / "website"
readme_text = (repo / "README.md").read_text(encoding="utf-8")
website = repo_root / "website"
readme_text = (repo_root / "README.md").read_text(encoding="utf-8")
subtitle = ""
for line in readme_text.split("\n"):
@@ -156,7 +135,10 @@ def build(repo_root: str) -> None:
stars_data = load_stars(website / "data" / "github_stars.json")
repo_self = stars_data.get("vinta/awesome-python", {})
repo_stars = format_stars_short(repo_self["stars"]) if "stars" in repo_self else None
repo_stars = None
if "stars" in repo_self:
stars_val = repo_self["stars"]
repo_stars = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val)
for entry in entries:
repo_key = extract_github_repo(entry["url"])
@@ -189,7 +171,7 @@ def build(repo_root: str) -> None:
total_entries=total_entries,
total_categories=len(categories),
repo_stars=repo_stars,
build_date=datetime.now(timezone.utc).strftime("%B %d, %Y"),
build_date=datetime.now(UTC).strftime("%B %d, %Y"),
sponsors=sponsors,
),
encoding="utf-8",
@@ -208,4 +190,4 @@ def build(repo_root: str) -> None:
if __name__ == "__main__":
build(str(Path(__file__).parent.parent))
build(Path(__file__).parent.parent)
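
Note on the extract_entries rewrite above: the old two-branch insert/merge collapses into a get-or-create shape, where a stub entry is registered on first sight and the category/group/subcategory merging then runs unconditionally for new and repeated sightings alike. A minimal sketch of that pattern, with the field set reduced for illustration (this is not the full function):

from typing import Any

def merge_by_key(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    seen: dict[tuple[str, str], dict[str, Any]] = {}
    entries: list[dict[str, Any]] = []
    for item in items:
        key = (item["url"], item["name"])
        existing = seen.get(key)
        if existing is None:
            # First sighting: register a stub with empty accumulators...
            existing = {"name": item["name"], "url": item["url"], "categories": []}
            seen[key] = existing
            entries.append(existing)
        # ...then merge unconditionally, so both paths share one code block.
        if item["category"] not in existing["categories"]:
            existing["categories"].append(item["category"])
    return entries

The datetime.now(UTC) change is a pure alias swap: datetime.UTC was added in Python 3.11 as a shorthand for timezone.utc, so the rendered build date is unchanged.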

View File

@@ -5,7 +5,9 @@ import json
import os
import re
import sys
from datetime import datetime, timezone
from collections.abc import Sequence
from datetime import UTC, datetime, timedelta
from itertools import batched
from pathlib import Path
import httpx
@@ -44,10 +46,8 @@ def save_cache(cache: dict) -> None:
)
def build_graphql_query(repos: list[str]) -> str:
def build_graphql_query(repos: Sequence[str]) -> str:
"""Build a GraphQL query with aliases for up to 100 repos."""
if not repos:
return ""
parts = []
for i, repo in enumerate(repos):
owner, name = repo.split("/", 1)
@@ -64,7 +64,7 @@ def build_graphql_query(repos: list[str]) -> str:
def parse_graphql_response(
data: dict,
repos: list[str],
repos: Sequence[str],
) -> dict[str, dict]:
"""Parse GraphQL response into {owner/repo: {stars, owner}} dict."""
result = {}
@@ -82,9 +82,7 @@ def parse_graphql_response(
return result
def fetch_batch(
repos: list[str], *, client: httpx.Client,
) -> dict[str, dict]:
def fetch_batch(repos: Sequence[str], client: httpx.Client) -> dict[str, dict]:
"""Fetch star data for a batch of repos via GitHub GraphQL API."""
query = build_graphql_query(repos)
if not query:
@@ -112,7 +110,7 @@ def main() -> None:
print(f"Found {len(current_repos)} GitHub repos in README.md")
cache = load_stars(CACHE_FILE)
now = datetime.now(timezone.utc)
now = datetime.now(UTC)
# Prune entries not in current README
pruned = {k: v for k, v in cache.items() if k in current_repos}
@@ -121,13 +119,13 @@ def main() -> None:
cache = pruned
# Determine which repos need fetching (missing or stale)
max_age = timedelta(hours=CACHE_MAX_AGE_HOURS)
to_fetch = []
for repo in sorted(current_repos):
entry = cache.get(repo)
if entry and "fetched_at" in entry:
fetched = datetime.fromisoformat(entry["fetched_at"])
age_hours = (now - fetched).total_seconds() / 3600
if age_hours < CACHE_MAX_AGE_HOURS:
if now - fetched < max_age:
continue
to_fetch.append(repo)
@@ -150,13 +148,11 @@ def main() -> None:
transport=httpx.HTTPTransport(retries=2),
timeout=30,
) as client:
for i in range(0, len(to_fetch), BATCH_SIZE):
batch = to_fetch[i : i + BATCH_SIZE]
batch_num = i // BATCH_SIZE + 1
for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1):
print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...")
try:
results = fetch_batch(batch, client=client)
results = fetch_batch(batch, client)
except httpx.HTTPStatusError as e:
print(f"HTTP error {e.response.status_code}", file=sys.stderr)
if e.response.status_code == 401:
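
Two of the changes above lean on newer stdlib pieces; a standalone sketch (the repo list and timestamp are hypothetical):

from datetime import UTC, datetime, timedelta
from itertools import batched  # added in Python 3.12

BATCH_SIZE = 100
to_fetch = [f"owner/repo-{i}" for i in range(250)]  # hypothetical repo list

# batched() yields tuples of up to BATCH_SIZE items, and enumerate(..., 1)
# replaces the manual i // BATCH_SIZE + 1 index arithmetic.
for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1):
    print(f"batch {batch_num}: {len(batch)} repos")

# The staleness check compares timedeltas directly instead of converting
# the age to fractional hours first.
fetched = datetime.fromisoformat("2026-04-18T00:00:00+00:00")  # hypothetical fetched_at
is_fresh = datetime.now(UTC) - fetched < timedelta(hours=24)

One nuance: batched() yields tuples where the old slicing produced lists. The call sites only iterate the batch, and the signatures were widened to Sequence[str] in the same commit, so the swap is behavior-preserving.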

View File

@@ -62,46 +62,44 @@ def slugify(name: str) -> str:
# --- Inline renderers -------------------------------------------------------
def render_inline_html(children: list[SyntaxTreeNode]) -> str:
"""Render inline AST nodes to HTML with proper escaping."""
def _render_inline(children: list[SyntaxTreeNode], *, html: bool) -> str:
"""Render inline AST nodes to HTML or plain text."""
parts: list[str] = []
for child in children:
match child.type:
case "text":
parts.append(str(escape(child.content)))
parts.append(str(escape(child.content)) if html else child.content)
case "html_inline":
if html:
parts.append(str(escape(child.content)))
case "softbreak":
parts.append(" ")
case "link":
href = str(escape(child.attrGet("href") or ""))
inner = render_inline_html(child.children)
parts.append(
f'<a href="{href}" target="_blank" rel="noopener">{inner}</a>'
)
case "em":
parts.append(f"<em>{render_inline_html(child.children)}</em>")
case "strong":
parts.append(f"<strong>{render_inline_html(child.children)}</strong>")
case "code_inline":
parts.append(f"<code>{escape(child.content)}</code>")
case "html_inline":
parts.append(str(escape(child.content)))
parts.append(f"<code>{escape(child.content)}</code>" if html else child.content)
case "link":
inner = _render_inline(child.children, html=html)
if html:
href = str(escape(_href(child)))
parts.append(f'<a href="{href}" target="_blank" rel="noopener">{inner}</a>')
else:
parts.append(inner)
case "em":
inner = _render_inline(child.children, html=html)
parts.append(f"<em>{inner}</em>" if html else inner)
case "strong":
inner = _render_inline(child.children, html=html)
parts.append(f"<strong>{inner}</strong>" if html else inner)
return "".join(parts)
def render_inline_html(children: list[SyntaxTreeNode]) -> str:
"""Render inline AST nodes to HTML with proper escaping."""
return _render_inline(children, html=True)
def render_inline_text(children: list[SyntaxTreeNode]) -> str:
"""Render inline AST nodes to plain text (links become their text)."""
parts: list[str] = []
for child in children:
match child.type:
case "text":
parts.append(child.content)
case "softbreak":
parts.append(" ")
case "code_inline":
parts.append(child.content)
case "em" | "strong" | "link":
parts.append(render_inline_text(child.children))
return "".join(parts)
return _render_inline(children, html=False)
# --- AST helpers -------------------------------------------------------------
@@ -147,6 +145,12 @@ def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
return None
def _href(link: SyntaxTreeNode) -> str:
"""Return the link's href attribute as a string, or '' if missing."""
href = link.attrGet("href")
return href if isinstance(href, str) else ""
def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
"""Find the inline node in a list_item's paragraph."""
para = _find_child(node, "paragraph")
@@ -155,19 +159,6 @@ def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
return _find_child(para, "inline")
def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None:
"""Find the first link node among inline children."""
for child in inline.children:
if child.type == "link":
return child
return None
def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool:
"""Check if the link is the first child of inline (a real entry, not a subcategory label)."""
return bool(inline.children) and inline.children[0] is link
def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str:
"""Extract description HTML from inline content after the first link.
@@ -206,9 +197,9 @@ def _parse_list_entries(
if inline is None:
continue
first_link = _find_first_link(inline)
first_link = _find_child(inline, "link")
if first_link is None or not _is_leading_link(inline, first_link):
if first_link is None or inline.children[0] is not first_link:
# Subcategory label: take text before the first link, strip trailing separators
pre_link = []
for child in inline.children:
@@ -223,7 +214,7 @@ def _parse_list_entries(
# Entry with a link
name = render_inline_text(first_link.children)
url = first_link.attrGet("href") or ""
url = _href(first_link)
desc_html = _extract_description_html(inline, first_link)
# Collect also_see from nested bullet_list
@@ -235,11 +226,11 @@ def _parse_list_entries(
continue
sub_inline = _find_inline(sub_item)
if sub_inline:
sub_link = _find_first_link(sub_inline)
sub_link = _find_child(sub_inline, "link")
if sub_link:
also_see.append(AlsoSee(
name=render_inline_text(sub_link.children),
url=sub_link.attrGet("href") or "",
url=_href(sub_link),
))
entries.append(ParsedEntry(
@@ -324,16 +315,13 @@ def _parse_grouped_sections(
def flush_group() -> None:
nonlocal current_group_name, current_group_cats
if not current_group_cats:
current_group_name = None
current_group_cats = []
return
name = current_group_name or "Other"
groups.append(ParsedGroup(
name=name,
slug=slugify(name),
categories=list(current_group_cats),
))
if current_group_cats:
name = current_group_name or "Other"
groups.append(ParsedGroup(
name=name,
slug=slugify(name),
categories=list(current_group_cats),
))
current_group_name = None
current_group_cats = []
@@ -372,22 +360,17 @@ def _find_link_deep(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
def _parse_sponsor_item(inline: SyntaxTreeNode) -> ParsedSponsor | None:
"""Parse `**[name](url)**: description` (or `[name](url) - description`)."""
link = _find_link_deep(inline)
if link is None:
return None
name = render_inline_text(link.children)
url = link.attrGet("href") or ""
split_idx = None
for i, child in enumerate(inline.children):
if child is link or _find_link_deep(child) is link:
split_idx = i
break
if split_idx is None:
return None
desc_html = render_inline_html(inline.children[split_idx + 1 :])
desc_html = _SPONSOR_SEP_RE.sub("", desc_html)
return ParsedSponsor(name=name, url=url, description=desc_html)
for split_idx, child in enumerate(inline.children):
link = child if child.type == "link" else _find_link_deep(child)
if link is None:
continue
desc_html = render_inline_html(inline.children[split_idx + 1 :])
return ParsedSponsor(
name=render_inline_text(link.children),
url=_href(link),
description=_SPONSOR_SEP_RE.sub("", desc_html),
)
return None
def parse_sponsors(text: str) -> list[ParsedSponsor]:
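
The renderer refactor above folds the duplicated HTML and plain-text walkers into a single traversal switched by an html flag. A reduced, self-contained sketch of the dispatch shape, using dicts as stand-ins for markdown-it's SyntaxTreeNode and trimming the node cases:

from html import escape

def _render_inline(children: list[dict], *, html: bool) -> str:
    parts: list[str] = []
    for child in children:
        match child["type"]:
            case "text":
                parts.append(escape(child["content"]) if html else child["content"])
            case "code_inline":
                parts.append(f"<code>{escape(child['content'])}</code>" if html else child["content"])
            case "em":
                inner = _render_inline(child["children"], html=html)
                parts.append(f"<em>{inner}</em>" if html else inner)
    return "".join(parts)

def render_inline_html(children: list[dict]) -> str:
    return _render_inline(children, html=True)

def render_inline_text(children: list[dict]) -> str:
    return _render_inline(children, html=False)

The real version keeps one asymmetry worth noting: html_inline content is escaped in HTML mode but dropped entirely in text mode, matching the behavior of the two functions it replaces.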

View File

@@ -294,10 +294,6 @@ kbd {
color: var(--hero-kicker);
}
.section-label {
color: var(--accent-deep);
}
.hero h1 {
font-family: var(--font-display);
font-size: clamp(4.5rem, 11vw, 8.5rem);
@@ -414,35 +410,26 @@ kbd {
.sponsor-meta .section-label {
margin-bottom: 0;
font-size: var(--text-lg);
}
.sponsor-become {
display: inline-flex;
align-items: center;
gap: 0.4rem;
align-self: start;
color: var(--ink-soft);
font-size: var(--text-sm);
font-weight: 700;
letter-spacing: 0.01em;
border-bottom: 1px solid var(--line-strong);
padding-bottom: 0.2rem;
text-decoration: underline;
text-decoration-color: var(--line-strong);
text-underline-offset: 0.2em;
transition:
color 180ms ease,
border-color 180ms ease;
text-decoration-color 180ms ease;
}
.sponsor-become:hover {
color: var(--accent-deep);
border-bottom-color: var(--accent);
}
.sponsor-become-arrow {
transition: transform 180ms cubic-bezier(0.22, 1, 0.36, 1);
}
.sponsor-become:hover .sponsor-become-arrow {
transform: translateX(0.3rem);
text-decoration-color: var(--accent-underline);
}
.sponsor-list {

View File

@@ -77,7 +77,6 @@
rel="noopener"
>
Become a sponsor
<span class="sponsor-become-arrow" aria-hidden="true">&rarr;</span>
</a>
</header>
<ul class="sponsor-list">

View File

@@ -10,7 +10,6 @@ from build import (
detect_source_type,
extract_entries,
extract_github_repo,
format_stars_short,
load_stars,
sort_entries,
)
@@ -108,7 +107,7 @@ class TestBuild:
Help!
""")
self._make_repo(tmp_path, readme)
build(str(tmp_path))
build(tmp_path)
site = tmp_path / "website" / "output"
assert (site / "index.html").exists()
@@ -135,7 +134,7 @@ class TestBuild:
stale.mkdir(parents=True)
(stale / "index.html").write_text("old", encoding="utf-8")
build(str(tmp_path))
build(tmp_path)
assert not (tmp_path / "website" / "output" / "categories" / "stale").exists()
@@ -162,7 +161,7 @@ class TestBuild:
Done.
""")
self._make_repo(tmp_path, readme)
build(str(tmp_path))
build(tmp_path)
index_html = (tmp_path / "website" / "output" / "index.html").read_text()
assert "Alpha" in index_html
@@ -186,7 +185,7 @@ class TestBuild:
Done.
""")
self._make_repo(tmp_path, readme)
build(str(tmp_path))
build(tmp_path)
index_html = (tmp_path / "website" / "output" / "index.html").read_text()
assert "django" in index_html
@@ -224,7 +223,7 @@ class TestBuild:
}
(data_dir / "github_stars.json").write_text(json.dumps(stars), encoding="utf-8")
build(str(tmp_path))
build(tmp_path)
html = (tmp_path / "website" / "output" / "index.html").read_text(encoding="utf-8")
# Star-sorted: high-stars (5000) before low-stars (100) before no-stars (None)
@@ -363,25 +362,6 @@ class TestDetectSourceType:
assert detect_source_type("https://github.com/org/repo/wiki") is None
# ---------------------------------------------------------------------------
# format_stars_short
# ---------------------------------------------------------------------------
class TestFormatStarsShort:
def test_under_1000(self):
assert format_stars_short(500) == "500"
def test_exactly_1000(self):
assert format_stars_short(1000) == "1k"
def test_large_number(self):
assert format_stars_short(52000) == "52k"
def test_zero(self):
assert format_stars_short(0) == "0"
# ---------------------------------------------------------------------------
# extract_entries
# ---------------------------------------------------------------------------
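
The deleted TestFormatStarsShort cases are not lost so much as relocated: the helper now lives as a single inlined expression in build(). A quick check that the inlined expression reproduces the removed helper's outputs, reusing the values from the deleted tests:

for stars_val, expected in [(500, "500"), (1000, "1k"), (52000, "52k"), (0, "0")]:
    got = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val)
    assert got == expected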

View File

@@ -1,7 +1,7 @@
"""Tests for the readme_parser module."""
import os
import textwrap
from pathlib import Path
import pytest
@@ -437,9 +437,8 @@ class TestParseSectionEntries:
class TestParseRealReadme:
@pytest.fixture(autouse=True)
def load_readme(self):
readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
with open(readme_path, encoding="utf-8") as f:
self.readme_text = f.read()
readme_path = Path(__file__).resolve().parents[2] / "README.md"
self.readme_text = readme_path.read_text(encoding="utf-8")
self.groups = parse_readme(self.readme_text)
self.cats = [c for g in self.groups for c in g["categories"]]
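
On the fixture change: Path.parents counts upward from the file's own directory, so parents[2] makes the same hop as the removed os.path.join(dirname(__file__), "..", ".."). Illustrated below for an assumed website/tests/ layout:

from pathlib import Path

# For a test file at website/tests/test_readme_parser.py (layout assumed):
#   parents[0] -> website/tests
#   parents[1] -> website
#   parents[2] -> repo root, where README.md lives
readme_path = Path(__file__).resolve().parents[2] / "README.md"
text = readme_path.read_text(encoding="utf-8")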