Merge pull request #3076 from vinta/chore/code-cleanup

chore: simplify website/ Python and polish sponsors section
Vinta Chen
2026-04-19 22:58:38 +08:00
committed by GitHub
7 changed files with 118 additions and 192 deletions

View File

@@ -4,20 +4,12 @@
import json
import re
import shutil
from datetime import datetime, timezone
from datetime import UTC, datetime
from pathlib import Path
from typing import TypedDict
from typing import Any
from jinja2 import Environment, FileSystemLoader
from readme_parser import parse_readme, parse_sponsors
class StarData(TypedDict):
stars: int
owner: str
last_commit_at: str
fetched_at: str
from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
@@ -46,7 +38,7 @@ def extract_github_repo(url: str) -> str | None:
return m.group(1) if m else None
def load_stars(path: Path) -> dict[str, StarData]:
def load_stars(path: Path) -> dict[str, dict]:
"""Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt."""
if path.exists():
try:
@@ -76,68 +68,55 @@ def sort_entries(entries: list[dict]) -> list[dict]:
def extract_entries(
categories: list[dict],
groups: list[dict],
categories: list[ParsedSection],
groups: list[ParsedGroup],
) -> list[dict]:
"""Flatten categories into individual library entries for table display.
Entries appearing in multiple categories are merged into a single entry
with lists of categories and groups.
"""
cat_to_group: dict[str, str] = {}
for group in groups:
for cat in group["categories"]:
cat_to_group[cat["name"]] = group["name"]
cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]}
seen: dict[tuple[str, str], dict] = {} # (url, name) -> entry
entries: list[dict] = []
seen: dict[tuple[str, str], dict[str, Any]] = {} # (url, name) -> entry
entries: list[dict[str, Any]] = []
for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other")
for entry in cat["entries"]:
url = entry["url"]
key = (url, entry["name"])
if key in seen:
existing = seen[key]
if cat["name"] not in existing["categories"]:
existing["categories"].append(cat["name"])
if group_name not in existing["groups"]:
existing["groups"].append(group_name)
subcat = entry["subcategory"]
if subcat:
scoped = f"{cat['name']} > {subcat}"
if not any(s["value"] == scoped for s in existing["subcategories"]):
existing["subcategories"].append({"name": subcat, "value": scoped})
else:
merged = {
key = (entry["url"], entry["name"])
existing: dict[str, Any] | None = seen.get(key)
if existing is None:
existing = {
"name": entry["name"],
"url": url,
"url": entry["url"],
"description": entry["description"],
"categories": [cat["name"]],
"groups": [group_name],
"subcategories": [{"name": entry["subcategory"], "value": f"{cat['name']} > {entry['subcategory']}"}] if entry["subcategory"] else [],
"categories": [],
"groups": [],
"subcategories": [],
"stars": None,
"owner": None,
"last_commit_at": None,
"source_type": detect_source_type(url),
"source_type": detect_source_type(entry["url"]),
"also_see": entry["also_see"],
}
seen[key] = merged
entries.append(merged)
seen[key] = existing
entries.append(existing)
if cat["name"] not in existing["categories"]:
existing["categories"].append(cat["name"])
if group_name not in existing["groups"]:
existing["groups"].append(group_name)
subcat = entry["subcategory"]
if subcat:
scoped = f"{cat['name']} > {subcat}"
if not any(s["value"] == scoped for s in existing["subcategories"]):
existing["subcategories"].append({"name": subcat, "value": scoped})
return entries
def format_stars_short(stars: int) -> str:
"""Format star count as compact string like '230k'."""
if stars >= 1000:
return f"{stars // 1000}k"
return str(stars)
def build(repo_root: str) -> None:
def build(repo_root: Path) -> None:
"""Main build: parse README, render single-page HTML via Jinja2 templates."""
repo = Path(repo_root)
website = repo / "website"
readme_text = (repo / "README.md").read_text(encoding="utf-8")
website = repo_root / "website"
readme_text = (repo_root / "README.md").read_text(encoding="utf-8")
subtitle = ""
for line in readme_text.split("\n"):
@@ -156,7 +135,10 @@ def build(repo_root: str) -> None:
stars_data = load_stars(website / "data" / "github_stars.json")
repo_self = stars_data.get("vinta/awesome-python", {})
repo_stars = format_stars_short(repo_self["stars"]) if "stars" in repo_self else None
repo_stars = None
if "stars" in repo_self:
stars_val = repo_self["stars"]
repo_stars = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val)
for entry in entries:
repo_key = extract_github_repo(entry["url"])
@@ -189,7 +171,7 @@ def build(repo_root: str) -> None:
total_entries=total_entries,
total_categories=len(categories),
repo_stars=repo_stars,
build_date=datetime.now(timezone.utc).strftime("%B %d, %Y"),
build_date=datetime.now(UTC).strftime("%B %d, %Y"),
sponsors=sponsors,
),
encoding="utf-8",
@@ -208,4 +190,4 @@ def build(repo_root: str) -> None:
if __name__ == "__main__":
build(str(Path(__file__).parent.parent))
build(Path(__file__).parent.parent)
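
Note on the extract_entries rewrite above: the old two-branch insert/merge collapses into a get-or-create shape, where a stub entry is registered on first sight and the category/group/subcategory merging then runs unconditionally for new and repeated sightings alike. A minimal sketch of that pattern, with the field set reduced for illustration (this is not the full function):

from typing import Any

def merge_by_key(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    seen: dict[tuple[str, str], dict[str, Any]] = {}
    entries: list[dict[str, Any]] = []
    for item in items:
        key = (item["url"], item["name"])
        existing = seen.get(key)
        if existing is None:
            # First sighting: register a stub with empty accumulators...
            existing = {"name": item["name"], "url": item["url"], "categories": []}
            seen[key] = existing
            entries.append(existing)
        # ...then merge unconditionally, so both paths share one code block.
        if item["category"] not in existing["categories"]:
            existing["categories"].append(item["category"])
    return entries

The datetime.now(UTC) change is a pure alias swap: datetime.UTC was added in Python 3.11 as a shorthand for timezone.utc, so the rendered build date is unchanged.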

View File

@@ -5,7 +5,9 @@ import json
import os
import re
import sys
from datetime import datetime, timezone
from collections.abc import Sequence
from datetime import UTC, datetime, timedelta
from itertools import batched
from pathlib import Path
import httpx
@@ -44,10 +46,8 @@ def save_cache(cache: dict) -> None:
)
def build_graphql_query(repos: list[str]) -> str:
def build_graphql_query(repos: Sequence[str]) -> str:
"""Build a GraphQL query with aliases for up to 100 repos."""
if not repos:
return ""
parts = []
for i, repo in enumerate(repos):
owner, name = repo.split("/", 1)
@@ -64,7 +64,7 @@ def build_graphql_query(repos: list[str]) -> str:
def parse_graphql_response(
data: dict,
repos: list[str],
repos: Sequence[str],
) -> dict[str, dict]:
"""Parse GraphQL response into {owner/repo: {stars, owner}} dict."""
result = {}
@@ -82,9 +82,7 @@ def parse_graphql_response(
return result
def fetch_batch(
repos: list[str], *, client: httpx.Client,
) -> dict[str, dict]:
def fetch_batch(repos: Sequence[str], client: httpx.Client) -> dict[str, dict]:
"""Fetch star data for a batch of repos via GitHub GraphQL API."""
query = build_graphql_query(repos)
if not query:
@@ -112,7 +110,7 @@ def main() -> None:
print(f"Found {len(current_repos)} GitHub repos in README.md")
cache = load_stars(CACHE_FILE)
now = datetime.now(timezone.utc)
now = datetime.now(UTC)
# Prune entries not in current README
pruned = {k: v for k, v in cache.items() if k in current_repos}
@@ -121,13 +119,13 @@ def main() -> None:
cache = pruned
# Determine which repos need fetching (missing or stale)
max_age = timedelta(hours=CACHE_MAX_AGE_HOURS)
to_fetch = []
for repo in sorted(current_repos):
entry = cache.get(repo)
if entry and "fetched_at" in entry:
fetched = datetime.fromisoformat(entry["fetched_at"])
age_hours = (now - fetched).total_seconds() / 3600
if age_hours < CACHE_MAX_AGE_HOURS:
if now - fetched < max_age:
continue
to_fetch.append(repo)
@@ -150,13 +148,11 @@ def main() -> None:
transport=httpx.HTTPTransport(retries=2),
timeout=30,
) as client:
for i in range(0, len(to_fetch), BATCH_SIZE):
batch = to_fetch[i : i + BATCH_SIZE]
batch_num = i // BATCH_SIZE + 1
for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1):
print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...")
try:
results = fetch_batch(batch, client=client)
results = fetch_batch(batch, client)
except httpx.HTTPStatusError as e:
print(f"HTTP error {e.response.status_code}", file=sys.stderr)
if e.response.status_code == 401:
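
Two of the changes above lean on newer stdlib pieces; a standalone sketch (the repo list and timestamp are hypothetical):

from datetime import UTC, datetime, timedelta
from itertools import batched  # added in Python 3.12

BATCH_SIZE = 100
to_fetch = [f"owner/repo-{i}" for i in range(250)]  # hypothetical repo list

# batched() yields tuples of up to BATCH_SIZE items, and enumerate(..., 1)
# replaces the manual i // BATCH_SIZE + 1 index arithmetic.
for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1):
    print(f"batch {batch_num}: {len(batch)} repos")

# The staleness check compares timedeltas directly instead of converting
# the age to fractional hours first.
fetched = datetime.fromisoformat("2026-04-18T00:00:00+00:00")  # hypothetical fetched_at
is_fresh = datetime.now(UTC) - fetched < timedelta(hours=24)

One nuance: batched() yields tuples where the old slicing produced lists. The call sites only iterate the batch, and the signatures were widened to Sequence[str] in the same commit, so the swap is behavior-preserving.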

View File

@@ -62,46 +62,44 @@ def slugify(name: str) -> str:
# --- Inline renderers -------------------------------------------------------
def render_inline_html(children: list[SyntaxTreeNode]) -> str:
"""Render inline AST nodes to HTML with proper escaping."""
def _render_inline(children: list[SyntaxTreeNode], *, html: bool) -> str:
"""Render inline AST nodes to HTML or plain text."""
parts: list[str] = []
for child in children:
match child.type:
case "text":
parts.append(str(escape(child.content)))
parts.append(str(escape(child.content)) if html else child.content)
case "html_inline":
if html:
parts.append(str(escape(child.content)))
case "softbreak":
parts.append(" ")
case "link":
href = str(escape(child.attrGet("href") or ""))
inner = render_inline_html(child.children)
parts.append(
f'<a href="{href}" target="_blank" rel="noopener">{inner}</a>'
)
case "em":
parts.append(f"<em>{render_inline_html(child.children)}</em>")
case "strong":
parts.append(f"<strong>{render_inline_html(child.children)}</strong>")
case "code_inline":
parts.append(f"<code>{escape(child.content)}</code>")
case "html_inline":
parts.append(str(escape(child.content)))
parts.append(f"<code>{escape(child.content)}</code>" if html else child.content)
case "link":
inner = _render_inline(child.children, html=html)
if html:
href = str(escape(_href(child)))
parts.append(f'<a href="{href}" target="_blank" rel="noopener">{inner}</a>')
else:
parts.append(inner)
case "em":
inner = _render_inline(child.children, html=html)
parts.append(f"<em>{inner}</em>" if html else inner)
case "strong":
inner = _render_inline(child.children, html=html)
parts.append(f"<strong>{inner}</strong>" if html else inner)
return "".join(parts)
def render_inline_html(children: list[SyntaxTreeNode]) -> str:
"""Render inline AST nodes to HTML with proper escaping."""
return _render_inline(children, html=True)
def render_inline_text(children: list[SyntaxTreeNode]) -> str:
"""Render inline AST nodes to plain text (links become their text)."""
parts: list[str] = []
for child in children:
match child.type:
case "text":
parts.append(child.content)
case "softbreak":
parts.append(" ")
case "code_inline":
parts.append(child.content)
case "em" | "strong" | "link":
parts.append(render_inline_text(child.children))
return "".join(parts)
return _render_inline(children, html=False)
# --- AST helpers -------------------------------------------------------------
@@ -147,6 +145,12 @@ def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
return None
def _href(link: SyntaxTreeNode) -> str:
"""Return the link's href attribute as a string, or '' if missing."""
href = link.attrGet("href")
return href if isinstance(href, str) else ""
def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
"""Find the inline node in a list_item's paragraph."""
para = _find_child(node, "paragraph")
@@ -155,19 +159,6 @@ def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
return _find_child(para, "inline")
def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None:
"""Find the first link node among inline children."""
for child in inline.children:
if child.type == "link":
return child
return None
def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool:
"""Check if the link is the first child of inline (a real entry, not a subcategory label)."""
return bool(inline.children) and inline.children[0] is link
def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str:
"""Extract description HTML from inline content after the first link.
@@ -206,9 +197,9 @@ def _parse_list_entries(
if inline is None:
continue
first_link = _find_first_link(inline)
first_link = _find_child(inline, "link")
if first_link is None or not _is_leading_link(inline, first_link):
if first_link is None or inline.children[0] is not first_link:
# Subcategory label: take text before the first link, strip trailing separators
pre_link = []
for child in inline.children:
@@ -223,7 +214,7 @@ def _parse_list_entries(
# Entry with a link
name = render_inline_text(first_link.children)
url = first_link.attrGet("href") or ""
url = _href(first_link)
desc_html = _extract_description_html(inline, first_link)
# Collect also_see from nested bullet_list
@@ -235,11 +226,11 @@ def _parse_list_entries(
continue
sub_inline = _find_inline(sub_item)
if sub_inline:
sub_link = _find_first_link(sub_inline)
sub_link = _find_child(sub_inline, "link")
if sub_link:
also_see.append(AlsoSee(
name=render_inline_text(sub_link.children),
url=sub_link.attrGet("href") or "",
url=_href(sub_link),
))
entries.append(ParsedEntry(
@@ -324,16 +315,13 @@ def _parse_grouped_sections(
def flush_group() -> None:
nonlocal current_group_name, current_group_cats
if not current_group_cats:
current_group_name = None
current_group_cats = []
return
name = current_group_name or "Other"
groups.append(ParsedGroup(
name=name,
slug=slugify(name),
categories=list(current_group_cats),
))
if current_group_cats:
name = current_group_name or "Other"
groups.append(ParsedGroup(
name=name,
slug=slugify(name),
categories=list(current_group_cats),
))
current_group_name = None
current_group_cats = []
@@ -372,22 +360,17 @@ def _find_link_deep(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
def _parse_sponsor_item(inline: SyntaxTreeNode) -> ParsedSponsor | None:
"""Parse `**[name](url)**: description` (or `[name](url) - description`)."""
link = _find_link_deep(inline)
if link is None:
return None
name = render_inline_text(link.children)
url = link.attrGet("href") or ""
split_idx = None
for i, child in enumerate(inline.children):
if child is link or _find_link_deep(child) is link:
split_idx = i
break
if split_idx is None:
return None
desc_html = render_inline_html(inline.children[split_idx + 1 :])
desc_html = _SPONSOR_SEP_RE.sub("", desc_html)
return ParsedSponsor(name=name, url=url, description=desc_html)
for split_idx, child in enumerate(inline.children):
link = child if child.type == "link" else _find_link_deep(child)
if link is None:
continue
desc_html = render_inline_html(inline.children[split_idx + 1 :])
return ParsedSponsor(
name=render_inline_text(link.children),
url=_href(link),
description=_SPONSOR_SEP_RE.sub("", desc_html),
)
return None
def parse_sponsors(text: str) -> list[ParsedSponsor]:
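
The renderer refactor above folds the duplicated HTML and plain-text walkers into a single traversal switched by an html flag. A reduced, self-contained sketch of the dispatch shape, using dicts as stand-ins for markdown-it's SyntaxTreeNode and trimming the node cases:

from html import escape

def _render_inline(children: list[dict], *, html: bool) -> str:
    parts: list[str] = []
    for child in children:
        match child["type"]:
            case "text":
                parts.append(escape(child["content"]) if html else child["content"])
            case "code_inline":
                parts.append(f"<code>{escape(child['content'])}</code>" if html else child["content"])
            case "em":
                inner = _render_inline(child["children"], html=html)
                parts.append(f"<em>{inner}</em>" if html else inner)
    return "".join(parts)

def render_inline_html(children: list[dict]) -> str:
    return _render_inline(children, html=True)

def render_inline_text(children: list[dict]) -> str:
    return _render_inline(children, html=False)

The real version keeps one asymmetry worth noting: html_inline content is escaped in HTML mode but dropped entirely in text mode, matching the behavior of the two functions it replaces.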

View File

@@ -294,10 +294,6 @@ kbd {
color: var(--hero-kicker);
}
.section-label {
color: var(--accent-deep);
}
.hero h1 {
font-family: var(--font-display);
font-size: clamp(4.5rem, 11vw, 8.5rem);
@@ -414,35 +410,26 @@ kbd {
.sponsor-meta .section-label {
margin-bottom: 0;
font-size: var(--text-lg);
}
.sponsor-become {
display: inline-flex;
align-items: center;
gap: 0.4rem;
align-self: start;
color: var(--ink-soft);
font-size: var(--text-sm);
font-weight: 700;
letter-spacing: 0.01em;
border-bottom: 1px solid var(--line-strong);
padding-bottom: 0.2rem;
text-decoration: underline;
text-decoration-color: var(--line-strong);
text-underline-offset: 0.2em;
transition:
color 180ms ease,
border-color 180ms ease;
text-decoration-color 180ms ease;
}
.sponsor-become:hover {
color: var(--accent-deep);
border-bottom-color: var(--accent);
}
.sponsor-become-arrow {
transition: transform 180ms cubic-bezier(0.22, 1, 0.36, 1);
}
.sponsor-become:hover .sponsor-become-arrow {
transform: translateX(0.3rem);
text-decoration-color: var(--accent-underline);
}
.sponsor-list {

View File

@@ -77,7 +77,6 @@
rel="noopener"
>
Become a sponsor
<span class="sponsor-become-arrow" aria-hidden="true">&rarr;</span>
</a>
</header>
<ul class="sponsor-list">

View File

@@ -10,7 +10,6 @@ from build import (
detect_source_type,
extract_entries,
extract_github_repo,
format_stars_short,
load_stars,
sort_entries,
)
@@ -108,7 +107,7 @@ class TestBuild:
Help!
""")
self._make_repo(tmp_path, readme)
build(str(tmp_path))
build(tmp_path)
site = tmp_path / "website" / "output"
assert (site / "index.html").exists()
@@ -135,7 +134,7 @@ class TestBuild:
stale.mkdir(parents=True)
(stale / "index.html").write_text("old", encoding="utf-8")
build(str(tmp_path))
build(tmp_path)
assert not (tmp_path / "website" / "output" / "categories" / "stale").exists()
@@ -162,7 +161,7 @@ class TestBuild:
Done.
""")
self._make_repo(tmp_path, readme)
build(str(tmp_path))
build(tmp_path)
index_html = (tmp_path / "website" / "output" / "index.html").read_text()
assert "Alpha" in index_html
@@ -186,7 +185,7 @@ class TestBuild:
Done.
""")
self._make_repo(tmp_path, readme)
build(str(tmp_path))
build(tmp_path)
index_html = (tmp_path / "website" / "output" / "index.html").read_text()
assert "django" in index_html
@@ -224,7 +223,7 @@ class TestBuild:
}
(data_dir / "github_stars.json").write_text(json.dumps(stars), encoding="utf-8")
build(str(tmp_path))
build(tmp_path)
html = (tmp_path / "website" / "output" / "index.html").read_text(encoding="utf-8")
# Star-sorted: high-stars (5000) before low-stars (100) before no-stars (None)
@@ -363,25 +362,6 @@ class TestDetectSourceType:
assert detect_source_type("https://github.com/org/repo/wiki") is None
# ---------------------------------------------------------------------------
# format_stars_short
# ---------------------------------------------------------------------------
class TestFormatStarsShort:
def test_under_1000(self):
assert format_stars_short(500) == "500"
def test_exactly_1000(self):
assert format_stars_short(1000) == "1k"
def test_large_number(self):
assert format_stars_short(52000) == "52k"
def test_zero(self):
assert format_stars_short(0) == "0"
# ---------------------------------------------------------------------------
# extract_entries
# ---------------------------------------------------------------------------
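
The deleted TestFormatStarsShort cases are not lost so much as relocated: the helper now lives as a single inlined expression in build(). A quick check that the inlined expression reproduces the removed helper's outputs, reusing the values from the deleted tests:

for stars_val, expected in [(500, "500"), (1000, "1k"), (52000, "52k"), (0, "0")]:
    got = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val)
    assert got == expected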

View File

@@ -1,7 +1,7 @@
"""Tests for the readme_parser module."""
import os
import textwrap
from pathlib import Path
import pytest
@@ -437,9 +437,8 @@ class TestParseSectionEntries:
class TestParseRealReadme:
@pytest.fixture(autouse=True)
def load_readme(self):
readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
with open(readme_path, encoding="utf-8") as f:
self.readme_text = f.read()
readme_path = Path(__file__).resolve().parents[2] / "README.md"
self.readme_text = readme_path.read_text(encoding="utf-8")
self.groups = parse_readme(self.readme_text)
self.cats = [c for g in self.groups for c in g["categories"]]
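
On the fixture change: Path.parents counts upward from the file's own directory, so parents[2] makes the same hop as the removed os.path.join(dirname(__file__), "..", ".."). Illustrated below for an assumed website/tests/ layout:

from pathlib import Path

# For a test file at website/tests/test_readme_parser.py (layout assumed):
#   parents[0] -> website/tests
#   parents[1] -> website
#   parents[2] -> repo root, where README.md lives
readme_path = Path(__file__).resolve().parents[2] / "README.md"
text = readme_path.read_text(encoding="utf-8")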