diff --git a/website/build.py b/website/build.py
index f1d00c93..c223ef18 100644
--- a/website/build.py
+++ b/website/build.py
@@ -4,20 +4,12 @@
 import json
 import re
 import shutil
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from pathlib import Path
-from typing import TypedDict
+from typing import Any
 
 from jinja2 import Environment, FileSystemLoader
-from readme_parser import parse_readme, parse_sponsors
-
-
-class StarData(TypedDict):
-    stars: int
-    owner: str
-    last_commit_at: str
-    fetched_at: str
-
+from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors
 
 
 GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
@@ -46,7 +38,7 @@ def extract_github_repo(url: str) -> str | None:
     return m.group(1) if m else None
 
 
-def load_stars(path: Path) -> dict[str, StarData]:
+def load_stars(path: Path) -> dict[str, dict]:
     """Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt."""
     if path.exists():
         try:
@@ -76,68 +68,55 @@ def sort_entries(entries: list[dict]) -> list[dict]:
 
 def extract_entries(
-    categories: list[dict],
-    groups: list[dict],
+    categories: list[ParsedSection],
+    groups: list[ParsedGroup],
 ) -> list[dict]:
     """Flatten categories into individual library entries for table display.
 
     Entries appearing in multiple categories are merged into a single entry
     with lists of categories and groups.
     """
-    cat_to_group: dict[str, str] = {}
-    for group in groups:
-        for cat in group["categories"]:
-            cat_to_group[cat["name"]] = group["name"]
+    cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]}
 
-    seen: dict[tuple[str, str], dict] = {}  # (url, name) -> entry
-    entries: list[dict] = []
+    seen: dict[tuple[str, str], dict[str, Any]] = {}  # (url, name) -> entry
+    entries: list[dict[str, Any]] = []
 
     for cat in categories:
         group_name = cat_to_group.get(cat["name"], "Other")
         for entry in cat["entries"]:
-            url = entry["url"]
-            key = (url, entry["name"])
-            if key in seen:
-                existing = seen[key]
-                if cat["name"] not in existing["categories"]:
-                    existing["categories"].append(cat["name"])
-                if group_name not in existing["groups"]:
-                    existing["groups"].append(group_name)
-                subcat = entry["subcategory"]
-                if subcat:
-                    scoped = f"{cat['name']} > {subcat}"
-                    if not any(s["value"] == scoped for s in existing["subcategories"]):
-                        existing["subcategories"].append({"name": subcat, "value": scoped})
-            else:
-                merged = {
+            key = (entry["url"], entry["name"])
+            existing: dict[str, Any] | None = seen.get(key)
+            if existing is None:
+                existing = {
                     "name": entry["name"],
-                    "url": url,
+                    "url": entry["url"],
                     "description": entry["description"],
-                    "categories": [cat["name"]],
-                    "groups": [group_name],
-                    "subcategories": [{"name": entry["subcategory"], "value": f"{cat['name']} > {entry['subcategory']}"}] if entry["subcategory"] else [],
+                    "categories": [],
+                    "groups": [],
+                    "subcategories": [],
                     "stars": None,
                     "owner": None,
                     "last_commit_at": None,
-                    "source_type": detect_source_type(url),
+                    "source_type": detect_source_type(entry["url"]),
                     "also_see": entry["also_see"],
                 }
-                seen[key] = merged
-                entries.append(merged)
+                seen[key] = existing
+                entries.append(existing)
+            if cat["name"] not in existing["categories"]:
+                existing["categories"].append(cat["name"])
+            if group_name not in existing["groups"]:
+                existing["groups"].append(group_name)
+            subcat = entry["subcategory"]
+            if subcat:
+                scoped = f"{cat['name']} > {subcat}"
+                if not any(s["value"] == scoped for s in existing["subcategories"]):
existing["subcategories"]): + existing["subcategories"].append({"name": subcat, "value": scoped}) return entries -def format_stars_short(stars: int) -> str: - """Format star count as compact string like '230k'.""" - if stars >= 1000: - return f"{stars // 1000}k" - return str(stars) - - -def build(repo_root: str) -> None: +def build(repo_root: Path) -> None: """Main build: parse README, render single-page HTML via Jinja2 templates.""" - repo = Path(repo_root) - website = repo / "website" - readme_text = (repo / "README.md").read_text(encoding="utf-8") + website = repo_root / "website" + readme_text = (repo_root / "README.md").read_text(encoding="utf-8") subtitle = "" for line in readme_text.split("\n"): @@ -156,7 +135,10 @@ def build(repo_root: str) -> None: stars_data = load_stars(website / "data" / "github_stars.json") repo_self = stars_data.get("vinta/awesome-python", {}) - repo_stars = format_stars_short(repo_self["stars"]) if "stars" in repo_self else None + repo_stars = None + if "stars" in repo_self: + stars_val = repo_self["stars"] + repo_stars = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val) for entry in entries: repo_key = extract_github_repo(entry["url"]) @@ -189,7 +171,7 @@ def build(repo_root: str) -> None: total_entries=total_entries, total_categories=len(categories), repo_stars=repo_stars, - build_date=datetime.now(timezone.utc).strftime("%B %d, %Y"), + build_date=datetime.now(UTC).strftime("%B %d, %Y"), sponsors=sponsors, ), encoding="utf-8", @@ -208,4 +190,4 @@ def build(repo_root: str) -> None: if __name__ == "__main__": - build(str(Path(__file__).parent.parent)) + build(Path(__file__).parent.parent) diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py index 741ac07d..c93ef4ec 100644 --- a/website/fetch_github_stars.py +++ b/website/fetch_github_stars.py @@ -5,7 +5,9 @@ import json import os import re import sys -from datetime import datetime, timezone +from collections.abc import Sequence +from datetime import UTC, datetime, timedelta +from itertools import batched from pathlib import Path import httpx @@ -44,10 +46,8 @@ def save_cache(cache: dict) -> None: ) -def build_graphql_query(repos: list[str]) -> str: +def build_graphql_query(repos: Sequence[str]) -> str: """Build a GraphQL query with aliases for up to 100 repos.""" - if not repos: - return "" parts = [] for i, repo in enumerate(repos): owner, name = repo.split("/", 1) @@ -64,7 +64,7 @@ def build_graphql_query(repos: list[str]) -> str: def parse_graphql_response( data: dict, - repos: list[str], + repos: Sequence[str], ) -> dict[str, dict]: """Parse GraphQL response into {owner/repo: {stars, owner}} dict.""" result = {} @@ -82,9 +82,7 @@ def parse_graphql_response( return result -def fetch_batch( - repos: list[str], *, client: httpx.Client, -) -> dict[str, dict]: +def fetch_batch(repos: Sequence[str], client: httpx.Client) -> dict[str, dict]: """Fetch star data for a batch of repos via GitHub GraphQL API.""" query = build_graphql_query(repos) if not query: @@ -112,7 +110,7 @@ def main() -> None: print(f"Found {len(current_repos)} GitHub repos in README.md") cache = load_stars(CACHE_FILE) - now = datetime.now(timezone.utc) + now = datetime.now(UTC) # Prune entries not in current README pruned = {k: v for k, v in cache.items() if k in current_repos} @@ -121,13 +119,13 @@ def main() -> None: cache = pruned # Determine which repos need fetching (missing or stale) + max_age = timedelta(hours=CACHE_MAX_AGE_HOURS) to_fetch = [] for repo in sorted(current_repos): entry = 
         if entry and "fetched_at" in entry:
             fetched = datetime.fromisoformat(entry["fetched_at"])
-            age_hours = (now - fetched).total_seconds() / 3600
-            if age_hours < CACHE_MAX_AGE_HOURS:
+            if now - fetched < max_age:
                 continue
         to_fetch.append(repo)
@@ -150,13 +148,11 @@ def main() -> None:
         transport=httpx.HTTPTransport(retries=2),
         timeout=30,
     ) as client:
-        for i in range(0, len(to_fetch), BATCH_SIZE):
-            batch = to_fetch[i : i + BATCH_SIZE]
-            batch_num = i // BATCH_SIZE + 1
+        for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1):
             print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...")
             try:
-                results = fetch_batch(batch, client=client)
+                results = fetch_batch(batch, client)
             except httpx.HTTPStatusError as e:
                 print(f"HTTP error {e.response.status_code}", file=sys.stderr)
                 if e.response.status_code == 401:
diff --git a/website/readme_parser.py b/website/readme_parser.py
index c736b7cc..10d26886 100644
--- a/website/readme_parser.py
+++ b/website/readme_parser.py
@@ -62,46 +62,44 @@ def slugify(name: str) -> str:
 # --- Inline renderers -------------------------------------------------------
 
 
-def render_inline_html(children: list[SyntaxTreeNode]) -> str:
-    """Render inline AST nodes to HTML with proper escaping."""
+def _render_inline(children: list[SyntaxTreeNode], *, html: bool) -> str:
+    """Render inline AST nodes to HTML or plain text."""
     parts: list[str] = []
     for child in children:
         match child.type:
             case "text":
-                parts.append(str(escape(child.content)))
+                parts.append(str(escape(child.content)) if html else child.content)
+            case "html_inline":
+                if html:
+                    parts.append(str(escape(child.content)))
             case "softbreak":
                 parts.append(" ")
-            case "link":
-                href = str(escape(child.attrGet("href") or ""))
-                inner = render_inline_html(child.children)
-                parts.append(
-                    f'<a href="{href}">{inner}</a>'
-                )
-            case "em":
-                parts.append(f"<em>{render_inline_html(child.children)}</em>")
-            case "strong":
-                parts.append(f"<strong>{render_inline_html(child.children)}</strong>")
             case "code_inline":
-                parts.append(f"<code>{escape(child.content)}</code>")
-            case "html_inline":
-                parts.append(str(escape(child.content)))
+                parts.append(f"<code>{escape(child.content)}</code>" if html else child.content)
+            case "link":
+                inner = _render_inline(child.children, html=html)
+                if html:
+                    href = str(escape(_href(child)))
+                    parts.append(f'<a href="{href}">{inner}</a>')
+                else:
+                    parts.append(inner)
+            case "em":
+                inner = _render_inline(child.children, html=html)
+                parts.append(f"<em>{inner}</em>" if html else inner)
+            case "strong":
+                inner = _render_inline(child.children, html=html)
+                parts.append(f"<strong>{inner}</strong>" if html else inner)
     return "".join(parts)
 
 
+def render_inline_html(children: list[SyntaxTreeNode]) -> str:
+    """Render inline AST nodes to HTML with proper escaping."""
+    return _render_inline(children, html=True)
+
+
 def render_inline_text(children: list[SyntaxTreeNode]) -> str:
     """Render inline AST nodes to plain text (links become their text)."""
-    parts: list[str] = []
-    for child in children:
-        match child.type:
-            case "text":
-                parts.append(child.content)
-            case "softbreak":
-                parts.append(" ")
-            case "code_inline":
-                parts.append(child.content)
-            case "em" | "strong" | "link":
-                parts.append(render_inline_text(child.children))
-    return "".join(parts)
+    return _render_inline(children, html=False)
 
 
 # --- AST helpers -------------------------------------------------------------
@@ -147,6 +145,12 @@ def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
     return None
 
 
+def _href(link: SyntaxTreeNode) -> str:
+    """Return the link's href attribute as a string, or '' if missing."""
+    href = link.attrGet("href")
+    return href if isinstance(href, str) else ""
+
+
 def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
     """Find the inline node in a list_item's paragraph."""
     para = _find_child(node, "paragraph")
@@ -155,19 +159,6 @@ def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
     return _find_child(para, "inline")
 
 
-def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None:
-    """Find the first link node among inline children."""
-    for child in inline.children:
-        if child.type == "link":
-            return child
-    return None
-
-
-def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool:
-    """Check if the link is the first child of inline (a real entry, not a subcategory label)."""
-    return bool(inline.children) and inline.children[0] is link
-
-
 def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str:
     """Extract description HTML from inline content after the first link.
 
@@ -206,9 +197,9 @@
         if inline is None:
             continue
 
-        first_link = _find_first_link(inline)
+        first_link = _find_child(inline, "link")
 
-        if first_link is None or not _is_leading_link(inline, first_link):
+        if first_link is None or inline.children[0] is not first_link:
             # Subcategory label: take text before the first link, strip trailing separators
             pre_link = []
             for child in inline.children:
@@ -223,7 +214,7 @@
 
         # Entry with a link
         name = render_inline_text(first_link.children)
-        url = first_link.attrGet("href") or ""
+        url = _href(first_link)
         desc_html = _extract_description_html(inline, first_link)
 
         # Collect also_see from nested bullet_list
@@ -235,11 +226,11 @@
                 continue
             sub_inline = _find_inline(sub_item)
             if sub_inline:
-                sub_link = _find_first_link(sub_inline)
+                sub_link = _find_child(sub_inline, "link")
                 if sub_link:
                     also_see.append(AlsoSee(
                         name=render_inline_text(sub_link.children),
-                        url=sub_link.attrGet("href") or "",
+                        url=_href(sub_link),
                     ))
 
         entries.append(ParsedEntry(
@@ -324,16 +315,13 @@ def _parse_grouped_sections(
 
     def flush_group() -> None:
         nonlocal current_group_name, current_group_cats
-        if not current_group_cats:
-            current_group_name = None
-            current_group_cats = []
-            return
-        name = current_group_name or "Other"
-        groups.append(ParsedGroup(
-            name=name,
-            slug=slugify(name),
-            categories=list(current_group_cats),
-        ))
+        if current_group_cats:
+            name = current_group_name or "Other"
+            groups.append(ParsedGroup(
+                name=name,
+                slug=slugify(name),
+                categories=list(current_group_cats),
+            ))
         current_group_name = None
         current_group_cats = []
@@ -372,22 +360,17 @@ def _find_link_deep(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
 
 def _parse_sponsor_item(inline: SyntaxTreeNode) -> ParsedSponsor | None:
     """Parse `**[name](url)**: description` (or `[name](url) - description`)."""
-    link = _find_link_deep(inline)
-    if link is None:
-        return None
-    name = render_inline_text(link.children)
-    url = link.attrGet("href") or ""
-
-    split_idx = None
-    for i, child in enumerate(inline.children):
-        if child is link or _find_link_deep(child) is link:
-            split_idx = i
-            break
-    if split_idx is None:
-        return None
-    desc_html = render_inline_html(inline.children[split_idx + 1 :])
-    desc_html = _SPONSOR_SEP_RE.sub("", desc_html)
-    return ParsedSponsor(name=name, url=url, description=desc_html)
+    for split_idx, child in enumerate(inline.children):
+        link = child if child.type == "link" else _find_link_deep(child)
+        if link is None:
+            continue
+        desc_html = render_inline_html(inline.children[split_idx + 1 :])
+        return ParsedSponsor(
+            name=render_inline_text(link.children),
+            url=_href(link),
+            description=_SPONSOR_SEP_RE.sub("", desc_html),
+        )
+    return None
 
 
 def parse_sponsors(text: str) -> list[ParsedSponsor]:
diff --git a/website/static/style.css b/website/static/style.css
index c8a96fb4..ec395e98 100644
--- a/website/static/style.css
+++ b/website/static/style.css
@@ -294,10 +294,6 @@ kbd {
   color: var(--hero-kicker);
 }
 
-.section-label {
-  color: var(--accent-deep);
-}
-
 .hero h1 {
   font-family: var(--font-display);
   font-size: clamp(4.5rem, 11vw, 8.5rem);
@@ -414,35 +410,26 @@
 
 .sponsor-meta .section-label {
   margin-bottom: 0;
+  font-size: var(--text-lg);
 }
 
 .sponsor-become {
-  display: inline-flex;
-  align-items: center;
-  gap: 0.4rem;
   align-self: start;
   color: var(--ink-soft);
   font-size: var(--text-sm);
   font-weight: 700;
   letter-spacing: 0.01em;
-  border-bottom: 1px solid var(--line-strong);
-  padding-bottom: 0.2rem;
+  text-decoration: underline;
+  text-decoration-color: var(--line-strong);
+  text-underline-offset: 0.2em;
   transition:
     color 180ms ease,
-    border-color 180ms ease;
+    text-decoration-color 180ms ease;
 }
 
 .sponsor-become:hover {
   color: var(--accent-deep);
-  border-bottom-color: var(--accent);
-}
-
-.sponsor-become-arrow {
-  transition: transform 180ms cubic-bezier(0.22, 1, 0.36, 1);
-}
-
-.sponsor-become:hover .sponsor-become-arrow {
-  transform: translateX(0.3rem);
+  text-decoration-color: var(--accent-underline);
 }
 
 .sponsor-list {
diff --git a/website/templates/index.html b/website/templates/index.html
index 87cfbf29..53e968d3 100644
--- a/website/templates/index.html
+++ b/website/templates/index.html
@@ -77,7 +77,6 @@
             rel="noopener"
           >
             Become a sponsor
-            <span class="sponsor-become-arrow">→</span>
           </a>