fix(website): type build template entries

This commit is contained in:
Vinta Chen
2026-05-03 12:08:15 +08:00
parent 38b54caabb
commit a068219684
2 changed files with 110 additions and 103 deletions

View File

@@ -9,10 +9,10 @@ from collections import Counter
from collections.abc import Sequence
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from typing import TypedDict
from jinja2 import Environment, FileSystemLoader
from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors, slugify
from readme_parser import AlsoSee, ParsedGroup, ParsedSection, parse_readme, parse_sponsors, slugify
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
MARKDOWN_LINK_RE = re.compile(r"\[[^\]]+\]\(([^)\s]+)\)")
@@ -36,6 +36,37 @@ SOURCE_TYPE_DOMAINS = {
}
class TemplateSubcategory(TypedDict):
    """Subcategory reference attached to a template entry."""

    name: str  # subcategory name as written under its category
    value: str  # "<category name> > <subcategory name>"; used to deduplicate
    slug: str  # slugified subcategory name
    url: str  # site-relative path "/categories/<category slug>/<slug>/"
class TemplateEntry(TypedDict):
    """Flattened library entry as handed to the page templates.

    One entry exists per (url, name) pair; an entry listed under several
    categories is merged into a single record.
    """

    name: str  # copied from the parsed README entry
    url: str  # copied from the parsed README entry
    description: str  # copied from the parsed README entry
    categories: list[str]  # names of the categories the entry is listed under
    groups: list[str]  # presumably the group names of those categories
    subcategories: list[TemplateSubcategory]  # unique by their "value" field
    stars: int | None  # None when no star count is known for the entry
    owner: str | None  # starts as None; NOTE(review): filled outside this view
    last_commit_at: str | None  # starts as None; NOTE(review): filled outside this view
    source_type: str | None  # result of detect_source_type(); None for GitHub URLs
    also_see: list[AlsoSee]  # copied from the parsed README entry
class SyntheticCategory(TypedDict):
    """Minimal category mapping built in code by synthetic_category()."""

    name: str  # display name of the category
    slug: str  # slug of the category
    description: str  # synthetic_category() always sets this to ""
    description_html: str  # synthetic_category() always sets this to ""
# Union of the category shapes accepted when rendering a category page:
# a section parsed from the README or a synthetic category built in code.
TemplateCategory = ParsedSection | SyntheticCategory
def detect_source_type(url: str) -> str | None:
"""Detect source type from URL domain. Returns None for GitHub URLs."""
if GITHUB_REPO_URL_RE.match(url):
@@ -64,13 +95,13 @@ def load_stars(path: Path) -> dict[str, dict]:
return {}
def sort_entries(entries: list[dict]) -> list[dict]:
def sort_entries(entries: Sequence[TemplateEntry]) -> list[TemplateEntry]:
"""Sort entries by stars descending, then name ascending.
Three tiers: starred entries first, stdlib second, other non-starred last.
"""
def sort_key(entry: dict) -> tuple[int, int, int, str]:
def sort_key(entry: TemplateEntry) -> tuple[int, int, int, str]:
stars = entry["stars"]
name = entry["name"].lower()
if stars is not None:
@@ -84,13 +115,7 @@ def sort_entries(entries: list[dict]) -> list[dict]:
def build_robots_txt() -> str:
    """Return the full text of the site's robots.txt.

    The file addresses every user agent, declares the Content-Signal
    policy, allows the whole site, and points at SITEMAP_URL. The result
    ends with a single trailing newline.
    """
    lines = [
        "User-agent: *",
        "Content-Signal: search=yes, ai-input=yes, ai-train=yes",
        "Allow: /",
        "",  # blank line separating the rule group from the sitemap line
        f"Sitemap: {SITEMAP_URL}",
    ]
    return "\n".join(lines) + "\n"
def category_path(category: ParsedSection) -> str:
@@ -117,7 +142,7 @@ def subcategory_public_url(category_slug: str, subcategory_slug: str) -> str:
return f"{SITE_URL}categories/{category_slug}/{subcategory_slug}/"
def synthetic_category(name: str, slug: str) -> SyntheticCategory:
    """Build a category mapping with empty description fields.

    Args:
        name: Display name of the category.
        slug: Slug of the category.

    Returns:
        A mapping with the given name and slug, and "description" and
        "description_html" both set to the empty string.
    """
    return {
        "name": name,
        "slug": slug,
        "description": "",
        "description_html": "",
    }
@@ -202,7 +227,7 @@ def annotate_entries_with_stars(
if not entry or "stars" not in entry:
continue
stripped = line.rstrip("\n")
ending = line[len(stripped):]
ending = line[len(stripped) :]
annotated = f"{stripped} ({format_stars(entry['stars'])}){ending}"
break
out.append(annotated)
@@ -233,7 +258,7 @@ def remove_sponsors_section(markdown: str) -> str:
def extract_entries(
categories: list[ParsedSection],
groups: list[ParsedGroup],
) -> list[dict]:
) -> list[TemplateEntry]:
"""Flatten categories into individual library entries for table display.
Entries appearing in multiple categories are merged into a single entry
@@ -241,27 +266,27 @@ def extract_entries(
"""
cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]}
seen: dict[tuple[str, str], dict[str, Any]] = {} # (url, name) -> entry
entries: list[dict[str, Any]] = []
seen: dict[tuple[str, str], TemplateEntry] = {} # (url, name) -> entry
entries: list[TemplateEntry] = []
for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other")
for entry in cat["entries"]:
key = (entry["url"], entry["name"])
existing: dict[str, Any] | None = seen.get(key)
existing = seen.get(key)
if existing is None:
existing = {
"name": entry["name"],
"url": entry["url"],
"description": entry["description"],
"categories": [],
"groups": [],
"subcategories": [],
"stars": None,
"owner": None,
"last_commit_at": None,
"source_type": detect_source_type(entry["url"]),
"also_see": entry["also_see"],
}
existing = TemplateEntry(
name=entry["name"],
url=entry["url"],
description=entry["description"],
categories=[],
groups=[],
subcategories=[],
stars=None,
owner=None,
last_commit_at=None,
source_type=detect_source_type(entry["url"]),
also_see=entry["also_see"],
)
seen[key] = existing
entries.append(existing)
if cat["name"] not in existing["categories"]:
@@ -273,12 +298,14 @@ def extract_entries(
scoped = f"{cat['name']} > {subcat}"
if not any(s["value"] == scoped for s in existing["subcategories"]):
sub_slug = slugify(subcat)
existing["subcategories"].append({
"name": subcat,
"value": scoped,
"slug": sub_slug,
"url": f"/categories/{cat['slug']}/{sub_slug}/",
})
existing["subcategories"].append(
TemplateSubcategory(
name=subcat,
value=scoped,
slug=sub_slug,
url=f"/categories/{cat['slug']}/{sub_slug}/",
)
)
return entries
@@ -303,10 +330,7 @@ def build(repo_root: Path) -> None:
all_top_level_slugs = cat_slugs + group_slugs + [BUILTIN_SLUG]
duplicates = {s for s, n in Counter(all_top_level_slugs).items() if n > 1}
if duplicates:
raise ValueError(
f"slug collision in /categories/ namespace: {sorted(duplicates)}. "
"Rename a category or group so their slugs differ."
)
raise ValueError(f"slug collision in /categories/ namespace: {sorted(duplicates)}. Rename a category or group so their slugs differ.")
total_entries = sum(c["entry_count"] for c in categories)
entries = extract_entries(categories, parsed_groups)
build_date = datetime.now(UTC)
@@ -377,14 +401,14 @@ def build(repo_root: Path) -> None:
categories_dir = site_dir / "categories"
def render_category(
category: dict,
category: TemplateCategory,
*,
category_url: str,
entries: list[dict],
entries: Sequence[TemplateEntry],
current_path: str,
page_dir: Path,
parent_category: dict | None = None,
group_categories: list | None = None,
parent_category: ParsedSection | None = None,
group_categories: Sequence[ParsedSection] | None = None,
) -> None:
page_dir.mkdir(parents=True, exist_ok=True)
(page_dir / "index.html").write_text(
@@ -443,7 +467,7 @@ def build(repo_root: Path) -> None:
encoding="utf-8",
)
subcat_to_entries: dict[str, list[dict]] = {}
subcat_to_entries: dict[str, list[TemplateEntry]] = {}
subcat_meta: dict[str, tuple[str, str, str]] = {} # value -> (cat_slug, sub_slug, sub_name)
cat_slug_by_url_prefix = {f"/categories/{c['slug']}/": c["slug"] for c in categories}
cat_by_slug = {c["slug"]: c for c in categories}
@@ -472,9 +496,7 @@ def build(repo_root: Path) -> None:
if static_src.exists():
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
markdown_index = annotate_entries_with_stars(
remove_sponsors_section(readme_text), stars_data
)
markdown_index = annotate_entries_with_stars(remove_sponsors_section(readme_text), stars_data)
llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8")
llms_txt = build_llms_txt(llms_template, readme_text, stars_data)
(site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8")

View File

@@ -9,8 +9,8 @@ from html.parser import HTMLParser
from pathlib import Path
import pytest
from build import (
TemplateEntry,
annotate_entries_with_stars,
build,
detect_source_type,
@@ -121,28 +121,15 @@ class TestBuild:
encoding="utf-8",
)
(tpl_dir / "category.html").write_text(
'{% extends "base.html" %}{% block content %}'
"<h1>{{ category.name }}</h1>"
"{% for entry in entries %}"
'<a href="{{ entry.url }}">{{ entry.name }}</a>'
"{% endfor %}"
"{% endblock %}",
'{% extends "base.html" %}{% block content %}<h1>{{ category.name }}</h1>{% for entry in entries %}<a href="{{ entry.url }}">{{ entry.name }}</a>{% endfor %}{% endblock %}',
encoding="utf-8",
)
(tpl_dir / "sponsorship.html").write_text(
'{% extends "base.html" %}{% block content %}'
"<h1>Sponsor</h1>"
"{% endblock %}",
'{% extends "base.html" %}{% block content %}<h1>Sponsor</h1>{% endblock %}',
encoding="utf-8",
)
(tpl_dir / "llms.txt").write_text(
"# Awesome Python\n"
"\n"
"Use this list to find Python tools.\n"
"\n"
"# Categories\n"
"\n"
"{{ categories_md }}\n",
"# Awesome Python\n\nUse this list to find Python tools.\n\n# Categories\n\n{{ categories_md }}\n",
encoding="utf-8",
)
@@ -220,19 +207,13 @@ class TestBuild:
site = tmp_path / "website" / "output"
robots = (site / "robots.txt").read_text(encoding="utf-8")
assert robots == (
"User-agent: *\n"
"Content-Signal: search=yes, ai-input=yes, ai-train=yes\n"
"Allow: /\n"
"\n"
"Sitemap: https://awesome-python.com/sitemap.xml\n"
)
assert robots == ("User-agent: *\nContent-Signal: search=yes, ai-input=yes, ai-train=yes\nAllow: /\n\nSitemap: https://awesome-python.com/sitemap.xml\n")
sitemap = ET.parse(site / "sitemap.xml")
root = sitemap.getroot()
ns = {"sitemap": "http://www.sitemaps.org/schemas/sitemap/0.9"}
locs = [loc.text for loc in root.findall("sitemap:url/sitemap:loc", ns)]
lastmods = [lastmod.text for lastmod in root.findall("sitemap:url/sitemap:lastmod", ns)]
locs = [loc.text or "" for loc in root.findall("sitemap:url/sitemap:loc", ns)]
lastmods = [lastmod.text or "" for lastmod in root.findall("sitemap:url/sitemap:lastmod", ns)]
assert root.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset"
assert locs == [
@@ -820,45 +801,61 @@ class TestLoadStars:
# ---------------------------------------------------------------------------
def _template_entry(name: str, stars: int | None, source_type: str | None = None) -> TemplateEntry:
    """Build a TemplateEntry fixture for the sorting tests.

    Only name, stars and source_type vary; every other field gets an
    empty or None placeholder.
    """
    return TemplateEntry(
        name=name,
        url="",
        description="",
        categories=[],
        groups=[],
        subcategories=[],
        stars=stars,
        owner=None,
        last_commit_at=None,
        source_type=source_type,
        also_see=[],
    )
class TestSortEntries:
    """Ordering rules of sort_entries, exercised with typed fixtures."""

    def test_sorts_by_stars_descending(self):
        entries = [
            _template_entry("a", 100),
            _template_entry("b", 500),
            _template_entry("c", 200),
        ]
        result = sort_entries(entries)
        assert [e["name"] for e in result] == ["b", "c", "a"]

    def test_equal_stars_sorted_alphabetically(self):
        entries = [
            _template_entry("beta", 100),
            _template_entry("alpha", 100),
        ]
        result = sort_entries(entries)
        assert [e["name"] for e in result] == ["alpha", "beta"]

    def test_no_stars_go_to_bottom(self):
        entries = [
            _template_entry("no-stars", None),
            _template_entry("has-stars", 50),
        ]
        result = sort_entries(entries)
        assert [e["name"] for e in result] == ["has-stars", "no-stars"]

    def test_no_stars_sorted_alphabetically(self):
        entries = [
            _template_entry("zebra", None),
            _template_entry("apple", None),
        ]
        result = sort_entries(entries)
        assert [e["name"] for e in result] == ["apple", "zebra"]

    def test_builtin_between_starred_and_unstarred(self):
        # Built-in entries carry no stars but must rank above other unstarred ones.
        entries = [
            _template_entry("builtin", None, "Built-in"),
            _template_entry("starred", 100),
            _template_entry("unstarred", None),
        ]
        result = sort_entries(entries)
        assert [e["name"] for e in result] == ["starred", "builtin", "unstarred"]
@@ -1005,23 +1002,15 @@ class TestAnnotateEntriesWithStars:
def test_appends_star_count_to_bullet(self):
    # A bullet whose GitHub repo has star data gets the count appended
    # before the line ending.
    markdown = "- [foo](https://github.com/owner/foo) - A foo.\n"
    stars = {"owner/foo": {"stars": 123, "owner": "owner"}}
    expected = "- [foo](https://github.com/owner/foo) - A foo. (123 GitHub stars)\n"
    assert annotate_entries_with_stars(markdown, stars) == expected
def test_uses_first_github_link(self):
    # With two GitHub links on one bullet, the first link's star count wins.
    markdown = "- [foo](https://github.com/owner/foo) - A foo. Also [bar](https://github.com/owner/bar).\n"
    stars = {
        "owner/foo": {"stars": 10, "owner": "owner"},
        "owner/bar": {"stars": 99, "owner": "owner"},
    }
    expected = "- [foo](https://github.com/owner/foo) - A foo. Also [bar](https://github.com/owner/bar). (10 GitHub stars)\n"
    assert annotate_entries_with_stars(markdown, stars) == expected
def test_skips_entries_without_star_data(self):
markdown = "- [foo](https://github.com/owner/foo) - A foo.\n"
@@ -1040,13 +1029,9 @@ class TestAnnotateEntriesWithStars:
def test_handles_indented_bullets(self):
    # Leading whitespace before the bullet must not prevent annotation.
    markdown = " - [foo](https://github.com/owner/foo)\n"
    stars = {"owner/foo": {"stars": 7, "owner": "owner"}}
    expected = " - [foo](https://github.com/owner/foo) (7 GitHub stars)\n"
    assert annotate_entries_with_stars(markdown, stars) == expected
def test_preserves_lines_without_trailing_newline(self):
    # A final line with no newline is annotated without gaining one.
    markdown = "- [foo](https://github.com/owner/foo) - A foo."
    stars = {"owner/foo": {"stars": 5, "owner": "owner"}}
    expected = "- [foo](https://github.com/owner/foo) - A foo. (5 GitHub stars)"
    assert annotate_entries_with_stars(markdown, stars) == expected