def github_markdown_anchor(text: str) -> str:
    """Return a ``#fragment`` for a heading, following GitHub-style anchor rules.

    The heading is stripped and lower-cased, every character that is not a
    word character, whitespace, or a hyphen is removed, and each remaining
    whitespace character is turned into a hyphen.

    Args:
        text: Heading text, e.g. a category name.

    Returns:
        The anchor including its leading ``#``.
    """
    anchor = text.strip().lower()
    anchor = re.sub(r"[^\w\s-]", "", anchor)
    # One hyphen per whitespace character: runs are deliberately not
    # collapsed, so "a & b" becomes "a--b".
    anchor = re.sub(r"\s", "-", anchor)
    return f"#{anchor}"


def link_llms_category_index_to_canonical_pages(markdown: str, categories: Sequence[ParsedSection]) -> str:
    """Point the README-derived category index at canonical category pages.

    Every link matched by ``MARKDOWN_LINK_RE`` whose captured target (group 1)
    is a known in-page category anchor has that target replaced by the
    category's public URL. Two anchors are recognised per category:
    ``#<slug>`` and the anchor derived from the category name by
    ``github_markdown_anchor``. All other links are left untouched.

    Args:
        markdown: Markdown text containing the category index.
        categories: Parsed categories; each must provide ``slug`` and ``name``.

    Returns:
        The markdown with matching category links rewritten.
    """
    category_urls = {}
    for category in categories:
        public_url = category_public_url(category)
        category_urls[f"#{category['slug']}"] = public_url
        category_urls[github_markdown_anchor(category["name"])] = public_url

    def replace_link(match: re.Match[str]) -> str:
        url = category_urls.get(match.group(1))
        if url is None:
            return match.group(0)
        # Splice the URL in at the captured target's own offsets. A textual
        # str.replace on "(target)" could instead hit identical text inside
        # the link label, or miss when the target is not directly wrapped in
        # parentheses.
        whole = match.group(0)
        base = match.start()
        return whole[: match.start(1) - base] + url + whole[match.end(1) - base :]

    # Substitute line by line so a match can never span a line break.
    return "".join(
        MARKDOWN_LINK_RE.sub(replace_link, line)
        for line in markdown.splitlines(keepends=True)
    )
--git a/website/tests/test_build.py b/website/tests/test_build.py index 0f99351a..caada782 100644 --- a/website/tests/test_build.py +++ b/website/tests/test_build.py @@ -352,7 +352,8 @@ class TestBuild: assert "Sitemap: https://awesome-python.com/sitemap.xml" in llms_txt assert "## Categories" in llms_txt assert "**Tools**" in llms_txt - assert "- [Widgets](#widgets)" in llms_txt + assert "- [Widgets](https://awesome-python.com/categories/widgets/)" in llms_txt + assert "- [Widgets](#widgets)" not in llms_txt assert "### Widgets" in llms_txt assert "- [w1](https://example.com) - A widget." in llms_txt assert "- [w2](https://github.com/owner/w2) - A starred widget. (GitHub stars: 42)" in llms_txt