add category links in llms.txt

This commit is contained in:
Vinta Chen
2026-05-07 20:00:56 +08:00
parent 5f042e2fb2
commit 10c06fb26d
2 changed files with 36 additions and 2 deletions

View File

@@ -270,6 +270,36 @@ def extract_categories_body(markdown: str) -> str:
return "".join(lines[start_idx:end_idx]).rstrip() + "\n"
def github_markdown_anchor(text: str) -> str:
    """Return a ``#``-prefixed fragment identifier derived from heading text.

    The text is trimmed and lower-cased, every character that is not a word
    character, whitespace, or a hyphen is dropped, and each remaining
    whitespace character becomes a single hyphen (runs are not collapsed).
    """
    lowered = text.strip().lower()
    # Strip punctuation first so only word chars, whitespace and "-" remain.
    without_punctuation = re.sub(r"[^\w\s-]", "", lowered)
    slug = re.sub(r"\s", "-", without_punctuation)
    return f"#{slug}"
def link_llms_category_index_to_canonical_pages(markdown: str, categories: Sequence[ParsedSection]) -> str:
    """Rewrite in-page category anchors in *markdown* to public category URLs.

    Each category is registered under two anchor spellings: ``#<slug>`` and
    the anchor computed from its name by ``github_markdown_anchor``. Any link
    matched by ``MARKDOWN_LINK_RE`` whose first capture group equals one of
    those anchors has that target swapped for ``category_public_url(category)``;
    every other link is returned unchanged.
    """
    anchor_to_url = {}
    for section in categories:
        canonical = category_public_url(section)
        # Slug anchor first, then the name-derived one; on a key collision
        # the later assignment wins.
        for anchor in (f"#{section['slug']}", github_markdown_anchor(section["name"])):
            anchor_to_url[anchor] = canonical

    def replace_link(match: re.Match[str]) -> str:
        whole = match.group(0)
        target = match.group(1)
        url = anchor_to_url.get(target)
        if url is None:
            return whole
        # Only the first "(target)" occurrence inside the matched link is swapped.
        return whole.replace(f"({target})", f"({url})", 1)

    # Substitute line by line, keeping the original line endings intact.
    return "".join(
        MARKDOWN_LINK_RE.sub(replace_link, line)
        for line in markdown.splitlines(keepends=True)
    )
def build_llms_txt(
template_text: str,
*,
@@ -280,7 +310,10 @@ def build_llms_txt(
) -> str:
"""Render the llms.txt entry point with the curated category catalog."""
categories_md = annotate_entries_with_stars(
extract_categories_body(readme_text).rstrip(),
link_llms_category_index_to_canonical_pages(
extract_categories_body(readme_text).rstrip(),
categories,
),
stars_data,
format_stars=lambda n: f"GitHub stars: {n}",
)

View File

@@ -352,7 +352,8 @@ class TestBuild:
assert "Sitemap: https://awesome-python.com/sitemap.xml" in llms_txt
assert "## Categories" in llms_txt
assert "**Tools**" in llms_txt
assert "- [Widgets](#widgets)" in llms_txt
assert "- [Widgets](https://awesome-python.com/categories/widgets/)" in llms_txt
assert "- [Widgets](#widgets)" not in llms_txt
assert "### Widgets" in llms_txt
assert "- [w1](https://example.com) - A widget." in llms_txt
assert "- [w2](https://github.com/owner/w2) - A starred widget. (GitHub stars: 42)" in llms_txt