update llms.txt

This commit is contained in:
Vinta Chen
2026-05-04 17:05:05 +08:00
parent 509ebaff7a
commit 3510db9df9
3 changed files with 63 additions and 23 deletions

View File

@@ -248,9 +248,6 @@ def top_level_heading_text(line: str) -> str | None:
return stripped.removeprefix("#").strip().strip("#").strip().strip("*").strip()
# Literal marker that the three-argument build_llms_txt swaps out, via plain
# str.replace, for the README's Categories body.
LLMS_CATEGORIES_PLACEHOLDER = "{{ categories_md }}"
def extract_categories_body(markdown: str) -> str:
"""Return content under the `# Categories` heading, excluding the heading line itself."""
lines = markdown.splitlines(keepends=True)
@@ -272,11 +269,33 @@ def extract_categories_body(markdown: str) -> str:
return "".join(lines[start_idx:end_idx]).rstrip() + "\n"
def build_llms_txt(template_text: str, readme_text: str, stars_data: dict[str, dict]) -> str:
    """Fill the llms.txt template with the README's Categories body, then add star counts.

    The placeholder is substituted with a plain string replace, and the star
    annotation pass then runs over the complete substituted document (not only
    over the injected Categories body), using ``str`` as the star formatter.
    """
    categories_md = extract_categories_body(readme_text).rstrip()
    return annotate_entries_with_stars(
        template_text.replace(LLMS_CATEGORIES_PLACEHOLDER, categories_md),
        stars_data,
        format_stars=str,
    )
def build_llms_txt(
    template_text: str,
    *,
    readme_text: str,
    stars_data: dict[str, dict],
    categories: Sequence[ParsedSection],
    total_entries: int,
) -> str:
    """Render the llms.txt entry point from its Jinja template.

    Args:
        template_text: Jinja source of the llms.txt template.
        readme_text: Full README Markdown; only the body under its Categories
            heading is injected into the output.
        stars_data: Star metadata handed to ``annotate_entries_with_stars``.
        categories: Parsed sections; only their count reaches the template.
        total_entries: Entry count exposed to the template as ``total_entries``.

    Returns:
        The rendered document with trailing whitespace collapsed to exactly one
        newline.
    """

    def _star_note(n):
        return f"GitHub stars: {n}"

    # Annotate only the catalog body, before rendering, so the template's own
    # prose and links are never passed through the star annotator.
    catalog_body = extract_categories_body(readme_text).rstrip()
    annotated_catalog = annotate_entries_with_stars(
        catalog_body,
        stars_data,
        format_stars=_star_note,
    )

    context = {
        "site_url": SITE_URL,
        "github_repo_url": "https://github.com/vinta/awesome-python",
        "contributing_url": "https://github.com/vinta/awesome-python/blob/master/CONTRIBUTING.md",
        "sponsorship_url": SPONSORSHIP_PUBLIC_URL,
        "sitemap_url": SITEMAP_URL,
        "index_markdown_url": f"{SITE_URL}index.md",
        "categories_md": annotated_catalog,
        "total_entries": total_entries,
        "total_categories": len(categories),
    }

    # Plain-text output: HTML autoescaping is off, and block tags are trimmed
    # so {% ... %} lines do not leave stray whitespace behind.
    plain_text_env = Environment(autoescape=False, trim_blocks=True, lstrip_blocks=True)
    template = plain_text_env.from_string(template_text)
    return template.render(**context).rstrip() + "\n"
def annotate_entries_with_stars(
@@ -592,7 +611,13 @@ def build(repo_root: Path) -> None:
sponsorship_md = repo_root / "SPONSORSHIP.md"
sponsorship_md_mtime = datetime.fromtimestamp(sponsorship_md.stat().st_mtime, tz=UTC).date().isoformat()
llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8")
llms_txt = build_llms_txt(llms_template, readme_text, stars_data)
llms_txt = build_llms_txt(
llms_template,
readme_text=readme_text,
stars_data=stars_data,
categories=categories,
total_entries=total_entries,
)
(site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8")
sitemap_date = build_date.date().isoformat()
sitemap_urls = [(SITE_URL, sitemap_date)]

View File

@@ -1,9 +1,18 @@
# Awesome Python
An opinionated guide to the best Python frameworks, libraries, tools, and resources.
Awesome Python is an opinionated catalog of {{ total_entries }} Python frameworks, libraries, tools, and resources across {{ total_categories }} {% if total_categories == 1 %}category{% else %}categories{% endif %}.
Use this curated list when you need to find a high-quality Python library or tool for tasks such as web development, data science, machine learning, AI agents, automation, testing, or DevOps. The trailing number on each entry is its star count on GitHub.
Scan the category index, then jump to the matching section for direct project links and short descriptions. GitHub entries with known star data end with a `GitHub stars: N` note in parentheses; treat it as popularity context, not a quality guarantee. Use the Markdown homepage for project context outside the catalog.
# Categories
## Primary Links
- Homepage: {{ site_url }}
- Markdown homepage: {{ index_markdown_url }}
- GitHub repository: {{ github_repo_url }}
- Contributing guide: {{ contributing_url }}
- Sponsorship: {{ sponsorship_url }}
- Sitemap: {{ sitemap_url }}
## Categories
{{ categories_md }}

View File

@@ -122,7 +122,7 @@ class TestBuild:
encoding="utf-8",
)
(tpl_dir / "llms.txt").write_text(
"# Awesome Python\n\nUse this list to find Python tools.\n\n# Categories\n\n{{ categories_md }}\n",
"# Awesome Python\n\nHomepage: {{ site_url }}\n\n## Categories\n\n{{ categories_md }}\n",
encoding="utf-8",
)
@@ -222,11 +222,7 @@ class TestBuild:
]
assert len(lastmods) == len(locs)
assert lastmod_by_loc["https://awesome-python.com/sponsorship/"] == expected_sponsorship_lastmod
assert all(
start_date <= date.fromisoformat(lastmod) <= end_date
for loc, lastmod in lastmod_by_loc.items()
if loc != "https://awesome-python.com/sponsorship/"
)
assert all(start_date <= date.fromisoformat(lastmod) <= end_date for loc, lastmod in lastmod_by_loc.items() if loc != "https://awesome-python.com/sponsorship/")
assert all(loc.startswith("https://awesome-python.com/") for loc in locs)
assert all("?" not in loc for loc in locs)
@@ -316,6 +312,8 @@ class TestBuild:
---
**Tools**
## Widgets
- [w1](https://example.com) - A widget.
@@ -352,14 +350,22 @@ class TestBuild:
assert "- [w2](https://github.com/owner/w2) - A starred widget. (42 GitHub stars)" in index_md
assert llms_txt.startswith("# Awesome Python\n")
assert "# Categories" in llms_txt
assert "Use this curated list" in llms_txt
assert "Scan the category index" in llms_txt
assert "Homepage: https://awesome-python.com/" in llms_txt
assert "Markdown homepage: https://awesome-python.com/index.md" in llms_txt
assert "GitHub repository: https://github.com/vinta/awesome-python" in llms_txt
assert "Contributing guide: https://github.com/vinta/awesome-python/blob/master/CONTRIBUTING.md" in llms_txt
assert "Sponsorship: https://awesome-python.com/sponsorship/" in llms_txt
assert "Sitemap: https://awesome-python.com/sitemap.xml" in llms_txt
assert "## Categories" in llms_txt
assert "**Tools**" in llms_txt
assert "- [Widgets](#widgets)" in llms_txt
assert "## Widgets" in llms_txt
assert "- [w1](https://example.com) - A widget." in llms_txt
assert "- [w2](https://github.com/owner/w2) - A starred widget. (42)" in llms_txt
assert "{{ categories_md }}" not in llms_txt
assert "- [w2](https://github.com/owner/w2) - A starred widget. (GitHub stars: 42)" in llms_txt
assert llms_txt != readme
assert llms_txt != index_md
assert "# Contributing" not in llms_txt
assert "Help!" not in llms_txt
def test_build_cleans_stale_output(self, tmp_path):
readme = textwrap.dedent("""\