diff --git a/website/build.py b/website/build.py index ce38f420..3520ce6f 100644 --- a/website/build.py +++ b/website/build.py @@ -248,9 +248,6 @@ def top_level_heading_text(line: str) -> str | None: return stripped.removeprefix("#").strip().strip("#").strip().strip("*").strip() -LLMS_CATEGORIES_PLACEHOLDER = "{{ categories_md }}" - - def extract_categories_body(markdown: str) -> str: """Return content under the `# Categories` heading, excluding the heading line itself.""" lines = markdown.splitlines(keepends=True) @@ -272,11 +269,33 @@ def extract_categories_body(markdown: str) -> str: return "".join(lines[start_idx:end_idx]).rstrip() + "\n" -def build_llms_txt(template_text: str, readme_text: str, stars_data: dict[str, dict]) -> str: - """Render the llms.txt template by injecting the README's Categories body, then annotate stars.""" - body = extract_categories_body(readme_text).rstrip() - rendered = template_text.replace(LLMS_CATEGORIES_PLACEHOLDER, body) - return annotate_entries_with_stars(rendered, stars_data, format_stars=str) +def build_llms_txt( + template_text: str, + *, + readme_text: str, + stars_data: dict[str, dict], + categories: Sequence[ParsedSection], + total_entries: int, +) -> str: + """Render the llms.txt entry point with the curated category catalog.""" + categories_md = annotate_entries_with_stars( + extract_categories_body(readme_text).rstrip(), + stars_data, + format_stars=lambda n: f"GitHub stars: {n}", + ) + text_env = Environment(autoescape=False, trim_blocks=True, lstrip_blocks=True) + rendered = text_env.from_string(template_text).render( + site_url=SITE_URL, + github_repo_url="https://github.com/vinta/awesome-python", + contributing_url="https://github.com/vinta/awesome-python/blob/master/CONTRIBUTING.md", + sponsorship_url=SPONSORSHIP_PUBLIC_URL, + sitemap_url=SITEMAP_URL, + index_markdown_url=f"{SITE_URL}index.md", + categories_md=categories_md, + total_entries=total_entries, + total_categories=len(categories), + ) + return rendered.rstrip() + "\n" def annotate_entries_with_stars( @@ -592,7 +611,13 @@ def build(repo_root: Path) -> None: sponsorship_md = repo_root / "SPONSORSHIP.md" sponsorship_md_mtime = datetime.fromtimestamp(sponsorship_md.stat().st_mtime, tz=UTC).date().isoformat() llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8") - llms_txt = build_llms_txt(llms_template, readme_text, stars_data) + llms_txt = build_llms_txt( + llms_template, + readme_text=readme_text, + stars_data=stars_data, + categories=categories, + total_entries=total_entries, + ) (site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8") sitemap_date = build_date.date().isoformat() sitemap_urls = [(SITE_URL, sitemap_date)] diff --git a/website/templates/llms.txt b/website/templates/llms.txt index 1db05c3b..279f50df 100644 --- a/website/templates/llms.txt +++ b/website/templates/llms.txt @@ -1,9 +1,18 @@ # Awesome Python -An opinionated guide to the best Python frameworks, libraries, tools, and resources. +Awesome Python is an opinionated catalog of {{ total_entries }} Python frameworks, libraries, tools, and resources across {{ total_categories }} {% if total_categories == 1 %}category{% else %}categories{% endif %}. -Use this curated list when you need to find a high-quality Python library or tool for tasks such as web development, data science, machine learning, AI agents, automation, testing, or DevOps. The trailing number on each entry is its star count on GitHub. +Scan the category index, then jump to the matching section for direct project links and short descriptions. GitHub entries with known star data end with a `GitHub stars: N` note in parentheses; treat it as popularity context, not a quality guarantee. Use the Markdown homepage for project context outside the catalog. -# Categories +## Primary Links + +- Homepage: {{ site_url }} +- Markdown homepage: {{ index_markdown_url }} +- GitHub repository: {{ github_repo_url }} +- Contributing guide: {{ contributing_url }} +- Sponsorship: {{ sponsorship_url }} +- Sitemap: {{ sitemap_url }} + +## Categories {{ categories_md }} diff --git a/website/tests/test_build.py b/website/tests/test_build.py index b95a78b8..a5ce3290 100644 --- a/website/tests/test_build.py +++ b/website/tests/test_build.py @@ -122,7 +122,7 @@ class TestBuild: encoding="utf-8", ) (tpl_dir / "llms.txt").write_text( - "# Awesome Python\n\nUse this list to find Python tools.\n\n# Categories\n\n{{ categories_md }}\n", + "# Awesome Python\n\nHomepage: {{ site_url }}\n\n## Categories\n\n{{ categories_md }}\n", encoding="utf-8", ) @@ -222,11 +222,7 @@ class TestBuild: ] assert len(lastmods) == len(locs) assert lastmod_by_loc["https://awesome-python.com/sponsorship/"] == expected_sponsorship_lastmod - assert all( - start_date <= date.fromisoformat(lastmod) <= end_date - for loc, lastmod in lastmod_by_loc.items() - if loc != "https://awesome-python.com/sponsorship/" - ) + assert all(start_date <= date.fromisoformat(lastmod) <= end_date for loc, lastmod in lastmod_by_loc.items() if loc != "https://awesome-python.com/sponsorship/") assert all(loc.startswith("https://awesome-python.com/") for loc in locs) assert all("?" not in loc for loc in locs) @@ -316,6 +312,8 @@ class TestBuild: --- + **Tools** + ## Widgets - [w1](https://example.com) - A widget. @@ -352,14 +350,22 @@ class TestBuild: assert "- [w2](https://github.com/owner/w2) - A starred widget. (42 GitHub stars)" in index_md assert llms_txt.startswith("# Awesome Python\n") - assert "# Categories" in llms_txt - assert "Use this curated list" in llms_txt + assert "Scan the category index" in llms_txt + assert "Homepage: https://awesome-python.com/" in llms_txt + assert "Markdown homepage: https://awesome-python.com/index.md" in llms_txt + assert "GitHub repository: https://github.com/vinta/awesome-python" in llms_txt + assert "Contributing guide: https://github.com/vinta/awesome-python/blob/master/CONTRIBUTING.md" in llms_txt + assert "Sponsorship: https://awesome-python.com/sponsorship/" in llms_txt + assert "Sitemap: https://awesome-python.com/sitemap.xml" in llms_txt + assert "## Categories" in llms_txt + assert "**Tools**" in llms_txt + assert "- [Widgets](#widgets)" in llms_txt assert "## Widgets" in llms_txt assert "- [w1](https://example.com) - A widget." in llms_txt - assert "- [w2](https://github.com/owner/w2) - A starred widget. (42)" in llms_txt - assert "{{ categories_md }}" not in llms_txt + assert "- [w2](https://github.com/owner/w2) - A starred widget. (GitHub stars: 42)" in llms_txt + assert llms_txt != readme + assert llms_txt != index_md assert "# Contributing" not in llms_txt - assert "Help!" not in llms_txt def test_build_cleans_stale_output(self, tmp_path): readme = textwrap.dedent("""\