diff --git a/.gitignore b/.gitignore
index ca26a6e8..0d9f410b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,12 +10,12 @@ __pycache__/
 website/output/
 website/data/
 
-# claude code
-.claude/skills/
-.gstack/
-.playwright-cli/
-.superpowers/
-skills-lock.json
+# planning docs
+docs/
 
-# codex
+# agents
 .agents/
+.claude/skills/
+.superpowers/
+.playwright-cli/
+skills-lock.json
diff --git a/README.md b/README.md
index 51ae9d16..107b6859 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# Awesome Python
-An opinionated list of Python frameworks, libraries, tools, and resources.
+An opinionated guide to the best Python frameworks, libraries, tools, and resources.
# **Sponsors**
diff --git a/website/build.py b/website/build.py
index c223ef18..8fb5f384 100644
--- a/website/build.py
+++ b/website/build.py
@@ -4,6 +4,8 @@
import json
import re
import shutil
+import xml.etree.ElementTree as ET
+from collections.abc import Sequence
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
@@ -12,6 +14,9 @@ from jinja2 import Environment, FileSystemLoader
from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
+SITE_URL = "https://awesome-python.com/"
+SITEMAP_URL = f"{SITE_URL}sitemap.xml"
+SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9"
SOURCE_TYPE_DOMAINS = {
"docs.python.org": "Built-in",
@@ -67,6 +72,59 @@ def sort_entries(entries: list[dict]) -> list[dict]:
     return sorted(entries, key=sort_key)
+def build_robots_txt() -> str:
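+    """Allow all crawlers, signal that search and AI use are welcome, and advertise the sitemap."""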
+    return (
+        "User-agent: *\n"
+        "Content-Signal: search=yes, ai-input=yes, ai-train=yes\n"
+        "Allow: /\n"
+        "\n"
+        f"Sitemap: {SITEMAP_URL}\n"
+    )
+
+
+def write_sitemap_xml(path: Path, urls: Sequence[tuple[str, str]]) -> None:
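+    """Write a sitemaps.org-namespaced sitemap.xml with one <url> (loc + lastmod) entry per pair in urls."""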
+    ET.register_namespace("", SITEMAP_NS)
+    urlset = ET.Element(f"{{{SITEMAP_NS}}}urlset")
+    for url, lastmod in urls:
+        url_el = ET.SubElement(urlset, f"{{{SITEMAP_NS}}}url")
+        loc_el = ET.SubElement(url_el, f"{{{SITEMAP_NS}}}loc")
+        loc_el.text = url
+        lastmod_el = ET.SubElement(url_el, f"{{{SITEMAP_NS}}}lastmod")
+        lastmod_el.text = lastmod
+
+    ET.ElementTree(urlset).write(path, encoding="utf-8", xml_declaration=True)
+    with path.open("ab") as f:
+        f.write(b"\n")
+
+
+def top_level_heading_text(line: str) -> str | None:
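+    """Return the text of a top-level '# ' heading, stripped of '#' and '*' markers, or None."""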
+    stripped = line.strip()
+    if not stripped.startswith("# "):
+        return None
+    return stripped.removeprefix("#").strip().strip("#").strip().strip("*").strip()
+
+
+def remove_sponsors_section(markdown: str) -> str:
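+    """Drop the top-level Sponsors section (its heading up to the next top-level heading)."""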
+    lines = markdown.splitlines(keepends=True)
+    start_idx = None
+    for i, line in enumerate(lines):
+        heading = top_level_heading_text(line)
+        if heading and heading.lower() == "sponsors":
+            start_idx = i
+            break
+
+    if start_idx is None:
+        return markdown
+
+    end_idx = len(lines)
+    for i, line in enumerate(lines[start_idx + 1 :], start=start_idx + 1):
+        if top_level_heading_text(line):
+            end_idx = i
+            break
+
+    return "".join(lines[:start_idx] + lines[end_idx:])
+
+
 def extract_entries(
     categories: list[ParsedSection],
     groups: list[ParsedGroup],
@@ -131,6 +189,7 @@ def build(repo_root: Path) -> None:
     categories = [cat for g in parsed_groups for cat in g["categories"]]
     total_entries = sum(c["entry_count"] for c in categories)
     entries = extract_entries(categories, parsed_groups)
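+    # Capture one timestamp so the rendered footer date and the sitemap lastmod agree.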
+    build_date = datetime.now(UTC)
     stars_data = load_stars(website / "data" / "github_stars.json")
@@ -155,6 +214,8 @@ def build(repo_root: Path) -> None:
     env = Environment(
         loader=FileSystemLoader(website / "templates"),
         autoescape=True,
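+        # Strip newlines and leading indentation around Jinja block tags in the rendered output.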
+        trim_blocks=True,
+        lstrip_blocks=True,
     )
 
     site_dir = website / "output"
@@ -171,7 +232,7 @@ def build(repo_root: Path) -> None:
             total_entries=total_entries,
             total_categories=len(categories),
             repo_stars=repo_stars,
-            build_date=datetime.now(UTC).strftime("%B %d, %Y"),
+            build_date=build_date.strftime("%B %d, %Y"),
             sponsors=sponsors,
         ),
         encoding="utf-8",
@@ -182,7 +243,11 @@ def build(repo_root: Path) -> None:
     if static_src.exists():
         shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
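+    # Root discovery files: robots.txt, sitemap.xml, and a sponsor-free markdown mirror (index.md / llms.txt).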
- (site_dir / "llms.txt").write_text(readme_text, encoding="utf-8")
+ markdown_index = remove_sponsors_section(readme_text)
+ (site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8")
+ write_sitemap_xml(site_dir / "sitemap.xml", [(SITE_URL, build_date.date().isoformat())])
+ (site_dir / "index.md").write_text(markdown_index, encoding="utf-8")
+ (site_dir / "llms.txt").write_text(markdown_index, encoding="utf-8")
print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories")
print(f"Total entries: {total_entries}")
diff --git a/website/templates/base.html b/website/templates/base.html
index 34546e73..af112095 100644
--- a/website/templates/base.html
+++ b/website/templates/base.html
@@ -1,26 +1,27 @@
+    {% set default_meta_title = "Awesome Python" %}
+    {% set default_meta_description = "An opinionated guide to the best Python frameworks, libraries, and tools. Explore " ~ (entries | length) ~ " curated projects across " ~ total_categories ~ " categories, from AI and agents to data science and web development." %}
+    {% set canonical_url = "https://awesome-python.com/" %}
+    {% set social_image_url = "https://awesome-python.com/static/og-image.png" %}
+    {% set meta_title %}{% block title %}{{ default_meta_title }}{% endblock %}{% endset %}
+    {% set meta_description %}{% block description %}{{ default_meta_description }}{% endblock %}{% endset %}
-    <title>{% block title %}Awesome Python{% endblock %}</title>
-
-
+    <title>{{ meta_title | trim }}</title>
+    <meta name="description" content="{{ meta_description | trim }}">
+    <link rel="canonical" href="{{ canonical_url }}">
+    <meta property="og:type" content="website">
-
-
-
-
-
+    <meta property="og:title" content="{{ meta_title | trim }}">
+    <meta property="og:description" content="{{ meta_description | trim }}">
+    <meta property="og:image" content="{{ social_image_url }}">
+    <meta property="og:url" content="{{ canonical_url }}">
+    <meta name="twitter:card" content="summary_large_image">
+    <meta name="twitter:title" content="{{ meta_title | trim }}">
+    <meta name="twitter:description" content="{{ meta_description | trim }}">
+    <meta name="twitter:image" content="{{ social_image_url }}">
diff --git a/website/tests/test_build.py b/website/tests/test_build.py
index 0b22609a..1feab77d 100644
--- a/website/tests/test_build.py
+++ b/website/tests/test_build.py
@@ -3,6 +3,9 @@
import json
import shutil
import textwrap
+import xml.etree.ElementTree as ET
+from datetime import UTC, date, datetime
+from html.parser import HTMLParser
from pathlib import Path
from build import (
@@ -15,6 +18,40 @@ from build import (
)
from readme_parser import parse_readme, slugify
+
+class HeadMetadataParser(HTMLParser):
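+    """Collect <title> text, <meta> name/property content, and <link> rel/href values from a page."""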
+    def __init__(self):
+        super().__init__()
+        self.title_count = 0
+        self.title = ""
+        self.meta_by_name = {}
+        self.meta_by_property = {}
+        self.links_by_rel = {}
+        self._in_title = False
+
+    def handle_starttag(self, tag, attrs):
+        attrs = dict(attrs)
+        if tag == "title":
+            self.title_count += 1
+            self._in_title = True
+        elif tag == "meta":
+            if "name" in attrs:
+                self.meta_by_name[attrs["name"]] = attrs.get("content", "")
+            if "property" in attrs:
+                self.meta_by_property[attrs["property"]] = attrs.get("content", "")
+        elif tag == "link" and attrs.get("rel"):
+            for rel in attrs["rel"].split():
+                self.links_by_rel[rel] = attrs.get("href", "")
+
+    def handle_endtag(self, tag):
+        if tag == "title":
+            self._in_title = False
+
+    def handle_data(self, data):
+        if self._in_title:
+            self.title += data
+
+
# ---------------------------------------------------------------------------
# slugify
# ---------------------------------------------------------------------------
@@ -72,6 +109,11 @@ class TestBuild:
encoding="utf-8",
)
+ def _copy_real_templates(self, tmp_path):
+ real_tpl = Path(__file__).parent / ".." / "templates"
+ tpl_dir = tmp_path / "website" / "templates"
+ shutil.copytree(real_tpl, tpl_dir)
+
     def test_build_creates_single_page(self, tmp_path):
         readme = textwrap.dedent("""\
             # Awesome Python
@@ -114,6 +156,97 @@ class TestBuild:
         # No category sub-pages
         assert not (site / "categories").exists()
 
+    def test_build_creates_root_discovery_files(self, tmp_path):
+        readme = textwrap.dedent("""\
+            # Awesome Python
+
+            Intro.
+
+            ---
+
+            ## Widgets
+
+            - [w1](https://example.com) - A widget.
+
+            # Contributing
+
+            Help!
+        """)
+        self._make_repo(tmp_path, readme)
+        start_date = datetime.now(UTC).date()
+        build(tmp_path)
+        end_date = datetime.now(UTC).date()
+
+        site = tmp_path / "website" / "output"
+        robots = (site / "robots.txt").read_text(encoding="utf-8")
+        assert robots == (
+            "User-agent: *\n"
+            "Content-Signal: search=yes, ai-input=yes, ai-train=yes\n"
+            "Allow: /\n"
+            "\n"
+            "Sitemap: https://awesome-python.com/sitemap.xml\n"
+        )
+
+        sitemap = ET.parse(site / "sitemap.xml")
+        root = sitemap.getroot()
+        ns = {"sitemap": "http://www.sitemaps.org/schemas/sitemap/0.9"}
+        locs = [loc.text for loc in root.findall("sitemap:url/sitemap:loc", ns)]
+        lastmods = [lastmod.text for lastmod in root.findall("sitemap:url/sitemap:lastmod", ns)]
+
+        assert root.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset"
+        assert locs == ["https://awesome-python.com/"]
+        assert len(lastmods) == 1
+        assert start_date <= date.fromisoformat(lastmods[0]) <= end_date
+        assert all(loc.startswith("https://awesome-python.com/") for loc in locs)
+        assert all("?" not in loc for loc in locs)
+
+    def test_build_creates_markdown_alternate_without_sponsors(self, tmp_path):
+        readme = textwrap.dedent("""\
+            # Awesome Python
+
+            Intro.
+
+            # **Sponsors**
+
+            - **[Sponsor](https://sponsor.example.com)**: Sponsored tool.
+
+            > Become a sponsor: [Sponsor us](SPONSORSHIP.md).
+
+            # Categories
+
+            **Tools**
+
+            - [Widgets](#widgets)
+
+            ---
+
+            ## Widgets
+
+            - [w1](https://example.com) - A widget.
+
+            # Contributing
+
+            Help!
+        """)
+        (tmp_path / "README.md").write_text(readme, encoding="utf-8")
+        self._copy_real_templates(tmp_path)
+
+        build(tmp_path)
+
+        site = tmp_path / "website" / "output"
+        index_html = (site / "index.html").read_text(encoding="utf-8")
+        index_md = (site / "index.md").read_text(encoding="utf-8")
+        llms_txt = (site / "llms.txt").read_text(encoding="utf-8")
+
+        assert '' in index_html
+        assert index_md == llms_txt
+        assert index_md.startswith("# Awesome Python\n\nIntro.\n\n# Categories")
+        assert "# **Sponsors**" not in index_md
+        assert "Sponsor" not in index_md
+        assert "SPONSORSHIP.md" not in index_md
+        assert "## Widgets" in index_md
+        assert "- [w1](https://example.com) - A widget." in index_md
+
     def test_build_cleans_stale_output(self, tmp_path):
         readme = textwrap.dedent("""\
             # T
@@ -235,6 +368,40 @@ class TestBuild:
         # Expand content present
         assert "expand-content" in html
 
+    def test_index_contains_aligned_homepage_metadata(self, tmp_path):
+        readme = (Path(__file__).parents[2] / "README.md").read_text(encoding="utf-8")
+        (tmp_path / "README.md").write_text(readme, encoding="utf-8")
+        self._copy_real_templates(tmp_path)
+
+        build(tmp_path)
+
+        parsed_groups = parse_readme(readme)
+        categories = [cat for group in parsed_groups for cat in group["categories"]]
+        entries = extract_entries(categories, parsed_groups)
+        html = (tmp_path / "website" / "output" / "index.html").read_text(encoding="utf-8")
+        parser = HeadMetadataParser()
+        parser.feed(html)
+
+        expected_title = "Awesome Python"
+        expected_description = f"An opinionated guide to the best Python frameworks, libraries, and tools. Explore {len(entries)} curated projects across {len(categories)} categories, from AI and agents to data science and web development."
+        expected_url = "https://awesome-python.com/"
+        expected_image = "https://awesome-python.com/static/og-image.png"
+
+        assert parser.title_count == 1
+        assert parser.title.strip() == expected_title
+        assert parser.meta_by_name["description"] == expected_description
+        assert parser.links_by_rel["canonical"] == expected_url
+        assert parser.meta_by_property["og:type"] == "website"
+        assert parser.meta_by_property["og:title"] == expected_title
+        assert parser.meta_by_property["og:description"] == expected_description
+        assert parser.meta_by_property["og:image"] == expected_image
+        assert parser.meta_by_property["og:url"] == expected_url
+        assert parser.meta_by_name["twitter:card"] == "summary_large_image"
+        assert parser.meta_by_name["twitter:title"] == expected_title
+        assert parser.meta_by_name["twitter:description"] == expected_description
+        assert parser.meta_by_name["twitter:image"] == expected_image
+ assert "\n