Use SPONSORSHIP.md's file modification time as the sponsorship page's lastmod in the sitemap

This commit is contained in:
Vinta Chen
2026-05-04 16:24:45 +08:00
parent d3bce3f61f
commit 509ebaff7a
2 changed files with 20 additions and 8 deletions

View File

@@ -496,13 +496,9 @@ def build(repo_root: Path) -> None:
group_categories: Sequence[ParsedSection] | None = None,
) -> None:
page_dir.mkdir(parents=True, exist_ok=True)
category_description = category_meta_description(
category["name"], len(entries), category["description"]
)
category_description = category_meta_description(category["name"], len(entries), category["description"])
category_json_ld = json.dumps(
build_category_json_ld(
category["name"], category_url, category_description, entries
),
build_category_json_ld(category["name"], category_url, category_description, entries),
ensure_ascii=False,
).replace("</", "<\\/")
(page_dir / "index.html").write_text(
@@ -593,6 +589,8 @@ def build(repo_root: Path) -> None:
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
markdown_index = annotate_entries_with_stars(remove_sponsors_section(readme_text), stars_data)
sponsorship_md = repo_root / "SPONSORSHIP.md"
sponsorship_md_mtime = datetime.fromtimestamp(sponsorship_md.stat().st_mtime, tz=UTC).date().isoformat()
llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8")
llms_txt = build_llms_txt(llms_template, readme_text, stars_data)
(site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8")
@@ -604,7 +602,7 @@ def build(repo_root: Path) -> None:
sitemap_urls.append((BUILTIN_PUBLIC_URL, sitemap_date))
for cat_slug, sub_slug, _ in sorted(subcat_meta.values()):
sitemap_urls.append((subcategory_public_url(cat_slug, sub_slug), sitemap_date))
sitemap_urls.append((SPONSORSHIP_PUBLIC_URL, sitemap_date))
sitemap_urls.append((SPONSORSHIP_PUBLIC_URL, sponsorship_md_mtime))
write_sitemap_xml(site_dir / "sitemap.xml", sitemap_urls)
(site_dir / "index.md").write_text(markdown_index, encoding="utf-8")
(site_dir / "llms.txt").write_text(llms_txt, encoding="utf-8")

View File

@@ -1,6 +1,7 @@
"""Tests for the build module."""
import json
import os
import shutil
import textwrap
import xml.etree.ElementTree as ET
@@ -86,6 +87,10 @@ class TestSubcategoryPath:
class TestBuild:
@pytest.fixture(autouse=True)
def _make_sponsorship_md(self, tmp_path):
(tmp_path / "SPONSORSHIP.md").write_text("# Sponsorship\n", encoding="utf-8")
def _make_repo(self, tmp_path, readme):
(tmp_path / "README.md").write_text(readme, encoding="utf-8")
tpl_dir = tmp_path / "website" / "templates"
@@ -189,6 +194,9 @@ class TestBuild:
Help!
""")
self._make_repo(tmp_path, readme)
sponsorship_mtime = datetime(2024, 1, 2, tzinfo=UTC).timestamp()
os.utime(tmp_path / "SPONSORSHIP.md", (sponsorship_mtime, sponsorship_mtime))
expected_sponsorship_lastmod = "2024-01-02"
start_date = datetime.now(UTC).date()
build(tmp_path)
end_date = datetime.now(UTC).date()
@@ -202,6 +210,7 @@ class TestBuild:
ns = {"sitemap": "http://www.sitemaps.org/schemas/sitemap/0.9"}
locs = [loc.text or "" for loc in root.findall("sitemap:url/sitemap:loc", ns)]
lastmods = [lastmod.text or "" for lastmod in root.findall("sitemap:url/sitemap:lastmod", ns)]
lastmod_by_loc = dict(zip(locs, lastmods, strict=True))
assert root.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset"
assert locs == [
@@ -212,7 +221,12 @@ class TestBuild:
"https://awesome-python.com/sponsorship/",
]
assert len(lastmods) == len(locs)
assert all(start_date <= date.fromisoformat(lastmod) <= end_date for lastmod in lastmods)
assert lastmod_by_loc["https://awesome-python.com/sponsorship/"] == expected_sponsorship_lastmod
assert all(
start_date <= date.fromisoformat(lastmod) <= end_date
for loc, lastmod in lastmod_by_loc.items()
if loc != "https://awesome-python.com/sponsorship/"
)
assert all(loc.startswith("https://awesome-python.com/") for loc in locs)
assert all("?" not in loc for loc in locs)