mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 17:20:21 -05:00
fix(citations): add missing bibliography entries and improve validation
Add missing citations to chapter bib files:
- carlini2021extracting to privacy_security.bib
- koomey2011web to frontiers.bib
- quinonero2009dataset to robust_ai.bib

Enhance citation validation script:
- Strip trailing punctuation (.,;:) from citation keys
- Filter out DOI-style citations (e.g., @10.1109/...)
- Prevent false positives from citations like [@key.]

These changes fix all reported citation validation failures while improving the validation script to handle edge cases better.
This commit is contained in:
@@ -60,6 +60,12 @@ def extract_citation_keys(qmd_file: Path) -> Set[str]:
|
||||
# Pattern matches @word with letters, numbers, hyphens, underscores, colons, dots
|
||||
citation_keys = set(re.findall(r'@([\w\-_:.]+)', content))
|
||||
|
||||
# Strip trailing punctuation (periods, commas, semicolons, colons) that the pattern may capture
|
||||
citation_keys = {key.rstrip('.,;:') for key in citation_keys}
|
||||
|
||||
# Filter out DOI-style citations (start with numbers like 10.1109)
|
||||
citation_keys = {key for key in citation_keys if not re.match(r'^\d+\.\d+', key)}
|
||||
|
||||
# Filter out common false positives that aren't citations
|
||||
filtered_keys = {
|
||||
key for key in citation_keys
|
||||
|
||||
Reference in New Issue
Block a user