This commit is contained in:
Timothy Jaeryang Baek
2026-03-17 17:58:01 -05:00
parent fcf7208352
commit de3317e26b
220 changed files with 17200 additions and 22836 deletions

View File

@@ -1,13 +1,11 @@
from datetime import datetime
from typing import Any
# Metadata keys dropped by filter_metadata (large/redundant fields).
KEYS_TO_EXCLUDE = ['content', 'pages', 'tables', 'paragraphs', 'sections', 'figures']
def filter_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``metadata`` without the keys in ``KEYS_TO_EXCLUDE``.

    Removes large/redundant fields from the metadata dict. The caller's
    mapping is left untouched; a new dict is built and returned.

    Args:
        metadata: Mapping of metadata field names to their values.

    Returns:
        A new dict containing every entry of ``metadata`` whose key is not
        listed in ``KEYS_TO_EXCLUDE``.
    """
    return {key: value for key, value in metadata.items() if key not in KEYS_TO_EXCLUDE}