mirror of
https://github.com/open-webui/open-webui.git
synced 2026-05-03 02:39:11 -05:00
30 lines
1.0 KiB
Python
30 lines
1.0 KiB
Python
from datetime import datetime
from typing import Any

from open_webui.utils.misc import sanitize_text_for_db
|
|
|
|
# Metadata keys that filter_metadata() and process_metadata() leave out of the dict they return.
KEYS_TO_EXCLUDE = ['content', 'pages', 'tables', 'paragraphs', 'sections', 'figures']
|
|
|
|
|
|
def filter_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
    # Removes large/redundant fields from a metadata dict.
    #
    # Returns a new dict containing every entry of `metadata` whose key is not
    # listed in KEYS_TO_EXCLUDE. The dict passed in is not modified, and the
    # values are carried over unchanged (no conversion or sanitizing here).
    return {key: value for key, value in metadata.items() if key not in KEYS_TO_EXCLUDE}
|
|
|
|
|
|
def process_metadata(
    metadata: dict[str, Any],
) -> dict[str, Any]:
    # Removes large fields, converts non-serializable types (datetime, list, dict) to strings,
    # and sanitizes strings for database storage (strips null bytes and invalid surrogates).
    #
    # Returns a new dict; the dict passed in is not modified.
    result = {}
    for key, value in metadata.items():
        # Skip large fields
        if key in KEYS_TO_EXCLUDE:
            continue

        # Convert non-serializable fields to their str() form first, so the
        # sanitizer below sees text for them.
        if isinstance(value, (datetime, list, dict)):
            value = str(value)

        # Values of any other type are handed to sanitize_text_for_db as-is;
        # how it treats non-string input is defined by that helper.
        result[key] = sanitize_text_for_db(value)
    return result
|