enh: kb metadata search

This commit is contained in:
Timothy Jaeryang Baek
2026-01-09 22:21:00 +04:00
parent eff772562b
commit 3c986adeda
18 changed files with 257 additions and 26 deletions

View File

@@ -427,7 +427,8 @@ class PgvectorClient(VectorDBBase):
self,
collection_name: str,
vectors: List[List[float]],
limit: Optional[int] = None,
filter: Optional[Dict[str, Any]] = None,
limit: int = 10,
) -> Optional[SearchResult]:
try:
if not vectors:
@@ -475,9 +476,40 @@ class PgvectorClient(VectorDBBase):
)
# Build the lateral subquery for each query vector
where_clauses = [DocumentChunk.collection_name == collection_name]
# Apply metadata filter if provided
if filter:
for key, value in filter.items():
if isinstance(value, dict) and "$in" in value:
# Handle $in operator: {"field": {"$in": [values]}}
in_values = value["$in"]
if PGVECTOR_PGCRYPTO:
where_clauses.append(
pgcrypto_decrypt(
DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB
)[key].astext.in_([str(v) for v in in_values])
)
else:
where_clauses.append(
DocumentChunk.vmetadata[key].astext.in_([str(v) for v in in_values])
)
else:
# Handle simple equality: {"field": "value"}
if PGVECTOR_PGCRYPTO:
where_clauses.append(
pgcrypto_decrypt(
DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB
)[key].astext == str(value)
)
else:
where_clauses.append(
DocumentChunk.vmetadata[key].astext == str(value)
)
subq = (
select(*result_fields)
.where(DocumentChunk.collection_name == collection_name)
.where(*where_clauses)
.order_by(
(DocumentChunk.vector.cosine_distance(query_vectors.c.q_vector))
)