enh: kb metadata search

This commit is contained in:
Timothy Jaeryang Baek
2026-01-09 22:21:00 +04:00
parent eff772562b
commit 3c986adeda
18 changed files with 257 additions and 26 deletions

View File

@@ -69,7 +69,7 @@ class ChromaClient(VectorDBBase):
return self.client.delete_collection(name=collection_name)
def search(
self, collection_name: str, vectors: list[list[float | int]], limit: int
self, collection_name: str, vectors: list[list[float | int]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
# Search for the nearest neighbor items based on the vectors and return 'limit' number of results.
try:
@@ -78,6 +78,7 @@ class ChromaClient(VectorDBBase):
result = collection.query(
query_embeddings=vectors,
n_results=limit,
where=filter,
)
# chromadb has cosine distance, 2 (worst) -> 0 (best). Re-ordering to 0 -> 1

View File

@@ -153,7 +153,7 @@ class ElasticsearchClient(VectorDBBase):
# Status: works
def search(
self, collection_name: str, vectors: list[list[float]], limit: int
self, collection_name: str, vectors: list[list[float]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
query = {
"size": limit,

View File

@@ -179,7 +179,7 @@ class MilvusClient(VectorDBBase):
)
def search(
self, collection_name: str, vectors: list[list[float | int]], limit: int
self, collection_name: str, vectors: list[list[float | int]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
# Search for the nearest neighbor items based on the vectors and return 'limit' number of results.
collection_name = collection_name.replace("-", "_")

View File

@@ -157,7 +157,7 @@ class MilvusClient(VectorDBBase):
collection.insert(entities)
def search(
self, collection_name: str, vectors: List[List[float]], limit: int
self, collection_name: str, vectors: List[List[float]], filter: Optional[Dict] = None, limit: int = 10
) -> Optional[SearchResult]:
if not vectors:
return None

View File

@@ -233,7 +233,8 @@ class OpenGaussClient(VectorDBBase):
self,
collection_name: str,
vectors: List[List[float]],
limit: Optional[int] = None,
filter: Optional[Dict[str, Any]] = None,
limit: int = 10,
) -> Optional[SearchResult]:
try:
if not vectors:

View File

@@ -113,7 +113,7 @@ class OpenSearchClient(VectorDBBase):
self.client.indices.delete(index=self._get_index_name(collection_name))
def search(
self, collection_name: str, vectors: list[list[float | int]], limit: int
self, collection_name: str, vectors: list[list[float | int]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
try:
if not self.has_collection(collection_name):

View File

@@ -521,7 +521,7 @@ class Oracle23aiClient(VectorDBBase):
raise
def search(
self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
self, collection_name: str, vectors: List[List[Union[float, int]]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
"""
Search for similar vectors in the database.

View File

@@ -427,7 +427,8 @@ class PgvectorClient(VectorDBBase):
self,
collection_name: str,
vectors: List[List[float]],
limit: Optional[int] = None,
filter: Optional[Dict[str, Any]] = None,
limit: int = 10,
) -> Optional[SearchResult]:
try:
if not vectors:
@@ -475,9 +476,40 @@ class PgvectorClient(VectorDBBase):
)
# Build the lateral subquery for each query vector
where_clauses = [DocumentChunk.collection_name == collection_name]
# Apply metadata filter if provided
if filter:
for key, value in filter.items():
if isinstance(value, dict) and "$in" in value:
# Handle $in operator: {"field": {"$in": [values]}}
in_values = value["$in"]
if PGVECTOR_PGCRYPTO:
where_clauses.append(
pgcrypto_decrypt(
DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB
)[key].astext.in_([str(v) for v in in_values])
)
else:
where_clauses.append(
DocumentChunk.vmetadata[key].astext.in_([str(v) for v in in_values])
)
else:
# Handle simple equality: {"field": "value"}
if PGVECTOR_PGCRYPTO:
where_clauses.append(
pgcrypto_decrypt(
DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB
)[key].astext == str(value)
)
else:
where_clauses.append(
DocumentChunk.vmetadata[key].astext == str(value)
)
subq = (
select(*result_fields)
.where(DocumentChunk.collection_name == collection_name)
.where(*where_clauses)
.order_by(
(DocumentChunk.vector.cosine_distance(query_vectors.c.q_vector))
)

View File

@@ -391,7 +391,7 @@ class PineconeClient(VectorDBBase):
)
def search(
self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
self, collection_name: str, vectors: List[List[Union[float, int]]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
"""Search for similar vectors in a collection."""
if not vectors or not vectors[0]:

View File

@@ -145,7 +145,7 @@ class QdrantClient(VectorDBBase):
)
def search(
self, collection_name: str, vectors: list[list[float | int]], limit: int
self, collection_name: str, vectors: list[list[float | int]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
# Search for the nearest neighbor items based on the vectors and return 'limit' number of results.
if limit is None:

View File

@@ -254,7 +254,7 @@ class QdrantClient(VectorDBBase):
)
def search(
self, collection_name: str, vectors: List[List[float | int]], limit: int
self, collection_name: str, vectors: List[List[float | int]], filter: Optional[Dict] = None, limit: int = 10
) -> Optional[SearchResult]:
"""
Search for the nearest neighbor items based on the vectors with tenant isolation.

View File

@@ -295,7 +295,7 @@ class S3VectorClient(VectorDBBase):
raise
def search(
self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
self, collection_name: str, vectors: List[List[Union[float, int]]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
"""
Search for similar vectors in a collection using multiple query vectors.

View File

@@ -159,7 +159,7 @@ class WeaviateClient(VectorDBBase):
)
def search(
self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
self, collection_name: str, vectors: List[List[Union[float, int]]], filter: Optional[dict] = None, limit: int = 10
) -> Optional[SearchResult]:
sane_collection_name = self._sanitize_collection_name(collection_name)
if not self.client.collections.exists(sane_collection_name):