mirror of
https://github.com/open-webui/open-webui.git
synced 2026-05-03 18:59:38 -05:00
enh: kb metadata search
This commit is contained in:
@@ -69,7 +69,7 @@ class ChromaClient(VectorDBBase):
|
||||
return self.client.delete_collection(name=collection_name)
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: list[list[float | int]], limit: int
|
||||
self, collection_name: str, vectors: list[list[float | int]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
# Search for the nearest neighbor items based on the vectors and return 'limit' number of results.
|
||||
try:
|
||||
@@ -78,6 +78,7 @@ class ChromaClient(VectorDBBase):
|
||||
result = collection.query(
|
||||
query_embeddings=vectors,
|
||||
n_results=limit,
|
||||
where=filter,
|
||||
)
|
||||
|
||||
# chromadb has cosine distance, 2 (worst) -> 0 (best). Re-ordering to 0 -> 1
|
||||
|
||||
@@ -153,7 +153,7 @@ class ElasticsearchClient(VectorDBBase):
|
||||
|
||||
# Status: works
|
||||
def search(
|
||||
self, collection_name: str, vectors: list[list[float]], limit: int
|
||||
self, collection_name: str, vectors: list[list[float]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
query = {
|
||||
"size": limit,
|
||||
|
||||
@@ -179,7 +179,7 @@ class MilvusClient(VectorDBBase):
|
||||
)
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: list[list[float | int]], limit: int
|
||||
self, collection_name: str, vectors: list[list[float | int]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
# Search for the nearest neighbor items based on the vectors and return 'limit' number of results.
|
||||
collection_name = collection_name.replace("-", "_")
|
||||
|
||||
@@ -157,7 +157,7 @@ class MilvusClient(VectorDBBase):
|
||||
collection.insert(entities)
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: List[List[float]], limit: int
|
||||
self, collection_name: str, vectors: List[List[float]], filter: Optional[Dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
if not vectors:
|
||||
return None
|
||||
|
||||
@@ -233,7 +233,8 @@ class OpenGaussClient(VectorDBBase):
|
||||
self,
|
||||
collection_name: str,
|
||||
vectors: List[List[float]],
|
||||
limit: Optional[int] = None,
|
||||
filter: Optional[Dict[str, Any]] = None,
|
||||
limit: int = 10,
|
||||
) -> Optional[SearchResult]:
|
||||
try:
|
||||
if not vectors:
|
||||
|
||||
@@ -113,7 +113,7 @@ class OpenSearchClient(VectorDBBase):
|
||||
self.client.indices.delete(index=self._get_index_name(collection_name))
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: list[list[float | int]], limit: int
|
||||
self, collection_name: str, vectors: list[list[float | int]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
try:
|
||||
if not self.has_collection(collection_name):
|
||||
|
||||
@@ -521,7 +521,7 @@ class Oracle23aiClient(VectorDBBase):
|
||||
raise
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
|
||||
self, collection_name: str, vectors: List[List[Union[float, int]]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
"""
|
||||
Search for similar vectors in the database.
|
||||
|
||||
@@ -427,7 +427,8 @@ class PgvectorClient(VectorDBBase):
|
||||
self,
|
||||
collection_name: str,
|
||||
vectors: List[List[float]],
|
||||
limit: Optional[int] = None,
|
||||
filter: Optional[Dict[str, Any]] = None,
|
||||
limit: int = 10,
|
||||
) -> Optional[SearchResult]:
|
||||
try:
|
||||
if not vectors:
|
||||
@@ -475,9 +476,40 @@ class PgvectorClient(VectorDBBase):
|
||||
)
|
||||
|
||||
# Build the lateral subquery for each query vector
|
||||
where_clauses = [DocumentChunk.collection_name == collection_name]
|
||||
|
||||
# Apply metadata filter if provided
|
||||
if filter:
|
||||
for key, value in filter.items():
|
||||
if isinstance(value, dict) and "$in" in value:
|
||||
# Handle $in operator: {"field": {"$in": [values]}}
|
||||
in_values = value["$in"]
|
||||
if PGVECTOR_PGCRYPTO:
|
||||
where_clauses.append(
|
||||
pgcrypto_decrypt(
|
||||
DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB
|
||||
)[key].astext.in_([str(v) for v in in_values])
|
||||
)
|
||||
else:
|
||||
where_clauses.append(
|
||||
DocumentChunk.vmetadata[key].astext.in_([str(v) for v in in_values])
|
||||
)
|
||||
else:
|
||||
# Handle simple equality: {"field": "value"}
|
||||
if PGVECTOR_PGCRYPTO:
|
||||
where_clauses.append(
|
||||
pgcrypto_decrypt(
|
||||
DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB
|
||||
)[key].astext == str(value)
|
||||
)
|
||||
else:
|
||||
where_clauses.append(
|
||||
DocumentChunk.vmetadata[key].astext == str(value)
|
||||
)
|
||||
|
||||
subq = (
|
||||
select(*result_fields)
|
||||
.where(DocumentChunk.collection_name == collection_name)
|
||||
.where(*where_clauses)
|
||||
.order_by(
|
||||
(DocumentChunk.vector.cosine_distance(query_vectors.c.q_vector))
|
||||
)
|
||||
|
||||
@@ -391,7 +391,7 @@ class PineconeClient(VectorDBBase):
|
||||
)
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
|
||||
self, collection_name: str, vectors: List[List[Union[float, int]]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
"""Search for similar vectors in a collection."""
|
||||
if not vectors or not vectors[0]:
|
||||
|
||||
@@ -145,7 +145,7 @@ class QdrantClient(VectorDBBase):
|
||||
)
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: list[list[float | int]], limit: int
|
||||
self, collection_name: str, vectors: list[list[float | int]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
# Search for the nearest neighbor items based on the vectors and return 'limit' number of results.
|
||||
if limit is None:
|
||||
|
||||
@@ -254,7 +254,7 @@ class QdrantClient(VectorDBBase):
|
||||
)
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: List[List[float | int]], limit: int
|
||||
self, collection_name: str, vectors: List[List[float | int]], filter: Optional[Dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
"""
|
||||
Search for the nearest neighbor items based on the vectors with tenant isolation.
|
||||
|
||||
@@ -295,7 +295,7 @@ class S3VectorClient(VectorDBBase):
|
||||
raise
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
|
||||
self, collection_name: str, vectors: List[List[Union[float, int]]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
"""
|
||||
Search for similar vectors in a collection using multiple query vectors.
|
||||
|
||||
@@ -159,7 +159,7 @@ class WeaviateClient(VectorDBBase):
|
||||
)
|
||||
|
||||
def search(
|
||||
self, collection_name: str, vectors: List[List[Union[float, int]]], limit: int
|
||||
self, collection_name: str, vectors: List[List[Union[float, int]]], filter: Optional[dict] = None, limit: int = 10
|
||||
) -> Optional[SearchResult]:
|
||||
sane_collection_name = self._sanitize_collection_name(collection_name)
|
||||
if not self.client.collections.exists(sane_collection_name):
|
||||
|
||||
Reference in New Issue
Block a user