This commit is contained in:
Timothy Jaeryang Baek
2026-03-17 17:58:01 -05:00
parent fcf7208352
commit de3317e26b
220 changed files with 17200 additions and 22836 deletions

View File

@@ -76,19 +76,19 @@ class QdrantClient(VectorDBBase):
for point in points:
payload = point.payload
ids.append(point.id)
documents.append(payload["text"])
metadatas.append(payload["metadata"])
documents.append(payload['text'])
metadatas.append(payload['metadata'])
return GetResult(
**{
"ids": [ids],
"documents": [documents],
"metadatas": [metadatas],
'ids': [ids],
'documents': [documents],
'metadatas': [metadatas],
}
)
def _create_collection(self, collection_name: str, dimension: int):
collection_name_with_prefix = f"{self.collection_prefix}_{collection_name}"
collection_name_with_prefix = f'{self.collection_prefix}_{collection_name}'
self.client.create_collection(
collection_name=collection_name_with_prefix,
vectors_config=models.VectorParams(
@@ -104,7 +104,7 @@ class QdrantClient(VectorDBBase):
# Create payload indexes for efficient filtering
self.client.create_payload_index(
collection_name=collection_name_with_prefix,
field_name="metadata.hash",
field_name='metadata.hash',
field_schema=models.KeywordIndexParams(
type=models.KeywordIndexType.KEYWORD,
is_tenant=False,
@@ -113,40 +113,34 @@ class QdrantClient(VectorDBBase):
)
self.client.create_payload_index(
collection_name=collection_name_with_prefix,
field_name="metadata.file_id",
field_name='metadata.file_id',
field_schema=models.KeywordIndexParams(
type=models.KeywordIndexType.KEYWORD,
is_tenant=False,
on_disk=self.QDRANT_ON_DISK,
),
)
log.info(f"collection {collection_name_with_prefix} successfully created!")
log.info(f'collection {collection_name_with_prefix} successfully created!')
def _create_collection_if_not_exists(self, collection_name, dimension):
if not self.has_collection(collection_name=collection_name):
self._create_collection(
collection_name=collection_name, dimension=dimension
)
self._create_collection(collection_name=collection_name, dimension=dimension)
def _create_points(self, items: list[VectorItem]):
    """Convert vector items into a list of ``PointStruct`` objects.

    For every item, ``id`` and ``vector`` are passed through unchanged and
    ``text`` / ``metadata`` are placed in the point payload.

    Args:
        items: Items providing the keys ``id``, ``vector``, ``text`` and
            ``metadata``.

    Returns:
        One ``PointStruct`` per item, in input order.
    """
    points = []
    for entry in items:
        # Keys are read in the order id, vector, text, metadata.
        point_id = entry['id']
        vector = entry['vector']
        payload = {'text': entry['text'], 'metadata': entry['metadata']}
        points.append(PointStruct(id=point_id, vector=vector, payload=payload))
    return points
def has_collection(self, collection_name: str) -> bool:
    """Return the client's answer to whether the prefixed collection exists.

    The name sent to the client is ``<collection_prefix>_<collection_name>``.
    """
    qualified_name = f'{self.collection_prefix}_{collection_name}'
    return self.client.collection_exists(qualified_name)
def delete_collection(self, collection_name: str):
    """Delete the prefixed collection and return the client's result.

    The name sent to the client is ``<collection_prefix>_<collection_name>``.
    """
    qualified_name = f'{self.collection_prefix}_{collection_name}'
    return self.client.delete_collection(collection_name=qualified_name)
def search(
self,
@@ -160,7 +154,7 @@ class QdrantClient(VectorDBBase):
limit = NO_LIMIT # otherwise qdrant would set limit to 10!
query_response = self.client.query_points(
collection_name=f"{self.collection_prefix}_{collection_name}",
collection_name=f'{self.collection_prefix}_{collection_name}',
query=vectors[0],
limit=limit,
)
@@ -184,13 +178,11 @@ class QdrantClient(VectorDBBase):
field_conditions = []
for key, value in filter.items():
field_conditions.append(
models.FieldCondition(
key=f"metadata.{key}", match=models.MatchValue(value=value)
)
models.FieldCondition(key=f'metadata.{key}', match=models.MatchValue(value=value))
)
points = self.client.scroll(
collection_name=f"{self.collection_prefix}_{collection_name}",
collection_name=f'{self.collection_prefix}_{collection_name}',
scroll_filter=models.Filter(should=field_conditions),
limit=limit,
)
@@ -202,22 +194,22 @@ class QdrantClient(VectorDBBase):
def get(self, collection_name: str) -> Optional[GetResult]:
    """Fetch every item stored in the prefixed collection.

    Scrolls the collection with ``NO_LIMIT`` and converts the first element
    of the scroll result via ``_result_to_get_result``.
    """
    qualified_name = f'{self.collection_prefix}_{collection_name}'
    scroll_result = self.client.scroll(
        collection_name=qualified_name,
        limit=NO_LIMIT,  # otherwise qdrant would set limit to 10!
    )
    records = scroll_result[0]
    return self._result_to_get_result(records)
def insert(self, collection_name: str, items: list[VectorItem]):
    """Insert items into the collection, creating the collection if needed.

    The collection's dimension is taken from the length of the first item's
    ``vector``. An empty ``items`` list is a no-op: there is nothing to
    upload and no vector to take a dimension from (previously this raised
    ``IndexError`` on ``items[0]``).

    Args:
        collection_name: Collection name without the prefix.
        items: Items to upload; each is converted by ``_create_points``.
    """
    if not items:
        return
    dimension = len(items[0]['vector'])
    self._create_collection_if_not_exists(collection_name, dimension)
    points = self._create_points(items)
    self.client.upload_points(f'{self.collection_prefix}_{collection_name}', points)
def upsert(self, collection_name: str, items: list[VectorItem]):
    """Update existing items or insert missing ones, creating the collection if needed.

    The collection's dimension is taken from the length of the first item's
    ``vector``. An empty ``items`` list is a no-op that returns ``None``
    (previously this raised ``IndexError`` on ``items[0]``).

    Args:
        collection_name: Collection name without the prefix.
        items: Items to upsert; each is converted by ``_create_points``.

    Returns:
        The result of the client's ``upsert`` call, or ``None`` when
        ``items`` is empty.
    """
    if not items:
        return None
    dimension = len(items[0]['vector'])
    self._create_collection_if_not_exists(collection_name, dimension)
    points = self._create_points(items)
    return self.client.upsert(f'{self.collection_prefix}_{collection_name}', points)
def delete(
self,
@@ -230,26 +222,28 @@ class QdrantClient(VectorDBBase):
if ids:
for id_value in ids:
field_conditions.append(
models.FieldCondition(
key="metadata.id",
match=models.MatchValue(value=id_value),
(
field_conditions.append(
models.FieldCondition(
key='metadata.id',
match=models.MatchValue(value=id_value),
),
),
),
)
elif filter:
for key, value in filter.items():
field_conditions.append(
models.FieldCondition(
key=f"metadata.{key}",
match=models.MatchValue(value=value),
(
field_conditions.append(
models.FieldCondition(
key=f'metadata.{key}',
match=models.MatchValue(value=value),
),
),
),
)
return self.client.delete(
collection_name=f"{self.collection_prefix}_{collection_name}",
points_selector=models.FilterSelector(
filter=models.Filter(must=field_conditions)
),
collection_name=f'{self.collection_prefix}_{collection_name}',
points_selector=models.FilterSelector(filter=models.Filter(must=field_conditions)),
)
def reset(self):