Mirror of https://github.com/open-webui/open-webui.git, synced 2026-05-04 03:16:03 -05:00.
Add configurable reranker batch size (env var RAG_RERANKING_BATCH_SIZE, default 32) following the same pattern as RAG_EMBEDDING_BATCH_SIZE. - config.py: PersistentConfig for RAG_RERANKING_BATCH_SIZE - main.py: import, state init, pass to get_reranking_function - colbert.py: accept batch_size param in predict() (was hardcoded 32) - utils.py: get_reranking_function passes batch_size at call time - retrieval.py: expose in config GET/POST endpoints and ConfigForm - Documents.svelte: add Reranking Batch Size input in admin settings Closes #23730
76 lines
3.2 KiB
Python
76 lines
3.2 KiB
Python
import os
|
|
import logging
|
|
import torch
|
|
import numpy as np
|
|
from colbert.infra import ColBERTConfig
|
|
from colbert.modeling.checkpoint import Checkpoint
|
|
|
|
|
|
from open_webui.retrieval.models.base_reranker import BaseReranker
|
|
|
|
# Module-level logger, named after this module per the standard logging convention.
log = logging.getLogger(__name__)
|
|
|
|
|
|
class ColBERT(BaseReranker):
    """ColBERT late-interaction reranker.

    Loads a ColBERT checkpoint by name and scores (query, document) pairs
    using the MaxSim operation over token-level embeddings: for each document
    token, take the maximum similarity against any query token, then sum.
    """

    def __init__(self, name, **kwargs) -> None:
        # NOTE: use lazy %-style logging args with a placeholder; the previous
        # call passed `name` without one, so logging raised an internal
        # formatting error and the model name never appeared in the log.
        log.info('ColBERT: Loading model %s', name)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        DOCKER = kwargs.get('env') == 'docker'
        if DOCKER:
            # This is a workaround for the issue with the docker container
            # where the torch extension is not loaded properly
            # and the following error is thrown:
            # /root/.cache/torch_extensions/py311_cpu/segmented_maxsim_cpp/segmented_maxsim_cpp.so: cannot open shared object file: No such file or directory
            # Removing a stale lock file lets torch rebuild the extension.
            lock_file = '/root/.cache/torch_extensions/py311_cpu/segmented_maxsim_cpp/lock'
            if os.path.exists(lock_file):
                os.remove(lock_file)

        self.ckpt = Checkpoint(
            name,
            colbert_config=ColBERTConfig(model_name=name),
        ).to(self.device)

    def calculate_similarity_scores(self, query_embeddings, document_embeddings):
        """Compute softmax-normalized MaxSim relevance scores.

        :param query_embeddings: tensor of shape (1 or n_docs, q_len, dim).
        :param document_embeddings: tensor of shape (n_docs, d_len, dim).
        :return: np.float32 array of shape (n_docs,) summing to 1 (softmax
            over documents).
        :raises ValueError: if either tensor is not 3-dimensional, or the
            query batch size is neither 1 nor the document batch size.
        """
        query_embeddings = query_embeddings.to(self.device)
        document_embeddings = document_embeddings.to(self.device)

        # Validate dimensions to ensure compatibility
        if query_embeddings.dim() != 3:
            raise ValueError(
                f'Expected query embeddings to have 3 dimensions, but got {query_embeddings.dim()}.'
            )
        if document_embeddings.dim() != 3:
            raise ValueError(
                f'Expected document embeddings to have 3 dimensions, but got {document_embeddings.dim()}.'
            )
        if query_embeddings.size(0) not in [1, document_embeddings.size(0)]:
            raise ValueError(
                'There should be either one query or queries equal to the number of documents.'
            )

        # Transpose the query embeddings to align for matrix multiplication
        transposed_query_embeddings = query_embeddings.permute(0, 2, 1)
        # Compute similarity scores using batch matrix multiplication
        computed_scores = torch.matmul(document_embeddings, transposed_query_embeddings)
        # Apply max pooling to extract the highest semantic similarity across each document's sequence
        maximum_scores = torch.max(computed_scores, dim=1).values

        # Sum up the maximum scores across features to get the overall document relevance scores
        final_scores = maximum_scores.sum(dim=1)

        # Softmax across documents so scores are comparable as a distribution.
        normalized_scores = torch.softmax(final_scores, dim=0)

        return normalized_scores.detach().cpu().numpy().astype(np.float32)

    def predict(self, sentences, batch_size=32):
        """Score (query, document) pairs.

        :param sentences: list of (query, document) pairs. All pairs are
            assumed to share the same query; only sentences[0][0] is used.
        :param batch_size: embedding batch size forwarded to the checkpoint's
            docFromText/queryFromText calls.
        :return: np.float32 array of one relevance score per document.
        """
        if not sentences:
            # Nothing to score — avoid IndexError on sentences[0][0].
            return np.array([], dtype=np.float32)

        query = sentences[0][0]
        docs = [pair[1] for pair in sentences]

        # Embedding the documents
        embedded_docs = self.ckpt.docFromText(docs, bsize=batch_size)[0]
        # Embedding the queries
        embedded_queries = self.ckpt.queryFromText([query], bsize=batch_size)
        embedded_query = embedded_queries[0]

        # Calculate retrieval scores for the query against all documents
        scores = self.calculate_similarity_scores(
            embedded_query.unsqueeze(0), embedded_docs
        )

        return scores
|