diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index c282a23d18..9a2aa8b39e 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -544,28 +544,20 @@ async def query_collection_with_hybrid_search( results = [] error = False - # Optimization: Only fetch collection data if explicitly needed for BM25 or enrichment. - # Fetching massive collections (e.g. 160k docs) takes significant time (~30s) and memory. - should_fetch_data = hybrid_bm25_weight > 0 or enable_enriched_texts - - if should_fetch_data: - # Optimization: If only one collection, fetch synchronously to avoid asyncio.to_thread overhead - # which can cause significant regression (e.g. 4s vs 28s) for large payloads. - if len(collection_names) == 1: - try: - col_name = collection_names[0] - log.debug(f"Fetching single collection synchronously: {col_name}") - data = VECTOR_DB_CLIENT.get(collection_name=col_name) - collection_results = {col_name: data} - except Exception as e: - log.exception(f"Failed to fetch collection {col_name}: {e}") - collection_results = {col_name: None} - else: - # Fetch all collections in parallel to reduce latency for multiple collections - collection_results = await fetch_collection_data(collection_names) + # Always fetch collection data - it's required for validation in query_doc_with_hybrid_search. + # Optimization: Use sync fetch for single collection to avoid asyncio overhead. + if len(collection_names) == 1: + try: + col_name = collection_names[0] + log.debug(f"Fetching single collection synchronously: {col_name}") + data = VECTOR_DB_CLIENT.get(collection_name=col_name) + collection_results = {col_name: data} + except Exception as e: + log.exception(f"Failed to fetch collection {col_name}: {e}") + collection_results = {col_name: None} else: - log.debug("Skipping collection fetch: BM25 disabled and no text enrichment needed") - collection_results = {name: None for name in collection_names} + # Fetch all collections in parallel to reduce latency for multiple collections + collection_results = await fetch_collection_data(collection_names) log.info( f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections..." @@ -591,16 +583,13 @@ async def query_collection_with_hybrid_search( return None, e # Prepare tasks for all collections and queries - # If we skipped fetching (should_fetch_data=False), we still want to run the query (using vector search only). - # If we tried to fetch and failed (result is None), we skip. - tasks = [] - for collection_name in collection_names: - data = collection_results.get(collection_name) - if data is None and should_fetch_data: - # We needed data but failed to fetch it, skip this collection - continue - for query in queries: - tasks.append((collection_name, query)) + # Skip collections that failed to fetch (data is None) + tasks = [ + (collection_name, query) + for collection_name in collection_names + if collection_results.get(collection_name) is not None + for query in queries + ] # Run all queries in parallel using asyncio.gather task_results = await asyncio.gather(