From 826e9ab317d5376c6eeb93870481dad3bf99ae96 Mon Sep 17 00:00:00 2001 From: Classic298 <27028174+Classic298@users.noreply.github.com> Date: Sun, 11 Jan 2026 20:33:04 +0100 Subject: [PATCH] fix(db): release connection before embeddings in knowledge /metadata/reindex (#20577) Remove Depends(get_session) from POST /metadata/reindex endpoint to prevent database connections from being held during N embedding API calls. This endpoint is CRITICAL as it loops through ALL knowledge bases and calls embed_knowledge_base_metadata() for each one. With the original code, a single connection would be held for the entire duration (potentially minutes for large deployments), completely exhausting the pool. The Knowledges.get_knowledge_bases() function manages its own short-lived session, releasing the connection before the embedding loop begins. --- backend/open_webui/routers/knowledge.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 9fc30424ca..c38c5e0bdf 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -345,10 +345,15 @@ async def reindex_knowledge_files( async def reindex_knowledge_base_metadata_embeddings( request: Request, user=Depends(get_admin_user), - db: Session = Depends(get_session), ): - """Batch embed all existing knowledge bases. Admin only.""" - knowledge_bases = Knowledges.get_knowledge_bases(db=db) + """Batch embed all existing knowledge bases. Admin only. + + NOTE: We intentionally do NOT use Depends(get_session) here. + This endpoint loops through ALL knowledge bases and calls embed_knowledge_base_metadata() + for each one, making N external embedding API calls. Holding a session during + this entire operation would exhaust the connection pool. + """ + knowledge_bases = Knowledges.get_knowledge_bases() log.info(f"Reindexing embeddings for {len(knowledge_bases)} knowledge bases") success_count = 0