diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py
index f5253d287b..51f62b7ff1 100644
--- a/backend/open_webui/env.py
+++ b/backend/open_webui/env.py
@@ -356,6 +356,12 @@ ENABLE_REALTIME_CHAT_SAVE = (
 
 ENABLE_QUERIES_CACHE = os.environ.get("ENABLE_QUERIES_CACHE", "False").lower() == "true"
 
+# When "true", the chat middleware injects the rendered RAG template into the
+# system message (for KV prefix caching) instead of the user message.
+RAG_SYSTEM_CONTEXT = (
+    os.environ.get("RAG_SYSTEM_CONTEXT", "False").lower() == "true"
+)
+
 ####################################
 # REDIS
 ####################################
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index a6f39e15f9..b7b7dc18bd 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -119,6 +119,7 @@ from open_webui.env import (
     BYPASS_MODEL_ACCESS_CONTROL,
     ENABLE_REALTIME_CHAT_SAVE,
     ENABLE_QUERIES_CACHE,
+    RAG_SYSTEM_CONTEXT,
 )
 
 from open_webui.constants import TASKS
@@ -1622,15 +1623,26 @@ async def process_chat_payload(request, form_data, user, metadata, model):
             raise Exception("No user message found")
 
         if context_string != "":
-            form_data["messages"] = add_or_update_user_message(
-                rag_template(
-                    request.app.state.config.RAG_TEMPLATE,
-                    context_string,
-                    prompt,
-                ),
-                form_data["messages"],
-                append=False,
-            )
+            # Render the RAG prompt once; both branches inject the same text.
+            rag_content = rag_template(
+                request.app.state.config.RAG_TEMPLATE,
+                context_string,
+                prompt,
+            )
+            if RAG_SYSTEM_CONTEXT:
+                # Inject into system message for KV prefix caching
+                form_data["messages"] = add_or_update_system_message(
+                    rag_content,
+                    form_data["messages"],
+                    append=True,
+                )
+            else:
+                # Inject into user message
+                form_data["messages"] = add_or_update_user_message(
+                    rag_content,
+                    form_data["messages"],
+                    append=False,
+                )
 
         # If there are citations, add them to the data_items
         sources = [