From 7aa7bbc390c9a2a1bab23c1a4846ed72865e473d Mon Sep 17 00:00:00 2001
From: Alvin Tang <104285249+alvinttang@users.noreply.github.com>
Date: Mon, 9 Mar 2026 05:50:25 +0800
Subject: [PATCH] fix: correct Azure TTS locale extraction for SSML xml:lang
 (#22443)

The locale for Azure TTS SSML is derived from the configured voice name
(TTS_VOICE). It was being extracted with `split("-")[:1]`, which keeps
only the first segment (e.g., "en" from "en-US-JennyNeural"). The
xml:lang attribute in the SSML sent to Azure needs a full locale like
"en-US", not just a language code. This caused Azure TTS to either fail
or use incorrect pronunciation rules.

Changed `[:1]` to `[:2]` so that the first two hyphen-separated segments
are kept, which yields the locale (e.g., "en-US").

Co-authored-by: gambletan <ethanchang32@gmail.com>
---
 backend/open_webui/routers/audio.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py
index 3156078326..65d17d53dd 100644
--- a/backend/open_webui/routers/audio.py
+++ b/backend/open_webui/routers/audio.py
@@ -492,7 +492,7 @@ async def speech(request: Request, user=Depends(get_verified_user)):
         region = request.app.state.config.TTS_AZURE_SPEECH_REGION or "eastus"
         base_url = request.app.state.config.TTS_AZURE_SPEECH_BASE_URL
         language = request.app.state.config.TTS_VOICE
-        locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:1])
+        locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:2])
         output_format = request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
 
         try: