From 7aa7bbc390c9a2a1bab23c1a4846ed72865e473d Mon Sep 17 00:00:00 2001 From: Alvin Tang <104285249+alvinttang@users.noreply.github.com> Date: Mon, 9 Mar 2026 05:50:25 +0800 Subject: [PATCH] fix: correct Azure TTS locale extraction for SSML xml:lang (#22443) The locale for Azure TTS SSML was being extracted with `split("-")[:1]`, which only takes the first segment (e.g., "en" from "en-US"). The xml:lang attribute in SSML requires a full locale like "en-US", not just a language code. This caused Azure TTS to either fail or use incorrect pronunciation rules. Changed `[:1]` to `[:2]` to properly extract the locale (e.g., "en-US"). Co-authored-by: gambletan --- backend/open_webui/routers/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index 3156078326..65d17d53dd 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -492,7 +492,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): region = request.app.state.config.TTS_AZURE_SPEECH_REGION or "eastus" base_url = request.app.state.config.TTS_AZURE_SPEECH_BASE_URL language = request.app.state.config.TTS_VOICE - locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:1]) + locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:2]) output_format = request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT try: