From 157ae917ebfb60eba0e15e50e2d5ad3e4597db5d Mon Sep 17 00:00:00 2001 From: yoloni-9527 Date: Sun, 22 Mar 2026 05:43:09 +0800 Subject: [PATCH] fix: replace legacy surrogate-pair emoji regex with Unicode property escape (#22915) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous regular expression used manual surrogate-pair ranges to match emojis and missed a large category of commonly used symbols: /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g This approach only matches code points encoded as surrogate pairs (U+10000 and above; the two explicit alternatives, U+1F000 – U+1F64F, are already subsumed by the first one), so it silently skips BMP emojis: text-presentation code points followed by the variation selector U+FE0F, such as ❤️ (U+2764 U+FE0F) and ☀️, emoji-presentation symbols such as ✅, ⚡ and ⭐, and keycap sequences like 1️⃣. Multi-code-point sequences are only partially removed: in ZWJ family sequences (👨‍👩‍👧‍👦) and in flag sequences that contain BMP code points, the joiners (U+200D) and variation selectors (U+FE0F) are left behind in the output. Replace with the Unicode property escape \p{RGI_Emoji} using the 'v' (unicodeSets) flag introduced in ES2024. This single pattern covers every standardised emoji sequence defined by Unicode, including all the cases above. Browser support: Chrome 112+, Firefox 116+, Safari 17+, Node.js 20+. All browsers targeted by open-webui already support this syntax.
Co-authored-by: Tim Baek Co-authored-by: joaoback <156559121+joaoback@users.noreply.github.com> Co-authored-by: yoloni --- src/lib/utils/index.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 41b1328810..9b2318ab00 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -836,8 +836,12 @@ export const isYoutubeUrl = (url: string) => { }; export const removeEmojis = (str: string) => { - // Regular expression to match emojis - const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g; + // Use Unicode property escape with the 'v' flag (ES2024) to match all + // standardised emoji sequences, including text-presentation emoji + variation + // selector (e.g. ❤️, ☀️, ✅), keycap sequences (e.g. 1️⃣), ZWJ families + // (e.g. 👨‍👩‍👧‍👦) and flag sequences (e.g. 🏳️‍🌈). + // The previous surrogate-pair regex missed the entire BMP emoji category. + const emojiRegex = /\p{RGI_Emoji}/gv; // Replace emojis with an empty string return str.replace(emojiRegex, '');