mirror of
https://github.com/open-webui/open-webui.git
synced 2026-05-21 17:13:00 -05:00
fix: replace legacy surrogate-pair emoji regex with Unicode property escape (#22915)
The previous regular expression used manual surrogate-pair ranges to match emojis and missed a large category of commonly used symbols: `/[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g`. This approach only covers emojis encoded as surrogate pairs (U+1F000 – U+1F4FF range), but silently skips BMP emojis that use a text-presentation code point followed by the variation selector U+FE0F, such as ❤️ (U+2764 U+FE0F), ☀️, ✅, ⚡, ⭐, and keycap sequences like 1️⃣, as well as ZWJ family sequences (👨👩👧👦) and flag sequences. Replace with the Unicode property escape \p{RGI_Emoji} using the 'v' (unicodeSets) flag introduced in ES2024. This single pattern covers every standardised emoji sequence defined by Unicode, including all the cases above. Browser support: Chrome 112+, Firefox 116+, Safari 17+, Node.js 20+. All browsers targeted by open-webui already support this syntax. Co-authored-by: Tim Baek <tim@openwebui.com> Co-authored-by: joaoback <156559121+joaoback@users.noreply.github.com> Co-authored-by: yoloni <yoloni@tencent.com>
This commit is contained in:
@@ -836,8 +836,12 @@ export const isYoutubeUrl = (url: string) => {
|
||||
};
|
||||
|
||||
export const removeEmojis = (str: string) => {
|
||||
// Regular expression to match emojis
|
||||
const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g;
|
||||
// Use Unicode property escape with the 'v' flag (ES2024) to match all
|
||||
// standardised emoji sequences, including text-presentation emoji + variation
|
||||
// selector (e.g. ❤️, ☀️, ✅), keycap sequences (e.g. 1️⃣), ZWJ families
|
||||
// (e.g. 👨👩👧👦) and flag sequences (e.g. 🏳️🌈).
|
||||
// The previous surrogate-pair regex missed the entire BMP emoji category.
|
||||
const emojiRegex = /\p{RGI_Emoji}/gv;
|
||||
|
||||
// Replace emojis with an empty string
|
||||
return str.replace(emojiRegex, '');
|
||||
|
||||
Reference in New Issue
Block a user