mirror of
https://github.com/actualbudget/actual.git
synced 2026-04-28 18:40:34 -05:00
fix: use Unicode-aware SQLite LIKE filtering (#2903)
* fix: use Unicode-aware SQLite filtering * Add release notes, fix type check * Fix code styling
This commit is contained in:
@@ -4,6 +4,8 @@ import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
import { removeFile, readFile } from '../fs';
|
||||
|
||||
import { unicodeLike } from './unicodeLike';
|
||||
|
||||
function verifyParamTypes(sql, arr) {
|
||||
arr.forEach(val => {
|
||||
if (typeof val !== 'string' && typeof val !== 'number' && val !== null) {
|
||||
@@ -101,7 +103,7 @@ function regexp(regex: string, text: string | null) {
|
||||
|
||||
export function openDatabase(pathOrBuffer: string | Buffer) {
|
||||
const db = new SQL(pathOrBuffer);
|
||||
// Define Unicode-aware LOWER and UPPER implementation.
|
||||
// Define Unicode-aware LOWER, UPPER, and LIKE implementation.
|
||||
// This is necessary because better-sqlite3 uses SQLite build without ICU support.
|
||||
// @ts-expect-error @types/better-sqlite3 does not support setting strict 3rd argument
|
||||
db.function('UNICODE_LOWER', { deterministic: true }, (arg: string | null) =>
|
||||
@@ -112,6 +114,8 @@ export function openDatabase(pathOrBuffer: string | Buffer) {
|
||||
arg?.toUpperCase(),
|
||||
);
|
||||
// @ts-expect-error @types/better-sqlite3 does not support setting strict 3rd argument
|
||||
db.function('UNICODE_LIKE', { deterministic: true }, unicodeLike);
|
||||
// @ts-expect-error @types/better-sqlite3 does not support setting strict 3rd argument
|
||||
db.function('REGEXP', { deterministic: true }, regexp);
|
||||
return db;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
// @ts-strict-ignore
|
||||
import initSqlJS, { type SqlJsStatic, type Database } from '@jlongster/sql.js';
|
||||
|
||||
import { unicodeLike } from './unicodeLike';
|
||||
|
||||
let SQL: SqlJsStatic | null = null;
|
||||
|
||||
export async function init() {
|
||||
@@ -193,7 +195,7 @@ export async function openDatabase(pathOrBuffer?: string | Buffer) {
|
||||
db = new SQL.Database();
|
||||
}
|
||||
|
||||
// Define Unicode-aware LOWER and UPPER implementation.
|
||||
// Define Unicode-aware LOWER, UPPER, and LIKE implementation.
|
||||
// This is necessary because sql.js uses SQLite build without ICU support.
|
||||
//
|
||||
// Note that this function should ideally be created with a deterministic flag
|
||||
@@ -201,6 +203,7 @@ export async function openDatabase(pathOrBuffer?: string | Buffer) {
|
||||
// but SQL.js does not support this: https://github.com/sql-js/sql.js/issues/551
|
||||
db.create_function('UNICODE_LOWER', arg => arg?.toLowerCase());
|
||||
db.create_function('UNICODE_UPPER', arg => arg?.toUpperCase());
|
||||
db.create_function('UNICODE_LIKE', unicodeLike);
|
||||
db.create_function('REGEXP', regexp);
|
||||
return db;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import { unicodeLike } from './unicodeLike';
|
||||
|
||||
describe('unicode LIKE functionality', () => {
  // Each row: test title, LIKE pattern, value under test, expected result.
  const scenarios: Array<[string, string | null, string | null, number]> = [
    ['empty pattern should not match to a value', null, 'value', 0],
    ['empty pattern should not match to null', null, null, 0],
    ['should match special characters', '.*+^${}()|[]\\', '.*+^${}()|[]\\', 1],
    ['should use ? as the single character placeholder', 't?st', 'test', 1],
    [
      'should use % as the zero-or-more characters placeholder',
      't%st',
      'te123st',
      1,
    ],
    ['should ignore case for unicode', 'á', 'Ábcdefg', 1],
    ['should ignore case for ascii', 'a', 'Abcdefg', 1],
    ['should treat null value as empty string', '%', null, 1],
    ['should not match null value to the string “null”', 'null', null, 0],
  ];

  // Register one test per row, in declaration order.
  for (const [title, pattern, value, expected] of scenarios) {
    it(title, () => {
      expect(unicodeLike(pattern, value)).toBe(expected);
    });
  }
});
|
||||
32
packages/loot-core/src/platform/server/sqlite/unicodeLike.ts
Normal file
32
packages/loot-core/src/platform/server/sqlite/unicodeLike.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
// @ts-strict-ignore
|
||||
import LRU from 'lru-cache';
|
||||
|
||||
// LRU cache of compiled RegExp objects, keyed by the raw LIKE pattern string
// (populated and read by `unicodeLike`); sized with `max: 500`.
const likePatternCache = new LRU({ max: 500 });
|
||||
|
||||
/**
 * Unicode-aware, case-insensitive substitute for SQLite's `LIKE`.
 *
 * Wildcards: `%` matches zero or more characters and `?` matches exactly one
 * character. All regular-expression metacharacters in `pattern` are matched
 * literally. The compiled expression is not anchored, so the pattern may
 * match anywhere inside `value`.
 *
 * @param pattern - The LIKE pattern. `null` or an empty string never matches.
 * @param value - The text to test. `null` is treated as an empty string.
 * @returns `1` when `value` matches `pattern`, otherwise `0`.
 */
export function unicodeLike(
  pattern: string | null,
  value: string | null,
): number {
  if (!pattern) {
    return 0;
  }

  let cachedRegExp = likePatternCache.get(pattern);
  if (!cachedRegExp) {
    // we don't escape ? and % because we don't know
    // whether they originate from the user input or from our query compiler.
    // Maybe improve the query compiler to correctly process these characters?
    const processedPattern = pattern
      .replace(/[.*+^${}()|[\]\\]/g, '\\$&')
      .replaceAll('?', '.')
      .replaceAll('%', '.*');
    // Flags:
    //   i - case-insensitive comparison
    //   u - match by code point, so `?` consumes a whole astral character
    //       (e.g. an emoji) rather than half of a surrogate pair
    //   s - let `.` match line terminators, so `%` and `?` also span newlines
    cachedRegExp = new RegExp(processedPattern, 'ius');
    likePatternCache.set(pattern, cachedRegExp);
  }

  // A null (or empty) value is compared as the empty string.
  return cachedRegExp.test(value || '') ? 1 : 0;
}
|
||||
@@ -123,6 +123,28 @@ describe('sheet language', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('`like` should use unicode function', () => {
  // Serialize a query that filters `payee.name` with the `$like` operator.
  const serializedQuery = q('transactions')
    .select('payee')
    .filter({ 'payee.name': { $like: `%TEST%` } })
    .serialize();

  const { sql } = generateSQLWithState(serializedQuery, schemaWithRefs);

  // The generated SQL must call the custom function, pattern first.
  expect(sql).toMatch(`UNICODE_LIKE('%TEST%', payees1.name)`);
});
|
||||
|
||||
it('`notlike` should use unicode function', () => {
  // Serialize a query that filters `payee.name` with the `$notlike` operator.
  const serializedQuery = q('transactions')
    .select('payee')
    .filter({ 'payee.name': { $notlike: `%TEST%` } })
    .serialize();

  const { sql } = generateSQLWithState(serializedQuery, schemaWithRefs);

  // The generated SQL must negate the custom function call.
  expect(sql).toMatch(`NOT UNICODE_LIKE('%TEST%', payees1.name)`);
});
|
||||
|
||||
it('`select` allows nested functions', () => {
|
||||
const result = generateSQLWithState(
|
||||
q('transactions')
|
||||
|
||||
@@ -720,7 +720,7 @@ const compileOp = saveStack('op', (state, fieldRef, opData) => {
|
||||
}
|
||||
case '$like': {
|
||||
const [left, right] = valArray(state, [lhs, rhs], ['string', 'string']);
|
||||
return `${left} LIKE ${right}`;
|
||||
return `UNICODE_LIKE(${right}, ${left})`;
|
||||
}
|
||||
case '$regexp': {
|
||||
const [left, right] = valArray(state, [lhs, rhs], ['string', 'string']);
|
||||
@@ -728,7 +728,7 @@ const compileOp = saveStack('op', (state, fieldRef, opData) => {
|
||||
}
|
||||
case '$notlike': {
|
||||
const [left, right] = valArray(state, [lhs, rhs], ['string', 'string']);
|
||||
return `(${left} NOT LIKE ${right}\n OR ${left} IS NULL)`;
|
||||
return `(NOT UNICODE_LIKE(${right}, ${left})\n OR ${left} IS NULL)`;
|
||||
}
|
||||
default:
|
||||
throw new CompileError(`Unknown operator: ${op}`);
|
||||
|
||||
6
upcoming-release-notes/2903.md
Normal file
6
upcoming-release-notes/2903.md
Normal file
@@ -0,0 +1,6 @@
|
||||
---
|
||||
category: Bugfix
|
||||
authors: [dymanoid]
|
||||
---
|
||||
|
||||
Use Unicode-aware database queries for filtering and searching.
|
||||
Reference in New Issue
Block a user