Use Unicode-aware implementations of LOWER() and UPPER() in SQL queries (#865)

Fixes #840 by creating application-defined SQL functions
(https://www.sqlite.org/appfunc.html) for Unicode-aware implementations
of `LOWER()` and `UPPER()`. This uses
`String.prototype.toLower/UpperCase()` JS method.

I initially wanted to just redefine `LOWER()` and `UPPER()` but due to
[sql.js not supporting the definition of deterministic
functions](https://github.com/sql-js/sql.js/issues/551), I had to just
define them as separate functions and use that in the appropriate
places. It's probably better like that anyway...
This commit is contained in:
Jakub Kuczys
2023-04-07 22:33:19 +02:00
committed by GitHub
parent aa2e837e7e
commit 835c1a54f7
6 changed files with 57 additions and 29 deletions

View File

@@ -84,7 +84,16 @@ export async function asyncTransaction(db, fn) {
}
export function openDatabase(pathOrBuffer) {
return new Database(pathOrBuffer);
let db = new Database(pathOrBuffer);
// Define Unicode-aware LOWER and UPPER implementation.
// This is necessary because better-sqlite3 uses SQLite build without ICU support.
db.function('UNICODE_LOWER', { deterministic: true }, arg =>
arg?.toLowerCase(),
);
db.function('UNICODE_UPPER', { deterministic: true }, arg =>
arg?.toUpperCase(),
);
return db;
}
export function closeDatabase(db) {

View File

@@ -151,36 +151,48 @@ export async function asyncTransaction(db, fn) {
}
export async function openDatabase(pathOrBuffer?: string | Buffer) {
let db = null;
if (pathOrBuffer) {
if (typeof pathOrBuffer !== 'string') {
return new SQL.Database(pathOrBuffer);
}
db = new SQL.Database(pathOrBuffer);
} else {
let path = pathOrBuffer;
if (path !== ':memory:') {
if (typeof SharedArrayBuffer === 'undefined') {
// @ts-expect-error FS missing in sql.js types
let stream = SQL.FS.open(SQL.FS.readlink(path), 'a+');
await stream.node.contents.readIfFallback();
// @ts-expect-error FS missing in sql.js types
SQL.FS.close(stream);
}
let path = pathOrBuffer;
if (path !== ':memory:') {
if (typeof SharedArrayBuffer === 'undefined') {
// @ts-expect-error FS missing in sql.js types
let stream = SQL.FS.open(SQL.FS.readlink(path), 'a+');
await stream.node.contents.readIfFallback();
// @ts-expect-error FS missing in sql.js types
SQL.FS.close(stream);
db = new SQL.Database(
// @ts-expect-error FS missing in sql.js types
path.includes('/blocked') ? path : SQL.FS.readlink(path),
// @ts-expect-error 2nd argument missed in sql.js types
{ filename: true },
);
db.exec(`
PRAGMA journal_mode=MEMORY;
PRAGMA cache_size=-10000;
`);
}
let db = new SQL.Database(
// @ts-expect-error FS missing in sql.js types
path.includes('/blocked') ? path : SQL.FS.readlink(path),
// @ts-expect-error 2nd argument missed in sql.js types
{ filename: true },
);
db.exec(`
PRAGMA journal_mode=MEMORY;
PRAGMA cache_size=-10000;
`);
return db;
}
}
return new SQL.Database();
if (db === null) {
db = new SQL.Database();
}
// Define Unicode-aware LOWER and UPPER implementation.
// This is necessary because sql.js uses SQLite build without ICU support.
//
// Note that this function should ideally be created with a deterministic flag
// to allow SQLite to better optimize calls to it by factoring them out of inner loops
// but SQL.js does not support this: https://github.com/sql-js/sql.js/issues/551
db.create_function('UNICODE_LOWER', arg => arg?.toLowerCase());
db.create_function('UNICODE_UPPER', arg => arg?.toUpperCase());
return db;
}
export function closeDatabase(db) {

View File

@@ -4,7 +4,7 @@ export async function createPayee(description) {
// Check to make sure no payee already exists with exactly the same
// name
let row = await db.first(
`SELECT id FROM payees WHERE LOWER(name) = ? AND tombstone = 0`,
`SELECT id FROM payees WHERE UNICODE_LOWER(name) = ? AND tombstone = 0`,
[description.toLowerCase()],
);

View File

@@ -564,7 +564,7 @@ const compileFunction = saveStack('function', (state, func) => {
case '$lower': {
validateArgLength(args, 1);
let [arg1] = valArray(state, args, ['string']);
return typed(`LOWER(${arg1})`, 'string');
return typed(`UNICODE_LOWER(${arg1})`, 'string');
}
// integer/float functions

View File

@@ -516,9 +516,10 @@ export async function getOrphanedPayees() {
}
export async function getPayeeByName(name) {
return first(`SELECT * FROM payees WHERE LOWER(name) = ? AND tombstone = 0`, [
name.toLowerCase(),
]);
return first(
`SELECT * FROM payees WHERE UNICODE_LOWER(name) = ? AND tombstone = 0`,
[name.toLowerCase()],
);
}
export function insertPayeeRule(rule) {

View File

@@ -0,0 +1,6 @@
---
category: Bugfix
authors: [Jackenmen]
---
Fix case-insensitive matching of strings for uppercase letters from non-English alphabets