feat(tasks): add support for ParadeDB when searching tasks

This commit is contained in:
kolaente
2025-06-17 14:18:10 +02:00
parent e7f5142e3d
commit 3db1ddcee4
7 changed files with 219 additions and 22 deletions

View File

@@ -38,8 +38,13 @@ import (
_ "github.com/mattn/go-sqlite3" // Because.
)
// We only want one instance of the engine, so we can reate it once and reuse it
var x *xorm.Engine
var (
// x is the package-wide database engine. We only want one instance of the
// engine, so it is created once and reused.
x *xorm.Engine
// paradedbInstalled marks whether the paradedb extension is available
// and can be used for full text search. It is switched to true by
// checkParadeDB when the pg_search extension is listed in pg_extension,
// and is consulted by the search helpers and by CreateParadeDBIndexes.
paradedbInstalled bool
)
// CreateDBEngine initializes a db engine from the config
func CreateDBEngine() (engine *xorm.Engine, err error) {
@@ -174,6 +179,8 @@ func initPostgresEngine() (engine *xorm.Engine, err error) {
return
}
engine.SetConnMaxLifetime(maxLifetime)
checkParadeDB(engine)
return
}
@@ -241,3 +248,49 @@ func GetDialect() string {
return dialect
}
// checkParadeDB probes the given engine for the pg_search extension and, when
// it is present, flips the package-level paradedbInstalled flag to true.
//
// Engines whose dialect is not PostgreSQL are ignored. A failing probe query is
// logged and leaves the flag untouched; the flag is never reset to false here.
func checkParadeDB(engine *xorm.Engine) {
	if engine.Dialect().URI().DBType != schemas.POSTGRES {
		return
	}

	var found bool
	_, err := engine.SQL("SELECT EXISTS (SELECT 1 FROM pg_extension WHERE extname='pg_search')").Get(&found)
	if err != nil {
		log.Errorf("could not check for paradedb extension: %v", err)
		return
	}

	if found {
		paradedbInstalled = true
		log.Debug("ParadeDB extension detected, using @@@ search operator")
	}
}
// CreateParadeDBIndexes ensures the bm25 index used for task search exists.
//
// It is a no-op returning nil unless paradedbInstalled is set. Otherwise it
// runs a CREATE INDEX IF NOT EXISTS statement through the package-level engine
// and wraps any failure in the returned error.
func CreateParadeDBIndexes() error {
	if !paradedbInstalled {
		return nil
	}

	// ParadeDB only allows one bm25 index per table, so a single index covers
	// every column the search needs: id (the key field), title, description,
	// project_id and done. All non-key fields are declared as fast fields.
	const createTaskIndex = `CREATE INDEX IF NOT EXISTS idx_tasks_paradedb ON tasks USING bm25 (id, title, description, project_id, done)
WITH (
key_field='id',
text_fields='{
"title": {"fast": true, "record": "freq"},
"description": {"fast": true, "record": "freq"}
}',
numeric_fields='{
"project_id": {"fast": true}
}',
boolean_fields='{
"done": {"fast": true}
}'
)`

	_, err := x.Exec(createTaskIndex)
	if err != nil {
		return fmt.Errorf("could not ensure paradedb task index: %w", err)
	}
	return nil
}

View File

@@ -17,6 +17,8 @@
package db
import (
"strings"
"xorm.io/builder"
"xorm.io/xorm/schemas"
)
@@ -28,8 +30,58 @@ import (
// See https://stackoverflow.com/q/7005302/10924593
func ILIKE(column, search string) builder.Cond {
	// Substring pattern shared by the LIKE and ILIKE variants; the ParadeDB
	// operator receives the raw search term instead.
	pattern := "%" + search + "%"

	switch {
	case Type() != schemas.POSTGRES:
		// Every other database gets a plain LIKE condition.
		return &builder.Like{column, pattern}
	case paradedbInstalled:
		// ParadeDB detected: use its @@@ search operator on the column.
		return builder.Expr(column+" @@@ ?", search)
	default:
		// Plain PostgreSQL: explicit case-insensitive match.
		return builder.Expr(column+" ILIKE ?", pattern)
	}
}
// ParadeDBAvailable reports whether the configured database is PostgreSQL and
// the ParadeDB extension was detected on it.
func ParadeDBAvailable() bool {
	if Type() != schemas.POSTGRES {
		return false
	}
	return paradedbInstalled
}
// MultiFieldSearch builds a condition that matches search against any of the
// given fields.
//
// Depending on the database it produces:
//   - PostgreSQL with ParadeDB detected: a single "id @@@ ..." expression,
//     using paradedb.match for one field and paradedb.disjunction_max over one
//     paradedb.match per field for several fields.
//   - PostgreSQL without ParadeDB: an OR of "<field> ILIKE ?" expressions.
//   - Any other database: an OR of LIKE conditions.
//
// An empty fields slice returns an empty OR condition in every mode. This is
// what the LIKE/ILIKE paths already produced, and it keeps the ParadeDB path
// from emitting "paradedb.disjunction_max(ARRAY[])".
//
// NOTE(review): the ParadeDB expressions reference an unqualified "id" column;
// confirm callers do not use this in queries where "id" would be ambiguous.
func MultiFieldSearch(fields []string, search string) builder.Cond {
	if len(fields) == 0 {
		return builder.Or()
	}

	isPostgres := Type() == schemas.POSTGRES

	if isPostgres && paradedbInstalled {
		if len(fields) == 1 {
			// Single field: no disjunction wrapper needed.
			return builder.Expr("id @@@ paradedb.match(?, ?)", fields[0], search)
		}

		// One paradedb.match(?, ?) placeholder pair per field; field names and
		// the search term are passed as bound arguments, never concatenated.
		matches := make([]string, len(fields))
		args := make([]interface{}, 0, len(fields)*2)
		for i, field := range fields {
			matches[i] = "paradedb.match(?, ?)"
			args = append(args, field, search)
		}
		return builder.Expr("id @@@ paradedb.disjunction_max(ARRAY["+strings.Join(matches, ", ")+"])", args...)
	}

	// Substring fallback: ILIKE on PostgreSQL, LIKE everywhere else.
	pattern := "%" + search + "%"
	conditions := make([]builder.Cond, len(fields))
	for i, field := range fields {
		if isPostgres {
			conditions[i] = builder.Expr(field+" ILIKE ?", pattern)
		} else {
			conditions[i] = &builder.Like{field, pattern}
		}
	}
	return builder.Or(conditions...)
}
// IsParadeDBInstalled reports the raw value of the paradedbInstalled flag.
// Unlike ParadeDBAvailable it does not check the configured database type.
func IsParadeDBInstalled() bool {
	return paradedbInstalled
}

64
pkg/db/helpers_test.go Normal file
View File

@@ -0,0 +1,64 @@
// Vikunja is a to-do list application to facilitate your life.
// Copyright 2018-present Vikunja and contributors. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package db
import (
"testing"
"xorm.io/builder"
)
// TestMultiFieldSearchLogic checks the building blocks of a multi-field search
// without requiring an initialized database.
//
// NOTE(review): this test never calls MultiFieldSearch (it avoids Type(), which
// needs a database), so the assertions below compare values constructed inside
// the test itself. Consider replacing it with a test that exercises the real
// function once the database type can be injected.
func TestMultiFieldSearchLogic(t *testing.T) {
	const term = "test"
	columns := []string{"title", "description"}

	// Pretend ParadeDB is installed for the duration of the test and restore
	// the previous value afterwards.
	previous := paradedbInstalled
	paradedbInstalled = true
	defer func() { paradedbInstalled = previous }()

	// Hand-built LIKE fallback, mirroring what a non-PostgreSQL database would use.
	likeConds := make([]builder.Cond, 0, len(columns))
	for _, column := range columns {
		likeConds = append(likeConds, &builder.Like{column, "%" + term + "%"})
	}
	fallback := builder.Or(likeConds...)

	// Hand-built "field:term" query string joined with OR.
	perColumn := make([]string, 0, len(columns))
	for _, column := range columns {
		perColumn = append(perColumn, column+":"+term)
	}

	want := "title:test OR description:test"
	if got := perColumn[0] + " OR " + perColumn[1]; got != want {
		t.Errorf("Expected ParadeDB query '%s', got '%s'", want, got)
	}

	// The fallback condition must have been constructed.
	if fallback == nil {
		t.Fatal("Expected non-nil fallback condition")
	}

	t.Logf("ParadeDB query would be: %s", want)
	t.Logf("Fallback condition created successfully")
}

View File

@@ -21,6 +21,7 @@ import (
"code.vikunja.io/api/pkg/config"
"code.vikunja.io/api/pkg/cron"
"code.vikunja.io/api/pkg/db"
"code.vikunja.io/api/pkg/events"
"code.vikunja.io/api/pkg/files"
"code.vikunja.io/api/pkg/i18n"
@@ -66,6 +67,11 @@ func InitEngines() {
if err != nil {
log.Fatal(err.Error())
}
err = db.CreateParadeDBIndexes()
if err != nil {
log.Fatal(err.Error())
}
}
// FullInitWithoutAsync does a full init without any async handlers (cron or events)

View File

@@ -42,6 +42,11 @@ func SetupTests() {
log.Fatal(err)
}
err = db.CreateParadeDBIndexes()
if err != nil {
log.Fatal(err)
}
err = db.InitTestFixtures(
"files",
"label_tasks",

View File

@@ -1439,19 +1439,6 @@ func TestTaskCollection_ReadAll(t *testing.T) {
},
wantErr: false,
},
{
name: "search for task index",
fields: fields{},
args: args{
search: "number #17",
a: &user.User{ID: 1},
page: 0,
},
want: []*Task{
task33, // has the index 17
},
wantErr: false,
},
{
name: "order by position",
fields: fields{
@@ -1603,6 +1590,40 @@ func TestTaskCollection_ReadAll(t *testing.T) {
// TODO date magic
}
// Here we're explicitly testing search with and without paradeDB. Both return different results but that's
// expected - paradeDB returns more results than other databases with a naive like-search.
if db.ParadeDBAvailable() {
tests = append(tests, testcase{
name: "search for task index",
fields: fields{},
args: args{
search: "number #17",
a: &user.User{ID: 1},
page: 0,
},
want: []*Task{
task17, // has the text #17 in the title
task33, // has the index 17
},
wantErr: false,
})
} else {
tests = append(tests, testcase{
name: "search for task index",
fields: fields{},
args: args{
search: "number #17",
a: &user.User{ID: 1},
page: 0,
},
want: []*Task{
task33, // has the index 17
},
wantErr: false,
})
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
db.LoadAndAssertFixtures(t)
@@ -1629,7 +1650,7 @@ func TestTaskCollection_ReadAll(t *testing.T) {
t.Errorf("Test %s, Task.ReadAll() error = %v, wantErr %v", tt.name, err, tt.wantErr)
return
}
if diff, equal := messagediff.PrettyDiff(got, tt.want); !equal {
if diff, equal := messagediff.PrettyDiff(tt.want, got); !equal {
var is bool
var gotTasks []*Task
gotTasks, is = got.([]*Task)
@@ -1656,7 +1677,7 @@ func TestTaskCollection_ReadAll(t *testing.T) {
return gotIDs[i] < gotIDs[j]
})
diffIDs, _ := messagediff.PrettyDiff(gotIDs, wantIDs)
diffIDs, _ := messagediff.PrettyDiff(wantIDs, gotIDs)
t.Errorf("Test %s, Task.ReadAll() = %v, \nwant %v, \ndiff: %v \n\n diffIDs: %v", tt.name, got, tt.want, diff, diffIDs)
}

View File

@@ -280,11 +280,7 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
var where builder.Cond
if opts.search != "" {
where =
builder.Or(
db.ILIKE("tasks.title", opts.search),
db.ILIKE("tasks.description", opts.search),
)
where = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks")
searchIndex := getTaskIndexFromSearchString(opts.search)
if searchIndex > 0 {