From bede383535791f541a48bb07d7ee30e1471a4094 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Mon, 20 Oct 2025 22:58:25 +0800
Subject: [PATCH] fix errors

---
 .../db_migrations/013_bigint_size_fields.py   | 181 ++++++++++++++++++
 src/kohakuhub/db.py                           |   6 +-
 src/kohakuhub/main.py                         |   1 +
 3 files changed, 185 insertions(+), 3 deletions(-)
 create mode 100644 scripts/db_migrations/013_bigint_size_fields.py

diff --git a/scripts/db_migrations/013_bigint_size_fields.py b/scripts/db_migrations/013_bigint_size_fields.py
new file mode 100644
index 0000000..b212a84
--- /dev/null
+++ b/scripts/db_migrations/013_bigint_size_fields.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+"""
+Migration 013: Change size fields from INTEGER to BIGINT.
+
+This fixes the "integer out of range" error when uploading large files (>2GB).
+PostgreSQL INTEGER is limited to 2,147,483,647 (~2.1GB), but LFS files
+can be much larger.
+
+Changes:
+- File.size: INTEGER → BIGINT
+- StagingUpload.size: INTEGER → BIGINT
+- LFSObjectHistory.size: INTEGER → BIGINT
+"""
+
+import sys
+import os
+
+# Add src to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
+# Add db_migrations to path (for _migration_utils)
+sys.path.insert(0, os.path.dirname(__file__))
+
+from kohakuhub.db import db
+from kohakuhub.config import cfg
+from _migration_utils import should_skip_due_to_future_migrations, check_table_exists
+
+MIGRATION_NUMBER = 13
+
+# (model name, table name) for every model whose size column is widened.
+SIZE_TABLES = (
+    ("File", "file"),
+    ("StagingUpload", "stagingupload"),
+    ("LFSObjectHistory", "lfsobjecthistory"),
+)
+
+
+def is_applied(db, cfg):
+    """Check if THIS migration has been applied.
+
+    Args:
+        db: Database handle; only db.cursor() is used here.
+        cfg: App config; cfg.app.db_backend selects the backend.
+
+    Returns:
+        True if all size columns are already BIGINT. Also True on any
+        non-postgres backend and when the check itself raises (safe fallback).
+    """
+    try:
+        cursor = db.cursor()
+        if cfg.app.db_backend != "postgres":
+            # SQLite uses INTEGER for both int and bigint, no distinction needed
+            # SQLite INTEGER can store up to 8 bytes (64-bit), so no migration needed
+            return True
+
+        for _model_name, table_name in SIZE_TABLES:
+            cursor.execute(
+                """
+                SELECT data_type
+                FROM information_schema.columns
+                WHERE table_name=%s AND column_name='size'
+                """,
+                (table_name,),
+            )
+            row = cursor.fetchone()
+            if not row or row[0] != "bigint":
+                return False
+        return True
+    except Exception as e:
+        # Error = treat as applied (safe fallback), but report it so the
+        # "Already applied" message printed by run() is not taken at face value.
+        print(f"Migration 013: could not verify column types: {e}")
+        return True
+
+
+def migrate_postgres():
+    """Change size fields to BIGINT in PostgreSQL.
+
+    Each ALTER runs in its own nested db.atomic() block (a savepoint when
+    run() already holds a transaction). PostgreSQL aborts the whole
+    transaction after a failed statement, so without the savepoint a
+    tolerated "does not exist" error would make every later ALTER fail.
+
+    Returns:
+        True once every table in SIZE_TABLES has been processed.
+
+    Raises:
+        Exception: any database error other than "does not exist".
+    """
+    cursor = db.cursor()
+
+    print("Changing size fields from INTEGER to BIGINT...")
+
+    for step, (model_name, table_name) in enumerate(SIZE_TABLES, start=1):
+        print(f" {step}. {model_name}.size...")
+        try:
+            with db.atomic():
+                cursor.execute(
+                    f"ALTER TABLE {table_name} ALTER COLUMN size TYPE BIGINT"
+                )
+            print(f" ✓ Changed {model_name}.size to BIGINT")
+        except Exception as e:
+            if "does not exist" in str(e).lower():
+                print(f" - Table/column doesn't exist: {e}")
+            else:
+                raise
+
+    return True
+
+
+def migrate_sqlite():
+    """No migration needed for SQLite.
+
+    SQLite INTEGER can store up to 8 bytes (64-bit signed), which is
+    equivalent to BIGINT. No schema change is needed.
+    """
+    print("SQLite migration not needed:")
+    print(" SQLite INTEGER already supports 64-bit values (same as BIGINT)")
+    print(
+        " ✓ All size fields can already store values up to 9,223,372,036,854,775,807"
+    )
+    return True
+
+
+def run():
+    """Run this migration.
+
+    Returns:
+        True if successful, False otherwise
+    """
+    db.connect(reuse_if_open=True)
+
+    try:
+        # Check if should skip due to future migrations
+        if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
+            print("Migration 013: Skipped (superseded by future migration)")
+            return True
+
+        # Check if tables exist
+        for _model_name, table_name in SIZE_TABLES:
+            if not check_table_exists(db, table_name):
+                print(f"Migration 013: Skipped ({table_name} table doesn't exist yet)")
+                return True
+
+        # Check if already applied
+        if is_applied(db, cfg):
+            print("Migration 013: Already applied (all size fields are BIGINT)")
+            return True
+
+        print("=" * 70)
+        print("Migration 013: Change size fields to BIGINT")
+        print("=" * 70)
+
+        # Run migration in transaction
+        with db.atomic():
+            if cfg.app.db_backend == "postgres":
+                result = migrate_postgres()
+            else:
+                result = migrate_sqlite()
+
+        if result:
+            print("\n" + "=" * 70)
+            print("Migration 013: ✓ Completed Successfully")
+            print("=" * 70)
+            print("\nSummary:")
+            for model_name, _table_name in SIZE_TABLES:
+                print(f" • {model_name}.size: INTEGER → BIGINT")
+            print("\nFiles larger than 2GB can now be stored without errors.")
+
+        return result
+
+    except Exception as e:
+        print(f"\nMigration 013: ✗ Failed - {e}")
+        import traceback
+
+        traceback.print_exc()
+        return False
+
+
+if __name__ == "__main__":
+    success = run()
+    sys.exit(0 if success else 1)
diff --git a/src/kohakuhub/db.py b/src/kohakuhub/db.py
index ac922da..70155e3 100644
--- a/src/kohakuhub/db.py
+++ b/src/kohakuhub/db.py
@@ -174,7 +174,7 @@ class File(BaseModel):
         Repository, backref="files", on_delete="CASCADE", index=True
     )
     path_in_repo = CharField(index=True)
-    size = IntegerField(default=0)
+    size = BigIntegerField(default=0)  # Changed from IntegerField to support files >2GB
     sha256 = CharField(index=True)
     lfs = BooleanField(default=False)
     is_deleted = BooleanField(default=False, index=True)  # Soft delete flag
@@ -197,7 +197,7 @@ class StagingUpload(BaseModel):
     revision = CharField(index=True)
     path_in_repo = CharField()
     sha256 = CharField(default="")
-    size = IntegerField(default=0)
+    size = BigIntegerField(default=0)  # Changed from IntegerField to support files >2GB
     upload_id = CharField(null=True)
     storage_key = CharField()
     lfs = BooleanField(default=False)
@@ -272,7 +272,7 @@ class LFSObjectHistory(BaseModel):
     )
     path_in_repo = CharField(index=True)  # File path
     sha256 = CharField(index=True)  # LFS object hash
-    size = IntegerField()
+    size = BigIntegerField()  # Changed from IntegerField to support files >2GB
     commit_id = CharField(index=True)  # LakeFS commit ID
     # Optional link to File record for faster lookups
     # IMPORTANT: on_delete="SET NULL" prevents CASCADE deletion when File is deleted
diff --git a/src/kohakuhub/main.py b/src/kohakuhub/main.py
index 01a4909..519a6da 100644
--- a/src/kohakuhub/main.py
+++ b/src/kohakuhub/main.py
@@ -125,6 +125,7 @@ async def public_resolve_head(
         name=name,
         revision=revision,
         path=path,
+        request=request,
         user=user,
     )
 