update migration method

This commit is contained in:
Kohaku-Blueleaf
2025-10-16 16:59:34 +08:00
parent c40fbe6f10
commit c48997824f
12 changed files with 1506 additions and 43 deletions

View File

@@ -9,11 +9,49 @@ For example: user/myrepo can exist as both a model and dataset.
import sys
import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
# Import migration utilities
from _migration_utils import should_skip_due_to_future_migrations
# Migration number for this script
MIGRATION_NUMBER = 1
def is_applied(db, cfg):
    """Return True once the UNIQUE constraint on Repository.full_id is gone.

    Postgres: inspect pg_indexes for the 'repository_full_id' index and look
    for the UNIQUE keyword in its definition.  SQLite cannot easily reveal a
    dropped constraint, so table existence is used as a proxy.  Any detection
    error is treated as "applied" so the migration is skipped rather than
    re-run destructively.
    """
    try:
        cursor = db.cursor()
        if cfg.app.db_backend != "postgres":
            # SQLite fallback: assume applied whenever the table exists.
            return db.table_exists("repository")
        cursor.execute(
            """
            SELECT indexname, indexdef
            FROM pg_indexes
            WHERE tablename = 'repository' AND indexname = 'repository_full_id'
            """
        )
        row = cursor.fetchone()
        if not row or len(row) < 2:
            # Index missing (or unreadable) - nothing left to remove.
            return True
        # UNIQUE still present in the index definition => not yet applied.
        return "UNIQUE" not in row[1].upper()
    except Exception:
        # Detection failure: err on the side of skipping the migration.
        return True
def check_migration_needed():
"""Check if unique constraint exists on full_id."""
@@ -44,6 +82,11 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 001: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 001: Already applied (constraint removed)")
return True

View File

@@ -12,9 +12,22 @@ import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db, User, Organization
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists
MIGRATION_NUMBER = 2
def is_applied(db, cfg):
    """Return True when migration 002 is already present in the schema.

    The User.private_quota_bytes column is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_column_exists(db, cfg, "user", "private_quota_bytes")
    return applied
def check_migration_needed():
@@ -171,6 +184,11 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 002: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 002: Already applied (columns exist)")
return True

View File

@@ -7,8 +7,22 @@ import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db, Commit
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_table_exists
MIGRATION_NUMBER = 3
def is_applied(db, cfg):
    """Return True when migration 003 has already run.

    The existence of the commit table itself is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_table_exists(db, "commit")
    return applied
def check_migration_needed():
@@ -21,12 +35,66 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 003: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 003: Already applied (Commit table exists)")
return True
print("Migration 003: Creating Commit table...")
db.create_tables([Commit], safe=True)
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
# PostgreSQL: Create Commit table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS commit (
id SERIAL PRIMARY KEY,
commit_id VARCHAR(255) NOT NULL,
repo_full_id VARCHAR(255) NOT NULL,
author_id INTEGER NOT NULL,
message TEXT,
created_at TIMESTAMP NOT NULL
)
"""
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_commit_id ON commit(commit_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_repo_full_id ON commit(repo_full_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_author_id ON commit(author_id)"
)
else:
# SQLite: Create Commit table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS commit (
id INTEGER PRIMARY KEY AUTOINCREMENT,
commit_id VARCHAR(255) NOT NULL,
repo_full_id VARCHAR(255) NOT NULL,
author_id INTEGER NOT NULL,
message TEXT,
created_at DATETIME NOT NULL
)
"""
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_commit_id ON commit(commit_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_repo_full_id ON commit(repo_full_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_author_id ON commit(author_id)"
)
db.commit()
print("Migration 003: ✓ Completed")
return True

View File

@@ -11,9 +11,22 @@ import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db, Repository
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists
MIGRATION_NUMBER = 4
def is_applied(db, cfg):
    """Return True when migration 004 is already present in the schema.

    The Repository.quota_bytes column is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_column_exists(db, cfg, "repository", "quota_bytes")
    return applied
def check_migration_needed():
@@ -92,6 +105,11 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 004: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 004: Already applied (columns exist)")
return True

View File

@@ -19,9 +19,22 @@ if sys.platform == "win32":
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists
MIGRATION_NUMBER = 5
def is_applied(db, cfg):
    """Return True when migration 005 is already present in the schema.

    The User.full_name column is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_column_exists(db, cfg, "user", "full_name")
    return applied
def check_migration_needed():
@@ -234,6 +247,11 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 005: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 005: Already applied (columns exist)")
return True

View File

@@ -18,9 +18,30 @@ if sys.platform == "win32":
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import (
should_skip_due_to_future_migrations,
check_column_exists,
check_table_exists,
)
MIGRATION_NUMBER = 6
def is_applied(db, cfg):
    """Return True when migration 006 is already in effect.

    A missing invitation table also counts as applied: init_db() will
    create it with the final schema, so there is nothing to migrate.
    """
    if check_table_exists(db, "invitation"):
        # Table present - max_usage is the column this migration introduces.
        return check_column_exists(db, cfg, "invitation", "max_usage")
    return True
def check_migration_needed():
@@ -155,8 +176,32 @@ def run():
db.connect(reuse_if_open=True)
try:
if not check_migration_needed():
print("Migration 006: Already applied (columns exist)")
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 006: Skipped (superseded by future migration)")
return True
migration_needed = check_migration_needed()
if not migration_needed:
# Check if table exists to provide better message
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
cursor.execute(
"SELECT table_name FROM information_schema.tables WHERE table_name='invitation'"
)
table_exists = cursor.fetchone() is not None
else:
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='invitation'"
)
table_exists = cursor.fetchone() is not None
if not table_exists:
print(
"Migration 006: Skipped (invitation table doesn't exist yet, will be created by init_db)"
)
else:
print("Migration 006: Already applied (columns exist)")
return True
print("Migration 006: Adding multi-use support to Invitation table...")

View File

@@ -18,17 +18,51 @@ if sys.platform == "win32":
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists
MIGRATION_NUMBER = 7
def is_applied(db, cfg):
    """Return True when migration 007 is already present in the schema.

    The User.avatar column is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_column_exists(db, cfg, "user", "avatar")
    return applied
def check_migration_needed():
"""Check if this migration needs to run by checking if columns exist."""
"""Check if this migration needs to run.
Returns True only if:
- User table exists (schema version > 0)
- AND User.avatar doesn't exist (schema version < 7)
Returns False if:
- User table doesn't exist (fresh install, version 0, will be created by init_db)
- OR User.avatar exists (already at version 7+)
"""
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
# Check if User.avatar exists
# First check if user table exists
cursor.execute(
"""
SELECT table_name
FROM information_schema.tables
WHERE table_name='user'
"""
)
if cursor.fetchone() is None:
# Fresh database, tables will be created by init_db() with final schema
return False
# Table exists, check if avatar column exists
cursor.execute(
"""
SELECT column_name
@@ -38,7 +72,15 @@ def check_migration_needed():
)
return cursor.fetchone() is None
else:
# SQLite: Check via PRAGMA
# SQLite: First check if table exists
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='user'"
)
if cursor.fetchone() is None:
# Fresh database, tables will be created by init_db() with final schema
return False
# Table exists, check via PRAGMA if avatar column exists
cursor.execute("PRAGMA table_info(user)")
columns = [row[1] for row in cursor.fetchall()]
return "avatar" not in columns
@@ -133,8 +175,32 @@ def run():
db.connect(reuse_if_open=True)
try:
if not check_migration_needed():
print("Migration 007: Already applied (columns exist)")
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 007: Skipped (superseded by future migration)")
return True
migration_needed = check_migration_needed()
if not migration_needed:
# Check if table exists to provide better message
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
cursor.execute(
"SELECT table_name FROM information_schema.tables WHERE table_name='user'"
)
table_exists = cursor.fetchone() is not None
else:
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='user'"
)
table_exists = cursor.fetchone() is not None
if not table_exists:
print(
"Migration 007: Skipped (user table doesn't exist yet, will be created by init_db)"
)
else:
print("Migration 007: Already applied (columns exist)")
return True
print("Migration 007: Adding avatar support to User and Organization tables...")

View File

@@ -0,0 +1,791 @@
#!/usr/bin/env python3
"""
Migration 008: Major database refactoring - Merge User/Organization + Add ForeignKeys.
BREAKING CHANGE: This is a major schema refactoring.
BACKUP YOUR DATABASE BEFORE RUNNING THIS MIGRATION!
Changes:
1. Merge Organization table into User table (add is_org flag)
2. Convert all integer ID fields to proper ForeignKey constraints
3. Add owner fields to File and Commit for denormalized access
New schema:
- User.is_org: distinguishes users (FALSE) from organizations (TRUE)
- EmailVerification.user: ForeignKey to User
- Session.user: ForeignKey to User
- Token.user: ForeignKey to User
- Repository.owner: ForeignKey to User (can be user or org)
- File.repository: ForeignKey to Repository
- File.owner: ForeignKey to User (denormalized from repository.owner)
- StagingUpload.repository: ForeignKey to Repository
- StagingUpload.uploader: ForeignKey to User
- UserOrganization.user: ForeignKey to User
- UserOrganization.organization: ForeignKey to User (is_org=TRUE)
- Commit.repository: ForeignKey to Repository
- Commit.author: ForeignKey to User (who made commit)
- Commit.owner: ForeignKey to User (repository owner, denormalized)
- LFSObjectHistory.repository: ForeignKey to Repository
- LFSObjectHistory.file: ForeignKey to File (nullable)
- SSHKey.user: ForeignKey to User
- Invitation.created_by: ForeignKey to User
- Invitation.used_by: ForeignKey to User (nullable)
This migration cannot be easily rolled back. Test thoroughly before deploying to production!
"""
import sys
import os
# Fix Windows encoding issues
if sys.platform == "win32":
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations
MIGRATION_NUMBER = 8
def is_applied(db, cfg):
    """Return True when migration 008's schema changes are already present.

    The User.is_org column is the migration's signature.  The column probe
    is implemented inline (rather than via _migration_utils) to avoid any
    potential circular-import or schema-mismatch issues, and any inspection
    error is reported as "applied" so the migration is skipped, never rerun.
    """
    try:
        cursor = db.cursor()
        if cfg.app.db_backend == "postgres":
            cursor.execute(
                """
                SELECT column_name
                FROM information_schema.columns
                WHERE table_name='user' AND column_name='is_org'
                """,
            )
            return cursor.fetchone() is not None
        # SQLite: column names come from PRAGMA table_info (index 1 per row).
        cursor.execute("PRAGMA table_info(user)")
        column_names = {row[1] for row in cursor.fetchall()}
        return "is_org" in column_names
    except Exception:
        # Safe fallback: a failed check means "do not run this migration".
        return True
def check_migration_needed():
    """Decide whether migration 008 must run against this database.

    True  -> the user table exists but lacks is_org (schema older than 008).
    False -> fresh database (init_db() will build the final schema directly)
             or the is_org column is already present (version 8+).
    """
    cursor = db.cursor()
    if cfg.app.db_backend == "postgres":
        # Does the user table exist at all?
        cursor.execute(
            """
            SELECT table_name
            FROM information_schema.tables
            WHERE table_name='user'
            """
        )
        if cursor.fetchone() is None:
            # Fresh database - init_db() creates the final schema.
            return False
        # Table present: migration is needed iff is_org is missing.
        cursor.execute(
            """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name='user' AND column_name='is_org'
            """
        )
        return cursor.fetchone() is None
    # SQLite path: probe sqlite_master for the table first.
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='user'"
    )
    if cursor.fetchone() is None:
        # Fresh database - init_db() creates the final schema.
        return False
    # Table present: list columns via PRAGMA and look for is_org.
    cursor.execute("PRAGMA table_info(user)")
    column_names = [row[1] for row in cursor.fetchall()]
    return "is_org" not in column_names
def migrate_sqlite():
    """Migrate SQLite database.

    Strategy:
    1. Add new columns to User table (is_org, description, normalized_name)
    2. Migrate Organization data into User table
    3. Create temporary mapping table for old org IDs
    4. Update all FK references
    5. Drop Organization table
    6. Rebuild tables with proper ForeignKey constraints (deferred to Peewee)

    Returns:
        bool: True on success, False if the operator declined the prompt.
    """
    cursor = db.cursor()
    print("\n=== Phase 1: Backup Warning ===")
    print("⚠️ This migration modifies the database schema significantly.")
    print("⚠️ BACKUP YOUR DATABASE before proceeding!")
    print("")
    # Allow auto-confirmation via environment variable (for Docker/CI)
    auto_confirm = os.environ.get("KOHAKU_HUB_AUTO_MIGRATE", "").lower() in (
        "true",
        "1",
        "yes",
    )
    if auto_confirm:
        print(" Auto-confirmation enabled (KOHAKU_HUB_AUTO_MIGRATE=true)")
        response = "yes"
    else:
        response = input("Type 'yes' to continue: ")
    if response.lower() != "yes":
        print("Migration cancelled.")
        return False
    print("\n=== Phase 2: Add new columns to User table ===")
    # Add is_org column; "duplicate column" means a previous (partial) run
    # already added it, which is fine for idempotency.
    try:
        cursor.execute("ALTER TABLE user ADD COLUMN is_org BOOLEAN DEFAULT FALSE")
        print(" ✓ Added User.is_org")
    except Exception as e:
        if "duplicate column" in str(e).lower():
            print(" - User.is_org already exists")
        else:
            raise
    # Add description column (for orgs)
    try:
        cursor.execute("ALTER TABLE user ADD COLUMN description TEXT DEFAULT NULL")
        print(" ✓ Added User.description")
    except Exception as e:
        if "duplicate column" in str(e).lower():
            print(" - User.description already exists")
        else:
            raise
    # Add normalized_name column (for O(1) conflict checking)
    try:
        cursor.execute("ALTER TABLE user ADD COLUMN normalized_name TEXT")
        print(" ✓ Added User.normalized_name")
    except Exception as e:
        if "duplicate column" in str(e).lower():
            print(" - User.normalized_name already exists")
        else:
            raise
    # Note: SQLite doesn't support ALTER COLUMN to make existing columns nullable
    # This will require table recreation, which we'll handle in a full rebuild
    db.commit()
    # Populate normalized_name for existing users
    print(" Populating User.normalized_name for existing users...")
    cursor.execute("SELECT id, username FROM user")
    users = cursor.fetchall()
    for user_id, username in users:
        # Normalize: lowercase, remove hyphens and underscores
        normalized = username.lower().replace("-", "").replace("_", "")
        cursor.execute(
            "UPDATE user SET normalized_name = ? WHERE id = ?", (normalized, user_id)
        )
    db.commit()
    print(f" ✓ Populated normalized_name for {len(users)} existing users")
    print("\n=== Phase 3: Migrate Organization data into User table ===")
    # Check if organization table exists
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='organization'"
    )
    if cursor.fetchone():
        # Get all organizations
        cursor.execute(
            "SELECT id, name, description, private_quota_bytes, public_quota_bytes, "
            "private_used_bytes, public_used_bytes, bio, website, social_media, "
            "avatar, avatar_updated_at, created_at FROM organization"
        )
        orgs = cursor.fetchall()
        print(f" Found {len(orgs)} organization(s) to migrate")
        # Create mapping table for old org IDs -> new user IDs
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS _org_id_mapping (old_org_id INTEGER, new_user_id INTEGER)"
        )
        for org in orgs:
            # Tuple order must match the SELECT column list above.
            (
                org_id,
                name,
                description,
                private_quota_bytes,
                public_quota_bytes,
                private_used_bytes,
                public_used_bytes,
                bio,
                website,
                social_media,
                avatar,
                avatar_updated_at,
                created_at,
            ) = org
            # Normalize name for conflict checking
            normalized = name.lower().replace("-", "").replace("_", "")
            # Insert organization as user with is_org=TRUE
            # email and password_hash will be NULL for organizations
            cursor.execute(
                """
                INSERT INTO user (username, normalized_name, is_org, email, password_hash, email_verified, is_active,
                                  private_quota_bytes, public_quota_bytes, private_used_bytes, public_used_bytes,
                                  full_name, bio, description, website, social_media,
                                  avatar, avatar_updated_at, created_at)
                VALUES (?, ?, TRUE, NULL, NULL, FALSE, TRUE, ?, ?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    name,
                    normalized,
                    private_quota_bytes,
                    public_quota_bytes,
                    private_used_bytes,
                    public_used_bytes,
                    bio,
                    description,
                    website,
                    social_media,
                    avatar,
                    avatar_updated_at,
                    created_at,
                ),
            )
            new_user_id = cursor.lastrowid
            # Store mapping (old org ID -> new user row ID)
            cursor.execute(
                "INSERT INTO _org_id_mapping (old_org_id, new_user_id) VALUES (?, ?)",
                (org_id, new_user_id),
            )
            print(f" ✓ Migrated organization '{name}' (id {org_id} -> {new_user_id})")
        db.commit()
        print(f" ✓ All {len(orgs)} organizations migrated to User table")
    else:
        print(" - No organization table found, skipping")
    print("\n=== Phase 4: Update Foreign Key references ===")
    # NOTE(review): if the organization table was absent, _org_id_mapping was
    # never created and the SELECT below would fail for any existing
    # membership row - confirm this path is unreachable on such databases.
    # 4a. Update UserOrganization.organization to reference new User IDs
    cursor.execute("SELECT id, organization FROM userorganization")
    memberships = cursor.fetchall()
    for membership_id, old_org_id in memberships:
        cursor.execute(
            "SELECT new_user_id FROM _org_id_mapping WHERE old_org_id = ?",
            (old_org_id,),
        )
        result = cursor.fetchone()
        if result:
            new_user_id = result[0]
            cursor.execute(
                "UPDATE userorganization SET organization = ? WHERE id = ?",
                (new_user_id, membership_id),
            )
    db.commit()
    print(f" ✓ Updated {len(memberships)} UserOrganization records")
    # 4b. Add owner column to File table (denormalized from repository.owner)
    print(" Adding File.owner_id column...")
    try:
        cursor.execute("ALTER TABLE file ADD COLUMN owner_id INTEGER")
        print(" ✓ Added File.owner_id column")
    except Exception as e:
        if "duplicate column" not in str(e).lower():
            raise
        print(" - File.owner_id already exists")
    # Update File.owner_id from Repository.owner_id (correlated subquery)
    cursor.execute(
        """
        UPDATE file SET owner_id = (
            SELECT owner_id FROM repository
            WHERE repository.full_id = file.repo_full_id
            LIMIT 1
        )
        """
    )
    print(f" ✓ Updated File.owner_id for all files")
    db.commit()
    # 4c. Add owner column to Commit table (repository owner)
    print(" Adding Commit.owner_id column...")
    try:
        cursor.execute("ALTER TABLE commit ADD COLUMN owner_id INTEGER")
        print(" ✓ Added Commit.owner_id column")
    except Exception as e:
        if "duplicate column" not in str(e).lower():
            raise
        print(" - Commit.owner_id already exists")
    # Update Commit.owner_id from Repository.owner_id (correlated subquery)
    cursor.execute(
        """
        UPDATE commit SET owner_id = (
            SELECT owner_id FROM repository
            WHERE repository.full_id = commit.repo_full_id
            LIMIT 1
        )
        """
    )
    print(f" ✓ Updated Commit.owner_id for all commits")
    db.commit()
    # 4d. Add uploader column to StagingUpload table (no backfill source here)
    print(" Adding StagingUpload.uploader_id column...")
    try:
        cursor.execute(
            "ALTER TABLE stagingupload ADD COLUMN uploader_id INTEGER DEFAULT NULL"
        )
        print(" ✓ Added StagingUpload.uploader_id column")
    except Exception as e:
        if "duplicate column" not in str(e).lower():
            raise
        print(" - StagingUpload.uploader_id already exists")
    db.commit()
    # 4e. Add file FK column to LFSObjectHistory table
    print(" Adding LFSObjectHistory.file_id column...")
    try:
        cursor.execute(
            "ALTER TABLE lfsobjecthistory ADD COLUMN file_id INTEGER DEFAULT NULL"
        )
        print(" ✓ Added LFSObjectHistory.file_id column")
    except Exception as e:
        if "duplicate column" not in str(e).lower():
            raise
        print(" - LFSObjectHistory.file_id already exists")
    # Update LFSObjectHistory.file_id from File table (match on repo + path)
    cursor.execute(
        """
        UPDATE lfsobjecthistory SET file_id = (
            SELECT id FROM file
            WHERE file.repo_full_id = lfsobjecthistory.repo_full_id
            AND file.path_in_repo = lfsobjecthistory.path_in_repo
            LIMIT 1
        )
        """
    )
    print(f" ✓ Updated LFSObjectHistory.file_id for all records")
    db.commit()
    print("\n=== Phase 5: Cleanup ===")
    # Drop temporary mapping table (non-fatal if it was never created)
    try:
        cursor.execute("DROP TABLE _org_id_mapping")
        print(" ✓ Dropped temporary mapping table")
    except Exception as e:
        print(f" - Failed to drop mapping table (non-fatal): {e}")
    # Drop Organization table
    try:
        cursor.execute("DROP TABLE organization")
        print(" ✓ Dropped Organization table")
        db.commit()
    except Exception as e:
        print(f" - Failed to drop organization table: {e}")
        # Non-fatal, continue
    print("\n⚠️ IMPORTANT: Foreign key constraints require table recreation in SQLite")
    print("⚠️ Peewee will handle this automatically on next application startup")
    print("⚠️ The application will recreate tables with proper ForeignKey constraints")
    return True
def migrate_postgres():
    """Migrate PostgreSQL database.

    Mirrors migrate_sqlite() but uses PostgreSQL-specific SQL: quoted "user"
    table, %s placeholders, RETURNING id, a TEMP mapping table, UPDATE..FROM
    joins, and db.rollback() after failed DDL (Postgres aborts the
    transaction on error, unlike SQLite).

    Returns:
        bool: True on success, False if the operator declined the prompt.
    """
    cursor = db.cursor()
    print("\n=== Phase 1: Backup Warning ===")
    print("⚠️ This migration modifies the database schema significantly.")
    print("⚠️ BACKUP YOUR DATABASE before proceeding!")
    print("")
    # Allow auto-confirmation via environment variable (for Docker/CI)
    auto_confirm = os.environ.get("KOHAKU_HUB_AUTO_MIGRATE", "").lower() in (
        "true",
        "1",
        "yes",
    )
    if auto_confirm:
        print(" Auto-confirmation enabled (KOHAKU_HUB_AUTO_MIGRATE=true)")
        response = "yes"
    else:
        response = input("Type 'yes' to continue: ")
    if response.lower() != "yes":
        print("Migration cancelled.")
        return False
    print("\n=== Phase 2: Add new columns to User table ===")
    # Add is_org column; "already exists" means a previous (partial) run
    # added it - roll back the failed statement and continue.
    try:
        cursor.execute('ALTER TABLE "user" ADD COLUMN is_org BOOLEAN DEFAULT FALSE')
        print(" ✓ Added User.is_org")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - User.is_org already exists")
            db.rollback()
        else:
            raise
    # Add description column
    try:
        cursor.execute('ALTER TABLE "user" ADD COLUMN description TEXT DEFAULT NULL')
        print(" ✓ Added User.description")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - User.description already exists")
            db.rollback()
        else:
            raise
    # Add normalized_name column (for O(1) conflict checking)
    try:
        cursor.execute('ALTER TABLE "user" ADD COLUMN normalized_name TEXT')
        print(" ✓ Added User.normalized_name")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - User.normalized_name already exists")
            db.rollback()
        else:
            raise
    # Make email and password_hash nullable (orgs have neither)
    try:
        cursor.execute('ALTER TABLE "user" ALTER COLUMN email DROP NOT NULL')
        cursor.execute('ALTER TABLE "user" ALTER COLUMN password_hash DROP NOT NULL')
        print(" ✓ Made email and password_hash nullable")
    except Exception as e:
        print(f" - Failed to make columns nullable (may already be nullable): {e}")
        db.rollback()
    db.commit()
    # Populate normalized_name for existing users
    print(" Populating User.normalized_name for existing users...")
    cursor.execute('SELECT id, username FROM "user"')
    users = cursor.fetchall()
    for user_id, username in users:
        # Normalize: lowercase, remove hyphens and underscores
        normalized = username.lower().replace("-", "").replace("_", "")
        cursor.execute(
            'UPDATE "user" SET normalized_name = %s WHERE id = %s',
            (normalized, user_id),
        )
    db.commit()
    print(f" ✓ Populated normalized_name for {len(users)} existing users")
    print("\n=== Phase 3: Migrate Organization data into User table ===")
    # Check if organization table exists
    cursor.execute(
        "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'organization')"
    )
    if cursor.fetchone()[0]:
        # Get all organizations
        cursor.execute(
            "SELECT id, name, description, private_quota_bytes, public_quota_bytes, "
            "private_used_bytes, public_used_bytes, bio, website, social_media, "
            "avatar, avatar_updated_at, created_at FROM organization"
        )
        orgs = cursor.fetchall()
        print(f" Found {len(orgs)} organization(s) to migrate")
        # Create temporary mapping table (session-scoped in Postgres)
        cursor.execute(
            "CREATE TEMP TABLE _org_id_mapping (old_org_id INTEGER, new_user_id INTEGER)"
        )
        for org in orgs:
            # Tuple order must match the SELECT column list above.
            (
                org_id,
                name,
                description,
                private_quota_bytes,
                public_quota_bytes,
                private_used_bytes,
                public_used_bytes,
                bio,
                website,
                social_media,
                avatar,
                avatar_updated_at,
                created_at,
            ) = org
            # Normalize name for conflict checking
            normalized = name.lower().replace("-", "").replace("_", "")
            # Insert organization as user with is_org=TRUE
            cursor.execute(
                """
                INSERT INTO "user" (username, normalized_name, is_org, email, password_hash, email_verified, is_active,
                                    private_quota_bytes, public_quota_bytes, private_used_bytes, public_used_bytes,
                                    full_name, bio, description, website, social_media,
                                    avatar, avatar_updated_at, created_at)
                VALUES (%s, %s, TRUE, NULL, NULL, FALSE, TRUE, %s, %s, %s, %s, NULL, %s, %s, %s, %s, %s, %s, %s)
                RETURNING id
                """,
                (
                    name,
                    normalized,
                    private_quota_bytes,
                    public_quota_bytes,
                    private_used_bytes,
                    public_used_bytes,
                    bio,
                    description,
                    website,
                    social_media,
                    avatar,
                    avatar_updated_at,
                    created_at,
                ),
            )
            # RETURNING id yields the new user row's primary key.
            new_user_id = cursor.fetchone()[0]
            # Store mapping (old org ID -> new user row ID)
            cursor.execute(
                "INSERT INTO _org_id_mapping (old_org_id, new_user_id) VALUES (%s, %s)",
                (org_id, new_user_id),
            )
            print(f" ✓ Migrated organization '{name}' (id {org_id} -> {new_user_id})")
        db.commit()
        print(f" ✓ All {len(orgs)} organizations migrated to User table")
    else:
        print(" - No organization table found, skipping")
    print("\n=== Phase 4: Update Foreign Key references ===")
    # NOTE(review): if the organization table was absent, the TEMP table
    # _org_id_mapping was never created and the UPDATE below would fail -
    # confirm this path is unreachable on such databases.
    # 4a. Update UserOrganization.organization to reference new User IDs
    cursor.execute(
        "UPDATE userorganization SET organization = m.new_user_id "
        "FROM _org_id_mapping m WHERE userorganization.organization = m.old_org_id"
    )
    affected = cursor.rowcount
    db.commit()
    print(f" ✓ Updated {affected} UserOrganization records")
    # 4b. Add owner column to File table (denormalized from repository.owner)
    print(" Adding File.owner_id column...")
    try:
        cursor.execute("ALTER TABLE file ADD COLUMN owner_id INTEGER")
        print(" ✓ Added File.owner_id column")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - File.owner_id already exists")
            db.rollback()
        else:
            raise
    # Update File.owner_id from Repository.owner_id (UPDATE..FROM join)
    cursor.execute(
        """
        UPDATE file SET owner_id = repository.owner_id
        FROM repository
        WHERE repository.full_id = file.repo_full_id
        """
    )
    print(f" ✓ Updated File.owner_id for all files")
    db.commit()
    # 4c. Add owner column to Commit table (repository owner)
    print(" Adding Commit.owner_id column...")
    try:
        cursor.execute("ALTER TABLE commit ADD COLUMN owner_id INTEGER")
        print(" ✓ Added Commit.owner_id column")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - Commit.owner_id already exists")
            db.rollback()
        else:
            raise
    # Update Commit.owner_id from Repository.owner_id (UPDATE..FROM join)
    cursor.execute(
        """
        UPDATE commit SET owner_id = repository.owner_id
        FROM repository
        WHERE repository.full_id = commit.repo_full_id
        """
    )
    print(f" ✓ Updated Commit.owner_id for all commits")
    db.commit()
    # 4d. Add uploader column to StagingUpload table (no backfill source here)
    print(" Adding StagingUpload.uploader_id column...")
    try:
        cursor.execute(
            "ALTER TABLE stagingupload ADD COLUMN uploader_id INTEGER DEFAULT NULL"
        )
        print(" ✓ Added StagingUpload.uploader_id column")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - StagingUpload.uploader_id already exists")
            db.rollback()
        else:
            raise
    db.commit()
    # 4e. Add file FK column to LFSObjectHistory table
    print(" Adding LFSObjectHistory.file_id column...")
    try:
        cursor.execute(
            "ALTER TABLE lfsobjecthistory ADD COLUMN file_id INTEGER DEFAULT NULL"
        )
        print(" ✓ Added LFSObjectHistory.file_id column")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - LFSObjectHistory.file_id already exists")
            db.rollback()
        else:
            raise
    # Update LFSObjectHistory.file_id from File table (match on repo + path)
    cursor.execute(
        """
        UPDATE lfsobjecthistory SET file_id = file.id
        FROM file
        WHERE file.repo_full_id = lfsobjecthistory.repo_full_id
        AND file.path_in_repo = lfsobjecthistory.path_in_repo
        """
    )
    print(f" ✓ Updated LFSObjectHistory.file_id for all records")
    db.commit()
    print("\n=== Phase 5: Drop old Organization table ===")
    try:
        # CASCADE removes dependent objects (views, FKs) along with the table.
        cursor.execute("DROP TABLE IF EXISTS organization CASCADE")
        print(" ✓ Dropped Organization table")
        db.commit()
    except Exception as e:
        print(f" - Failed to drop organization table: {e}")
        db.rollback()
    print("\n⚠️ IMPORTANT: Table recreation with Foreign Keys")
    print("⚠️ Peewee ORM will handle ForeignKey constraint creation on next startup")
    print("⚠️ You may need to restart the application for changes to take effect")
    return True
def run():
    """Entry point for migration 008.

    Connects to the database, decides whether the migration still needs to
    run, dispatches to the backend-specific migrator, and always closes the
    connection on exit.

    Returns:
        True if the migration completed or was safely skipped, False on failure.
    """
    db.connect(reuse_if_open=True)
    try:
        # A newer migration already includes these changes -> nothing to do.
        if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
            print("Migration 008: Skipped (superseded by future migration)")
            return True

        if not check_migration_needed():
            # Distinguish "already migrated" from "table not created yet"
            # so the log message is actually helpful.
            cursor = db.cursor()
            if cfg.app.db_backend == "postgres":
                probe_sql = (
                    "SELECT table_name FROM information_schema.tables WHERE table_name='user'"
                )
            else:
                probe_sql = (
                    "SELECT name FROM sqlite_master WHERE type='table' AND name='user'"
                )
            cursor.execute(probe_sql)
            if cursor.fetchone() is None:
                print(
                    "Migration 008: Skipped (user table doesn't exist yet, will be created by init_db)"
                )
            else:
                print("Migration 008: Already applied (User.is_org exists)")
            return True

        print("=" * 70)
        print("Migration 008: Major Database Refactoring")
        print("Merging User/Organization tables + Adding ForeignKey constraints")
        print("=" * 70)

        # Dispatch to the backend-specific implementation.
        if cfg.app.db_backend == "postgres":
            result = migrate_postgres()
        else:
            result = migrate_sqlite()

        if result:
            print("\n" + "=" * 70)
            print("Migration 008: ✓ Completed Successfully")
            print("=" * 70)
            print("\nNext steps:")
            print("1. Restart the application to apply ForeignKey constraints")
            print("2. Test all functionality thoroughly")
            print("3. Monitor logs for any foreign key constraint violations")
        return result
    except Exception as e:
        print(f"\nMigration 008: ✗ Failed - {e}")
        import traceback

        traceback.print_exc()
        return False
    finally:
        db.close()
if __name__ == "__main__":
    # Propagate the migration outcome as the process exit code (0 = success)
    # so wrapper scripts (e.g. docker/startup.py) can gate on it.
    sys.exit(0 if run() else 1)

View File

@@ -8,24 +8,61 @@ This directory contains database migration scripts for KohakuHub.
2. **Sequential execution**: Migrations run in numerical order (001, 002, 003, etc.)
3. **Idempotent**: Safe to run multiple times - already-applied migrations are skipped
4. **Auto-run**: Migrations automatically run on container startup via `docker/startup.py`
5. **Self-healing**: Each migration automatically checks if ANY future migration has been applied
- If migration 005 finds that migration 008 is applied, it skips (changes already included)
- Works automatically for any future migrations (009, 010, etc.)
- No hardcoding - migrations discover and check future migrations dynamically
## Migration Order
| # | Name | Description |
|---|------|-------------|
| 001 | repository_schema | Remove unique constraint from Repository.full_id |
| 002 | user_org_quotas | Add private/public quota fields to User/Organization |
| 003 | commit_tracking | Add Commit table for tracking user commits |
| 004 | repo_quotas | Add quota/used_bytes fields to Repository |
| # | Name | Description | Notes |
|---|------|-------------|-------|
| 001 | repository_schema | Remove unique constraint from Repository.full_id | Skipped if post-008 |
| 002 | user_org_quotas | Add private/public quota fields to User/Organization | Skipped if post-008 |
| 003 | commit_tracking | Add Commit table for tracking user commits | Skipped if post-008 |
| 004 | repo_quotas | Add quota/used_bytes fields to Repository | Skipped if post-008 |
| 005 | profiles_and_invitations | Add profile fields and invitation system | Skipped if post-008 |
| 006 | invitation_multi_use | Add multi-use support to invitations | Skipped if post-008 |
| 007 | avatar_support | Add avatar fields to User/Organization | Skipped if post-008 |
| 008 | foreignkey_refactoring | **BREAKING** Merge User/Organization tables + Add ForeignKeys | Major schema change |
## Migration 008 Schema Refactoring
**Migration 008 is a major schema refactoring that:**
- Merges the Organization table into User table (adds `is_org` flag)
- Converts all integer ID references to proper ForeignKey constraints
- Adds denormalized owner fields for performance
**If you have an existing database:**
- Migrations 001-007 will automatically skip (changes already included in 008)
- Migration 008 requires confirmation (or set `KOHAKU_HUB_AUTO_MIGRATE=true` in Docker)
- **BACKUP YOUR DATABASE BEFORE RUNNING 008**
**For fresh/new databases:**
- Recreate the database from scratch instead of running migrations:
```bash
# Stop services
docker-compose down
# Remove old database data
rm -rf hub-meta/postgres-data/*
# Restart (will auto-create schema)
docker-compose up -d
```
- Fresh databases get the latest schema automatically via `init_db()`
- All migrations will skip (nothing to migrate)
## Creating New Migrations
1. Create a new file: `scripts/db_migrations/00X_name.py`
2. Implement these functions:
- `MIGRATION_NUMBER` - Constant with migration number (e.g., 9)
- `is_applied(db, cfg)` - Check if THIS migration has been applied (for future migrations to detect)
- `check_migration_needed()` - Returns True if migration should run
- `migrate_sqlite()` - SQLite migration logic
- `migrate_postgres()` - PostgreSQL migration logic
- `run()` - Main entry point
- `run()` - Main entry point that uses `should_skip_due_to_future_migrations()`
3. Template:
```python
@@ -34,13 +71,38 @@ This directory contains database migration scripts for KohakuHub.
import sys
import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists, check_table_exists
# IMPORTANT: Do NOT import Peewee models (User, Repository, etc.)
# Models may be renamed/deleted in future versions, breaking old migrations.
# Use raw SQL queries instead.
MIGRATION_NUMBER = X # Replace X with actual number
def is_applied(db, cfg):
"""Check if THIS migration has been applied.
This function is called by older migrations to detect if this migration
has already applied their changes. Choose a unique signature column/table.
Returns True if this migration is applied, False otherwise.
Errors should return True (treat as applied, skip older migrations).
"""
# Example: Check if a signature column/table exists
return check_column_exists(db, cfg, "mytable", "mycolumn")
def check_migration_needed():
"""Check if columns/tables exist."""
"""Check if this migration needs to run."""
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
cursor.execute("""
@@ -53,22 +115,31 @@ def check_migration_needed():
columns = [row[1] for row in cursor.fetchall()]
return 'mycolumn' not in columns
def migrate_sqlite():
cursor = db.cursor()
cursor.execute("ALTER TABLE mytable ADD COLUMN mycolumn INTEGER")
db.commit()
def migrate_postgres():
cursor = db.cursor()
cursor.execute("ALTER TABLE mytable ADD COLUMN mycolumn BIGINT")
db.commit()
def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied (auto-skip if superseded)
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 00X: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 00X: Already applied")
return True
print("Migration 00X: Running...")
if cfg.app.db_backend == "postgres":
migrate_postgres()
@@ -78,6 +149,8 @@ def run():
return True
except Exception as e:
print(f"Migration 00X: ✗ Failed - {e}")
import traceback
traceback.print_exc()
return False
finally:
db.close()
@@ -101,8 +174,138 @@ python scripts/run_migrations.py
python scripts/db_migrations/001_repository_schema.py
```
## Best Practices
### For Fresh Databases
**Recommended:** Delete database data and let `init_db()` create the latest schema:
```bash
docker-compose down
rm -rf hub-meta/postgres-data/*
docker-compose up -d
```
This is faster and cleaner than running all migrations sequentially.
### For Existing Databases
1. **Backup first!** Always backup before running migrations
2. Run migrations via the automatic startup process
3. Set `KOHAKU_HUB_AUTO_MIGRATE=true` to skip confirmation prompts in Docker
4. Monitor logs for migration status
### For Development
```bash
# Run all pending migrations
python scripts/run_migrations.py
# Run specific migration
python scripts/db_migrations/001_repository_schema.py
```
## Migration System Design
### Self-Healing Future-Migration Detection
Each migration automatically checks if any **future** migration has been applied before running:
**How it works:**
1. Migration 003 is about to run
2. Checks migrations 008, 007, 006, 005, 004 (newest to oldest)
3. If migration 008's `is_applied()` returns True → skip migration 003
4. If all future migrations return False → run migration 003 normally
**Benefits:**
- No hardcoding of specific migration numbers
- Automatically works when you add migration 009, 010, etc.
- Errors/exceptions in `is_applied()` are treated as "not applied" (safe fallback)
- Makes migrations resilient to major schema refactorings
**Each migration must implement:**
```python
def is_applied(db, cfg):
"""Check if THIS migration has been applied.
Choose a unique signature (table or column) that this migration creates.
Errors should return True (treat as applied to be safe).
"""
return check_column_exists(db, cfg, "mytable", "my_signature_column")
```
### Important Guidelines
#### DO NOT Import Peewee Models
**Never import models like `User`, `Repository`, `Organization`, etc. in migrations!**
```python
# ❌ BAD - Will break if model is renamed/deleted
from kohakuhub.db import db, User, Organization
db.create_tables([User], safe=True)
# ✅ GOOD - Use raw SQL instead
from kohakuhub.db import db
cursor = db.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS user (...)")
```
**Why?**
- Migrations are permanent historical records
- Models may be renamed, deleted, or refactored in future versions
- Importing models creates tight coupling that breaks old migrations
- Example: Migration 002 imported `Organization`, which no longer exists after migration 008
**Use raw SQL for all schema changes:**
- Table creation: `CREATE TABLE IF NOT EXISTS`
- Column addition: `ALTER TABLE ... ADD COLUMN`
- Index creation: `CREATE INDEX IF NOT EXISTS`
## Notes
- Migrations are idempotent - safe to re-run
- Failed migrations will prevent server startup
- Each migration auto-skips if ANY future migration has been applied
- Use raw SQL queries instead of importing Peewee models
- Inside `is_applied()`, internal errors should return True ("applied"); exceptions that escape it are treated as "not applied" by the caller (safe fallback)
- Old migration scripts in `scripts/migrate_*.py` are kept for reference
## Utilities (`_migration_utils.py`)
Common helper functions available to all migrations:
```python
from _migration_utils import (
should_skip_due_to_future_migrations, # Check if future migrations applied
check_table_exists, # Check if table exists
check_column_exists, # Check if column exists
)
# Usage in migration
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration skipped - superseded by future migration")
return True
```
## Troubleshooting
### Error: "column repository_id does not exist"
This error indicates the database is in an inconsistent state (mix of old and new schema).
**Cause:** The database has some tables created with the new schema (post-migration 008), but migrations never ran to update the data properly.
**Solution 1 (Recommended):** Drop database and start fresh:
```bash
docker-compose down
rm -rf hub-meta/postgres-data/*
docker-compose up -d
```
**Solution 2:** Manually fix the inconsistency:
1. Connect to database: `docker exec -it postgres psql -U hub -d hubdb`
2. Check schema: `\d file` and `\d repository`
3. If File table has `repository_id` but no data, drop and recreate:
```sql
DROP TABLE IF EXISTS file CASCADE;
DROP TABLE IF EXISTS repository CASCADE;
-- Restart container to recreate tables
```
**Prevention:** Always run migrations before application starts (handled automatically in Docker)

View File

@@ -0,0 +1,151 @@
#!/usr/bin/env python3
"""
Shared utilities for database migrations.
This module provides common functionality for checking migration status
without importing any Peewee models (to avoid breaking old migrations).
"""
import importlib.util
import os
import sys
from pathlib import Path
def should_skip_due_to_future_migrations(
    current_migration_number: int, db, cfg
) -> bool:
    """Check if any future migration has been applied, indicating this migration should skip.

    Args:
        current_migration_number: The number of the current migration (e.g., 3 for migration 003)
        db: Database connection object
        cfg: Config object with db_backend property

    Returns:
        True if any future migration is applied (skip current migration)
        False if no future migrations found (run current migration)

    How it works:
        1. Discovers all migration files with higher numbers
        2. Checks from newest to oldest if migration is applied
        3. If ANY future migration is applied, current migration should skip
        4. Errors/exceptions while probing are treated as "not applied"
    """
    migrations_dir = Path(__file__).parent

    # Collect (number, path) pairs for migrations newer than the current one.
    future_migrations = []
    for file_path in migrations_dir.glob("*.py"):
        if file_path.name.startswith("_"):
            continue  # Skip utility files like _migration_utils.py
        # Extract migration number (e.g., "003" from "003_commit_tracking.py")
        try:
            number = int(file_path.stem.split("_")[0])
        except (ValueError, IndexError):
            continue  # Skip files without a valid numeric prefix
        if number > current_migration_number:
            future_migrations.append((number, file_path))

    if not future_migrations:
        # No future migrations found, don't skip
        return False

    # Check newest first: the most recent applied migration supersedes all.
    future_migrations.sort(reverse=True)

    for number, file_path in future_migrations:
        # Unique module name so repeated loads don't collide.
        module_name = f"_temp_migration_{number:03d}"
        try:
            # Drop any stale copy left over from a previous attempt.
            sys.modules.pop(module_name, None)

            spec = importlib.util.spec_from_file_location(module_name, file_path)
            module = importlib.util.module_from_spec(spec)
            sys.modules[module_name] = module
            spec.loader.exec_module(module)

            is_applied_fn = getattr(module, "is_applied", None)
            if is_applied_fn is not None:
                try:
                    if is_applied_fn(db, cfg):
                        # Future migration is applied; current one should skip.
                        return True
                except Exception:
                    # Probing a future migration against an old schema is
                    # expected to fail sometimes; treat as "not applied".
                    pass
        except Exception:
            # Module failed to load = treat as not applied, keep checking.
            pass
        finally:
            # Always remove the throwaway module, even on error/return paths
            # (the original leaked it into sys.modules when exec_module raised).
            sys.modules.pop(module_name, None)

    # No future migrations are applied, don't skip
    return False
def check_table_exists(db, table_name: str) -> bool:
    """Return True when *table_name* exists in the database.

    Args:
        db: Database connection object (must expose ``table_exists``).
        table_name: Name of the table to check.

    Returns:
        True if the table exists; False if it does not or the check fails.
    """
    try:
        result = db.table_exists(table_name)
    except Exception:
        result = False
    return result
def check_column_exists(db, cfg, table_name: str, column_name: str) -> bool:
    """Return True when *column_name* exists on *table_name*.

    Args:
        db: Database connection object.
        cfg: Config object with db_backend property.
        table_name: Name of the table.
        column_name: Name of the column to check.

    Returns:
        True if the column exists; False if it does not or the check fails.
    """
    try:
        cursor = db.cursor()
        if cfg.app.db_backend != "postgres":
            # SQLite: PRAGMA statements cannot take bound parameters, so the
            # table name is interpolated directly (callers pass trusted,
            # hard-coded table names — not user input).
            cursor.execute(f"PRAGMA table_info({table_name})")
            return any(row[1] == column_name for row in cursor.fetchall())
        cursor.execute(
            """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name=%s AND column_name=%s
        """,
            (table_name, column_name),
        )
        return cursor.fetchone() is not None
    except Exception:
        return False

View File

@@ -15,7 +15,15 @@ from pathlib import Path
def generate_secret(length: int = 32) -> str:
"""Generate a random secret key."""
"""Generate a random URL-safe secret key.
Args:
length: Number of random bytes (result will be ~1.33x longer due to base64 encoding)
Common values: 32 (→43 chars), 48 (→64 chars)
Returns:
URL-safe base64 encoded string
"""
return secrets.token_urlsafe(length)
@@ -275,6 +283,7 @@ def generate_hub_api_service(config: dict) -> str:
- KOHAKU_HUB_LFS_THRESHOLD_BYTES=1000000
- KOHAKU_HUB_LFS_KEEP_VERSIONS=5
- KOHAKU_HUB_LFS_AUTO_GC=true
- KOHAKU_HUB_AUTO_MIGRATE=true # Auto-confirm database migrations (required for Docker)
## ===== Auth & SMTP Configuration =====
- KOHAKU_HUB_REQUIRE_EMAIL_VERIFICATION=false
@@ -388,35 +397,43 @@ def load_config_file(config_path: Path) -> dict:
config["lakefs_use_postgres"] = lakefs.getboolean("use_postgres", fallback=True)
config["lakefs_db"] = lakefs.get("database", fallback="lakefs")
config["lakefs_encrypt_key"] = lakefs.get(
"encrypt_key", fallback=generate_secret()
"encrypt_key", fallback=generate_secret(32) # 43 chars
)
else:
config["lakefs_use_postgres"] = True
config["lakefs_db"] = "lakefs"
config["lakefs_encrypt_key"] = generate_secret()
config["lakefs_encrypt_key"] = generate_secret(32) # 43 chars
# S3 section
if parser.has_section("s3"):
s3 = parser["s3"]
config["s3_builtin"] = s3.getboolean("builtin", fallback=True)
config["s3_endpoint"] = s3.get("endpoint", fallback="http://minio:9000")
config["s3_access_key"] = s3.get("access_key", fallback="minioadmin")
config["s3_secret_key"] = s3.get("secret_key", fallback="minioadmin")
config["s3_access_key"] = s3.get(
"access_key", fallback=generate_secret(24)
) # 32 chars
config["s3_secret_key"] = s3.get(
"secret_key", fallback=generate_secret(48)
) # 64 chars
config["s3_region"] = s3.get("region", fallback="")
else:
config["s3_builtin"] = True
config["s3_endpoint"] = "http://minio:9000"
config["s3_access_key"] = "minioadmin"
config["s3_secret_key"] = "minioadmin"
config["s3_access_key"] = generate_secret(24) # 32 chars
config["s3_secret_key"] = generate_secret(48) # 64 chars
# Security section
if parser.has_section("security"):
sec = parser["security"]
config["session_secret"] = sec.get("session_secret", fallback=generate_secret())
config["admin_secret"] = sec.get("admin_secret", fallback=generate_secret())
config["session_secret"] = sec.get(
"session_secret", fallback=generate_secret(48)
) # 64 chars
config["admin_secret"] = sec.get(
"admin_secret", fallback=generate_secret(48)
) # 64 chars
else:
config["session_secret"] = generate_secret()
config["admin_secret"] = generate_secret()
config["session_secret"] = generate_secret(48) # 64 chars
config["admin_secret"] = generate_secret(48) # 64 chars
# Network section
if parser.has_section("network"):
@@ -467,9 +484,10 @@ builtin = true
# secret_key = your-secret-key
# region = us-east-1
# If builtin = true, you can customize MinIO credentials:
access_key = minioadmin
secret_key = minioadmin
# If builtin = true, MinIO credentials are auto-generated (recommended)
# You can override by uncommenting and setting custom values:
# access_key = your-custom-access-key
# secret_key = your-custom-secret-key
[security]
# Session and admin secrets (auto-generated if not specified)
@@ -600,8 +618,21 @@ def interactive_config() -> dict:
config["s3_builtin"] = ask_yes_no("Use built-in MinIO container?", default=True)
if config["s3_builtin"]:
config["s3_access_key"] = ask_string("MinIO access key", default="minioadmin")
config["s3_secret_key"] = ask_string("MinIO secret key", default="minioadmin")
# Generate secure random credentials for MinIO
default_access_key = generate_secret(24) # 32 chars
default_secret_key = generate_secret(48) # 64 chars
print(f"Generated MinIO access key: {default_access_key}")
print(f"Generated MinIO secret key: {default_secret_key}")
use_generated = ask_yes_no("Use generated MinIO credentials?", default=True)
if use_generated:
config["s3_access_key"] = default_access_key
config["s3_secret_key"] = default_secret_key
else:
config["s3_access_key"] = ask_string("MinIO access key")
config["s3_secret_key"] = ask_string("MinIO secret key")
config["s3_endpoint"] = "http://minio:9000"
else:
config["s3_endpoint"] = ask_string("S3 endpoint URL")
@@ -613,7 +644,7 @@ def interactive_config() -> dict:
# Security Configuration
print("--- Security Configuration ---")
default_session_secret = generate_secret()
default_session_secret = generate_secret(48) # 64 chars for session encryption
print(f"Generated session secret: {default_session_secret}")
use_generated = ask_yes_no("Use generated session secret?", default=True)
@@ -628,7 +659,7 @@ def interactive_config() -> dict:
if same_as_session:
config["admin_secret"] = config["session_secret"]
else:
default_admin_secret = generate_secret()
default_admin_secret = generate_secret(48) # 64 chars for admin token
print(f"Generated admin secret: {default_admin_secret}")
use_generated_admin = ask_yes_no("Use generated admin secret?", default=True)
@@ -638,7 +669,7 @@ def interactive_config() -> dict:
config["admin_secret"] = ask_string("Admin secret token")
# LakeFS encryption key
config["lakefs_encrypt_key"] = generate_secret()
config["lakefs_encrypt_key"] = generate_secret(32) # 43 chars
# Network configuration
print()

View File

@@ -78,15 +78,13 @@ def run_migrations():
print(f"Database URL: {cfg.app.database_url}")
print()
# Initialize database (create tables if they don't exist)
print("Initializing database...")
init_db()
print("✓ Database initialized\n")
# Discover migrations
migrations = discover_migrations()
if not migrations:
print("No migrations found in db_migrations/")
print("\nInitializing database (creating tables)...")
init_db()
print("✓ Database initialized\n")
return True
print(f"Found {len(migrations)} migration(s):\n")
@@ -122,6 +120,19 @@ def run_migrations():
print()
# Initialize database AFTER migrations (create tables/indexes if needed)
if all_success:
print("\nFinalizing database schema (ensuring all tables/indexes exist)...")
try:
init_db()
print("✓ Database schema finalized\n")
except Exception as e:
print(f"✗ Failed to finalize database schema: {e}")
import traceback
traceback.print_exc()
all_success = False
# Summary
print("=" * 70)
if all_success: