update migration method

This commit is contained in:
Kohaku-Blueleaf
2025-10-16 16:59:34 +08:00
parent c40fbe6f10
commit c48997824f
12 changed files with 1506 additions and 43 deletions

View File

@@ -9,11 +9,49 @@ For example: user/myrepo can exist as both a model and dataset.
import sys
import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
# Import migration utilities
from _migration_utils import should_skip_due_to_future_migrations
# Migration number for this script
MIGRATION_NUMBER = 1
def is_applied(db, cfg):
    """Return True once the UNIQUE constraint on Repository.full_id is gone.

    Postgres: inspect pg_indexes for the 'repository_full_id' index and look
    for the UNIQUE keyword in its definition.  SQLite cannot easily reveal a
    dropped constraint, so table existence is used as a proxy.  Any detection
    error is treated as "applied" so the migration is skipped rather than
    re-run destructively.
    """
    try:
        cursor = db.cursor()
        if cfg.app.db_backend != "postgres":
            # SQLite fallback: assume applied whenever the table exists.
            return db.table_exists("repository")
        cursor.execute(
            """
            SELECT indexname, indexdef
            FROM pg_indexes
            WHERE tablename = 'repository' AND indexname = 'repository_full_id'
            """
        )
        row = cursor.fetchone()
        if not row or len(row) < 2:
            # Index missing (or unreadable) - nothing left to remove.
            return True
        # UNIQUE still present in the index definition => not yet applied.
        return "UNIQUE" not in row[1].upper()
    except Exception:
        # Detection failure: err on the side of skipping the migration.
        return True
def check_migration_needed():
"""Check if unique constraint exists on full_id."""
@@ -44,6 +82,11 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 001: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 001: Already applied (constraint removed)")
return True

View File

@@ -12,9 +12,22 @@ import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db, User, Organization
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists
MIGRATION_NUMBER = 2
def is_applied(db, cfg):
    """Return True when migration 002 is already present in the schema.

    The User.private_quota_bytes column is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_column_exists(db, cfg, "user", "private_quota_bytes")
    return applied
def check_migration_needed():
@@ -171,6 +184,11 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 002: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 002: Already applied (columns exist)")
return True

View File

@@ -7,8 +7,22 @@ import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db, Commit
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_table_exists
MIGRATION_NUMBER = 3
def is_applied(db, cfg):
    """Return True when migration 003 has already run.

    The existence of the commit table itself is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_table_exists(db, "commit")
    return applied
def check_migration_needed():
@@ -21,12 +35,66 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 003: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 003: Already applied (Commit table exists)")
return True
print("Migration 003: Creating Commit table...")
db.create_tables([Commit], safe=True)
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
# PostgreSQL: Create Commit table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS commit (
id SERIAL PRIMARY KEY,
commit_id VARCHAR(255) NOT NULL,
repo_full_id VARCHAR(255) NOT NULL,
author_id INTEGER NOT NULL,
message TEXT,
created_at TIMESTAMP NOT NULL
)
"""
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_commit_id ON commit(commit_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_repo_full_id ON commit(repo_full_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_author_id ON commit(author_id)"
)
else:
# SQLite: Create Commit table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS commit (
id INTEGER PRIMARY KEY AUTOINCREMENT,
commit_id VARCHAR(255) NOT NULL,
repo_full_id VARCHAR(255) NOT NULL,
author_id INTEGER NOT NULL,
message TEXT,
created_at DATETIME NOT NULL
)
"""
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_commit_id ON commit(commit_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_repo_full_id ON commit(repo_full_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS commit_author_id ON commit(author_id)"
)
db.commit()
print("Migration 003: ✓ Completed")
return True

View File

@@ -11,9 +11,22 @@ import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db, Repository
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists
MIGRATION_NUMBER = 4
def is_applied(db, cfg):
    """Return True when migration 004 is already present in the schema.

    The Repository.quota_bytes column is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_column_exists(db, cfg, "repository", "quota_bytes")
    return applied
def check_migration_needed():
@@ -92,6 +105,11 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 004: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 004: Already applied (columns exist)")
return True

View File

@@ -19,9 +19,22 @@ if sys.platform == "win32":
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists
MIGRATION_NUMBER = 5
def is_applied(db, cfg):
    """Return True when migration 005 is already present in the schema.

    The User.full_name column is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_column_exists(db, cfg, "user", "full_name")
    return applied
def check_migration_needed():
@@ -234,6 +247,11 @@ def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 005: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 005: Already applied (columns exist)")
return True

View File

@@ -18,9 +18,30 @@ if sys.platform == "win32":
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import (
should_skip_due_to_future_migrations,
check_column_exists,
check_table_exists,
)
MIGRATION_NUMBER = 6
def is_applied(db, cfg):
    """Return True when migration 006 is already in effect.

    A missing invitation table also counts as applied: init_db() will
    create it with the final schema, so there is nothing to migrate.
    """
    if check_table_exists(db, "invitation"):
        # Table present - max_usage is the column this migration introduces.
        return check_column_exists(db, cfg, "invitation", "max_usage")
    return True
def check_migration_needed():
@@ -155,8 +176,32 @@ def run():
db.connect(reuse_if_open=True)
try:
if not check_migration_needed():
print("Migration 006: Already applied (columns exist)")
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 006: Skipped (superseded by future migration)")
return True
migration_needed = check_migration_needed()
if not migration_needed:
# Check if table exists to provide better message
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
cursor.execute(
"SELECT table_name FROM information_schema.tables WHERE table_name='invitation'"
)
table_exists = cursor.fetchone() is not None
else:
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='invitation'"
)
table_exists = cursor.fetchone() is not None
if not table_exists:
print(
"Migration 006: Skipped (invitation table doesn't exist yet, will be created by init_db)"
)
else:
print("Migration 006: Already applied (columns exist)")
return True
print("Migration 006: Adding multi-use support to Invitation table...")

View File

@@ -18,17 +18,51 @@ if sys.platform == "win32":
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists
MIGRATION_NUMBER = 7
def is_applied(db, cfg):
    """Return True when migration 007 is already present in the schema.

    The User.avatar column is this migration's marker.
    """
    # Detection is delegated to the shared helper in _migration_utils.
    applied = check_column_exists(db, cfg, "user", "avatar")
    return applied
def check_migration_needed():
"""Check if this migration needs to run by checking if columns exist."""
"""Check if this migration needs to run.
Returns True only if:
- User table exists (schema version > 0)
- AND User.avatar doesn't exist (schema version < 7)
Returns False if:
- User table doesn't exist (fresh install, version 0, will be created by init_db)
- OR User.avatar exists (already at version 7+)
"""
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
# Check if User.avatar exists
# First check if user table exists
cursor.execute(
"""
SELECT table_name
FROM information_schema.tables
WHERE table_name='user'
"""
)
if cursor.fetchone() is None:
# Fresh database, tables will be created by init_db() with final schema
return False
# Table exists, check if avatar column exists
cursor.execute(
"""
SELECT column_name
@@ -38,7 +72,15 @@ def check_migration_needed():
)
return cursor.fetchone() is None
else:
# SQLite: Check via PRAGMA
# SQLite: First check if table exists
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='user'"
)
if cursor.fetchone() is None:
# Fresh database, tables will be created by init_db() with final schema
return False
# Table exists, check via PRAGMA if avatar column exists
cursor.execute("PRAGMA table_info(user)")
columns = [row[1] for row in cursor.fetchall()]
return "avatar" not in columns
@@ -133,8 +175,32 @@ def run():
db.connect(reuse_if_open=True)
try:
if not check_migration_needed():
print("Migration 007: Already applied (columns exist)")
# Check if any future migration has been applied
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 007: Skipped (superseded by future migration)")
return True
migration_needed = check_migration_needed()
if not migration_needed:
# Check if table exists to provide better message
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
cursor.execute(
"SELECT table_name FROM information_schema.tables WHERE table_name='user'"
)
table_exists = cursor.fetchone() is not None
else:
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='user'"
)
table_exists = cursor.fetchone() is not None
if not table_exists:
print(
"Migration 007: Skipped (user table doesn't exist yet, will be created by init_db)"
)
else:
print("Migration 007: Already applied (columns exist)")
return True
print("Migration 007: Adding avatar support to User and Organization tables...")

View File

@@ -0,0 +1,791 @@
#!/usr/bin/env python3
"""
Migration 008: Major database refactoring - Merge User/Organization + Add ForeignKeys.
BREAKING CHANGE: This is a major schema refactoring.
BACKUP YOUR DATABASE BEFORE RUNNING THIS MIGRATION!
Changes:
1. Merge Organization table into User table (add is_org flag)
2. Convert all integer ID fields to proper ForeignKey constraints
3. Add owner fields to File and Commit for denormalized access
New schema:
- User.is_org: distinguishes users (FALSE) from organizations (TRUE)
- EmailVerification.user: ForeignKey to User
- Session.user: ForeignKey to User
- Token.user: ForeignKey to User
- Repository.owner: ForeignKey to User (can be user or org)
- File.repository: ForeignKey to Repository
- File.owner: ForeignKey to User (denormalized from repository.owner)
- StagingUpload.repository: ForeignKey to Repository
- StagingUpload.uploader: ForeignKey to User
- UserOrganization.user: ForeignKey to User
- UserOrganization.organization: ForeignKey to User (is_org=TRUE)
- Commit.repository: ForeignKey to Repository
- Commit.author: ForeignKey to User (who made commit)
- Commit.owner: ForeignKey to User (repository owner, denormalized)
- LFSObjectHistory.repository: ForeignKey to Repository
- LFSObjectHistory.file: ForeignKey to File (nullable)
- SSHKey.user: ForeignKey to User
- Invitation.created_by: ForeignKey to User
- Invitation.used_by: ForeignKey to User (nullable)
This migration cannot be easily rolled back. Test thoroughly before deploying to production!
"""
import sys
import os
# Fix Windows encoding issues
if sys.platform == "win32":
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations
MIGRATION_NUMBER = 8
def is_applied(db, cfg):
    """Return True when migration 008's schema changes are already present.

    The User.is_org column is the migration's signature.  The column probe
    is implemented inline (rather than via _migration_utils) to avoid any
    potential circular-import or schema-mismatch issues, and any inspection
    error is reported as "applied" so the migration is skipped, never rerun.
    """
    try:
        cursor = db.cursor()
        if cfg.app.db_backend == "postgres":
            cursor.execute(
                """
                SELECT column_name
                FROM information_schema.columns
                WHERE table_name='user' AND column_name='is_org'
                """,
            )
            return cursor.fetchone() is not None
        # SQLite: column names come from PRAGMA table_info (index 1 per row).
        cursor.execute("PRAGMA table_info(user)")
        column_names = {row[1] for row in cursor.fetchall()}
        return "is_org" in column_names
    except Exception:
        # Safe fallback: a failed check means "do not run this migration".
        return True
def check_migration_needed():
    """Decide whether migration 008 must run against this database.

    True  -> the user table exists but lacks is_org (schema older than 008).
    False -> fresh database (init_db() will build the final schema directly)
             or the is_org column is already present (version 8+).
    """
    cursor = db.cursor()
    if cfg.app.db_backend == "postgres":
        # Does the user table exist at all?
        cursor.execute(
            """
            SELECT table_name
            FROM information_schema.tables
            WHERE table_name='user'
            """
        )
        if cursor.fetchone() is None:
            # Fresh database - init_db() creates the final schema.
            return False
        # Table present: migration is needed iff is_org is missing.
        cursor.execute(
            """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name='user' AND column_name='is_org'
            """
        )
        return cursor.fetchone() is None
    # SQLite path: probe sqlite_master for the table first.
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='user'"
    )
    if cursor.fetchone() is None:
        # Fresh database - init_db() creates the final schema.
        return False
    # Table present: list columns via PRAGMA and look for is_org.
    cursor.execute("PRAGMA table_info(user)")
    column_names = [row[1] for row in cursor.fetchall()]
    return "is_org" not in column_names
def migrate_sqlite():
    """Migrate SQLite database.

    Strategy:
    1. Add new columns to User table (is_org, description, normalized_name)
    2. Migrate Organization data into User table
    3. Create temporary mapping table for old org IDs
    4. Update all FK references
    5. Drop Organization table
    6. Rebuild tables with proper ForeignKey constraints (deferred to Peewee)

    Returns:
        bool: True on success, False if the operator declined the prompt.
    """
    cursor = db.cursor()
    print("\n=== Phase 1: Backup Warning ===")
    print("⚠️ This migration modifies the database schema significantly.")
    print("⚠️ BACKUP YOUR DATABASE before proceeding!")
    print("")
    # Allow auto-confirmation via environment variable (for Docker/CI)
    auto_confirm = os.environ.get("KOHAKU_HUB_AUTO_MIGRATE", "").lower() in (
        "true",
        "1",
        "yes",
    )
    if auto_confirm:
        print(" Auto-confirmation enabled (KOHAKU_HUB_AUTO_MIGRATE=true)")
        response = "yes"
    else:
        response = input("Type 'yes' to continue: ")
    if response.lower() != "yes":
        print("Migration cancelled.")
        return False
    print("\n=== Phase 2: Add new columns to User table ===")
    # Add is_org column; "duplicate column" means a previous (partial) run
    # already added it, which is fine for idempotency.
    try:
        cursor.execute("ALTER TABLE user ADD COLUMN is_org BOOLEAN DEFAULT FALSE")
        print(" ✓ Added User.is_org")
    except Exception as e:
        if "duplicate column" in str(e).lower():
            print(" - User.is_org already exists")
        else:
            raise
    # Add description column (for orgs)
    try:
        cursor.execute("ALTER TABLE user ADD COLUMN description TEXT DEFAULT NULL")
        print(" ✓ Added User.description")
    except Exception as e:
        if "duplicate column" in str(e).lower():
            print(" - User.description already exists")
        else:
            raise
    # Add normalized_name column (for O(1) conflict checking)
    try:
        cursor.execute("ALTER TABLE user ADD COLUMN normalized_name TEXT")
        print(" ✓ Added User.normalized_name")
    except Exception as e:
        if "duplicate column" in str(e).lower():
            print(" - User.normalized_name already exists")
        else:
            raise
    # Note: SQLite doesn't support ALTER COLUMN to make existing columns nullable
    # This will require table recreation, which we'll handle in a full rebuild
    db.commit()
    # Populate normalized_name for existing users
    print(" Populating User.normalized_name for existing users...")
    cursor.execute("SELECT id, username FROM user")
    users = cursor.fetchall()
    for user_id, username in users:
        # Normalize: lowercase, remove hyphens and underscores
        normalized = username.lower().replace("-", "").replace("_", "")
        cursor.execute(
            "UPDATE user SET normalized_name = ? WHERE id = ?", (normalized, user_id)
        )
    db.commit()
    print(f" ✓ Populated normalized_name for {len(users)} existing users")
    print("\n=== Phase 3: Migrate Organization data into User table ===")
    # Check if organization table exists
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='organization'"
    )
    if cursor.fetchone():
        # Get all organizations
        cursor.execute(
            "SELECT id, name, description, private_quota_bytes, public_quota_bytes, "
            "private_used_bytes, public_used_bytes, bio, website, social_media, "
            "avatar, avatar_updated_at, created_at FROM organization"
        )
        orgs = cursor.fetchall()
        print(f" Found {len(orgs)} organization(s) to migrate")
        # Create mapping table for old org IDs -> new user IDs
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS _org_id_mapping (old_org_id INTEGER, new_user_id INTEGER)"
        )
        for org in orgs:
            # Tuple order must match the SELECT column list above.
            (
                org_id,
                name,
                description,
                private_quota_bytes,
                public_quota_bytes,
                private_used_bytes,
                public_used_bytes,
                bio,
                website,
                social_media,
                avatar,
                avatar_updated_at,
                created_at,
            ) = org
            # Normalize name for conflict checking
            normalized = name.lower().replace("-", "").replace("_", "")
            # Insert organization as user with is_org=TRUE
            # email and password_hash will be NULL for organizations
            cursor.execute(
                """
                INSERT INTO user (username, normalized_name, is_org, email, password_hash, email_verified, is_active,
                                  private_quota_bytes, public_quota_bytes, private_used_bytes, public_used_bytes,
                                  full_name, bio, description, website, social_media,
                                  avatar, avatar_updated_at, created_at)
                VALUES (?, ?, TRUE, NULL, NULL, FALSE, TRUE, ?, ?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    name,
                    normalized,
                    private_quota_bytes,
                    public_quota_bytes,
                    private_used_bytes,
                    public_used_bytes,
                    bio,
                    description,
                    website,
                    social_media,
                    avatar,
                    avatar_updated_at,
                    created_at,
                ),
            )
            new_user_id = cursor.lastrowid
            # Store mapping (old org ID -> new user row ID)
            cursor.execute(
                "INSERT INTO _org_id_mapping (old_org_id, new_user_id) VALUES (?, ?)",
                (org_id, new_user_id),
            )
            print(f" ✓ Migrated organization '{name}' (id {org_id} -> {new_user_id})")
        db.commit()
        print(f" ✓ All {len(orgs)} organizations migrated to User table")
    else:
        print(" - No organization table found, skipping")
    print("\n=== Phase 4: Update Foreign Key references ===")
    # NOTE(review): if the organization table was absent, _org_id_mapping was
    # never created and the SELECT below would fail for any existing
    # membership row - confirm this path is unreachable on such databases.
    # 4a. Update UserOrganization.organization to reference new User IDs
    cursor.execute("SELECT id, organization FROM userorganization")
    memberships = cursor.fetchall()
    for membership_id, old_org_id in memberships:
        cursor.execute(
            "SELECT new_user_id FROM _org_id_mapping WHERE old_org_id = ?",
            (old_org_id,),
        )
        result = cursor.fetchone()
        if result:
            new_user_id = result[0]
            cursor.execute(
                "UPDATE userorganization SET organization = ? WHERE id = ?",
                (new_user_id, membership_id),
            )
    db.commit()
    print(f" ✓ Updated {len(memberships)} UserOrganization records")
    # 4b. Add owner column to File table (denormalized from repository.owner)
    print(" Adding File.owner_id column...")
    try:
        cursor.execute("ALTER TABLE file ADD COLUMN owner_id INTEGER")
        print(" ✓ Added File.owner_id column")
    except Exception as e:
        if "duplicate column" not in str(e).lower():
            raise
        print(" - File.owner_id already exists")
    # Update File.owner_id from Repository.owner_id (correlated subquery)
    cursor.execute(
        """
        UPDATE file SET owner_id = (
            SELECT owner_id FROM repository
            WHERE repository.full_id = file.repo_full_id
            LIMIT 1
        )
        """
    )
    print(f" ✓ Updated File.owner_id for all files")
    db.commit()
    # 4c. Add owner column to Commit table (repository owner)
    print(" Adding Commit.owner_id column...")
    try:
        cursor.execute("ALTER TABLE commit ADD COLUMN owner_id INTEGER")
        print(" ✓ Added Commit.owner_id column")
    except Exception as e:
        if "duplicate column" not in str(e).lower():
            raise
        print(" - Commit.owner_id already exists")
    # Update Commit.owner_id from Repository.owner_id (correlated subquery)
    cursor.execute(
        """
        UPDATE commit SET owner_id = (
            SELECT owner_id FROM repository
            WHERE repository.full_id = commit.repo_full_id
            LIMIT 1
        )
        """
    )
    print(f" ✓ Updated Commit.owner_id for all commits")
    db.commit()
    # 4d. Add uploader column to StagingUpload table (no backfill source here)
    print(" Adding StagingUpload.uploader_id column...")
    try:
        cursor.execute(
            "ALTER TABLE stagingupload ADD COLUMN uploader_id INTEGER DEFAULT NULL"
        )
        print(" ✓ Added StagingUpload.uploader_id column")
    except Exception as e:
        if "duplicate column" not in str(e).lower():
            raise
        print(" - StagingUpload.uploader_id already exists")
    db.commit()
    # 4e. Add file FK column to LFSObjectHistory table
    print(" Adding LFSObjectHistory.file_id column...")
    try:
        cursor.execute(
            "ALTER TABLE lfsobjecthistory ADD COLUMN file_id INTEGER DEFAULT NULL"
        )
        print(" ✓ Added LFSObjectHistory.file_id column")
    except Exception as e:
        if "duplicate column" not in str(e).lower():
            raise
        print(" - LFSObjectHistory.file_id already exists")
    # Update LFSObjectHistory.file_id from File table (match on repo + path)
    cursor.execute(
        """
        UPDATE lfsobjecthistory SET file_id = (
            SELECT id FROM file
            WHERE file.repo_full_id = lfsobjecthistory.repo_full_id
            AND file.path_in_repo = lfsobjecthistory.path_in_repo
            LIMIT 1
        )
        """
    )
    print(f" ✓ Updated LFSObjectHistory.file_id for all records")
    db.commit()
    print("\n=== Phase 5: Cleanup ===")
    # Drop temporary mapping table (non-fatal if it was never created)
    try:
        cursor.execute("DROP TABLE _org_id_mapping")
        print(" ✓ Dropped temporary mapping table")
    except Exception as e:
        print(f" - Failed to drop mapping table (non-fatal): {e}")
    # Drop Organization table
    try:
        cursor.execute("DROP TABLE organization")
        print(" ✓ Dropped Organization table")
        db.commit()
    except Exception as e:
        print(f" - Failed to drop organization table: {e}")
        # Non-fatal, continue
    print("\n⚠️ IMPORTANT: Foreign key constraints require table recreation in SQLite")
    print("⚠️ Peewee will handle this automatically on next application startup")
    print("⚠️ The application will recreate tables with proper ForeignKey constraints")
    return True
def migrate_postgres():
    """Migrate PostgreSQL database.

    Mirrors migrate_sqlite() but uses PostgreSQL-specific SQL: quoted "user"
    table, %s placeholders, RETURNING id, a TEMP mapping table, UPDATE..FROM
    joins, and db.rollback() after failed DDL (Postgres aborts the
    transaction on error, unlike SQLite).

    Returns:
        bool: True on success, False if the operator declined the prompt.
    """
    cursor = db.cursor()
    print("\n=== Phase 1: Backup Warning ===")
    print("⚠️ This migration modifies the database schema significantly.")
    print("⚠️ BACKUP YOUR DATABASE before proceeding!")
    print("")
    # Allow auto-confirmation via environment variable (for Docker/CI)
    auto_confirm = os.environ.get("KOHAKU_HUB_AUTO_MIGRATE", "").lower() in (
        "true",
        "1",
        "yes",
    )
    if auto_confirm:
        print(" Auto-confirmation enabled (KOHAKU_HUB_AUTO_MIGRATE=true)")
        response = "yes"
    else:
        response = input("Type 'yes' to continue: ")
    if response.lower() != "yes":
        print("Migration cancelled.")
        return False
    print("\n=== Phase 2: Add new columns to User table ===")
    # Add is_org column; "already exists" means a previous (partial) run
    # added it - roll back the failed statement and continue.
    try:
        cursor.execute('ALTER TABLE "user" ADD COLUMN is_org BOOLEAN DEFAULT FALSE')
        print(" ✓ Added User.is_org")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - User.is_org already exists")
            db.rollback()
        else:
            raise
    # Add description column
    try:
        cursor.execute('ALTER TABLE "user" ADD COLUMN description TEXT DEFAULT NULL')
        print(" ✓ Added User.description")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - User.description already exists")
            db.rollback()
        else:
            raise
    # Add normalized_name column (for O(1) conflict checking)
    try:
        cursor.execute('ALTER TABLE "user" ADD COLUMN normalized_name TEXT')
        print(" ✓ Added User.normalized_name")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - User.normalized_name already exists")
            db.rollback()
        else:
            raise
    # Make email and password_hash nullable (orgs have neither)
    try:
        cursor.execute('ALTER TABLE "user" ALTER COLUMN email DROP NOT NULL')
        cursor.execute('ALTER TABLE "user" ALTER COLUMN password_hash DROP NOT NULL')
        print(" ✓ Made email and password_hash nullable")
    except Exception as e:
        print(f" - Failed to make columns nullable (may already be nullable): {e}")
        db.rollback()
    db.commit()
    # Populate normalized_name for existing users
    print(" Populating User.normalized_name for existing users...")
    cursor.execute('SELECT id, username FROM "user"')
    users = cursor.fetchall()
    for user_id, username in users:
        # Normalize: lowercase, remove hyphens and underscores
        normalized = username.lower().replace("-", "").replace("_", "")
        cursor.execute(
            'UPDATE "user" SET normalized_name = %s WHERE id = %s',
            (normalized, user_id),
        )
    db.commit()
    print(f" ✓ Populated normalized_name for {len(users)} existing users")
    print("\n=== Phase 3: Migrate Organization data into User table ===")
    # Check if organization table exists
    cursor.execute(
        "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'organization')"
    )
    if cursor.fetchone()[0]:
        # Get all organizations
        cursor.execute(
            "SELECT id, name, description, private_quota_bytes, public_quota_bytes, "
            "private_used_bytes, public_used_bytes, bio, website, social_media, "
            "avatar, avatar_updated_at, created_at FROM organization"
        )
        orgs = cursor.fetchall()
        print(f" Found {len(orgs)} organization(s) to migrate")
        # Create temporary mapping table (session-scoped in Postgres)
        cursor.execute(
            "CREATE TEMP TABLE _org_id_mapping (old_org_id INTEGER, new_user_id INTEGER)"
        )
        for org in orgs:
            # Tuple order must match the SELECT column list above.
            (
                org_id,
                name,
                description,
                private_quota_bytes,
                public_quota_bytes,
                private_used_bytes,
                public_used_bytes,
                bio,
                website,
                social_media,
                avatar,
                avatar_updated_at,
                created_at,
            ) = org
            # Normalize name for conflict checking
            normalized = name.lower().replace("-", "").replace("_", "")
            # Insert organization as user with is_org=TRUE
            cursor.execute(
                """
                INSERT INTO "user" (username, normalized_name, is_org, email, password_hash, email_verified, is_active,
                                    private_quota_bytes, public_quota_bytes, private_used_bytes, public_used_bytes,
                                    full_name, bio, description, website, social_media,
                                    avatar, avatar_updated_at, created_at)
                VALUES (%s, %s, TRUE, NULL, NULL, FALSE, TRUE, %s, %s, %s, %s, NULL, %s, %s, %s, %s, %s, %s, %s)
                RETURNING id
                """,
                (
                    name,
                    normalized,
                    private_quota_bytes,
                    public_quota_bytes,
                    private_used_bytes,
                    public_used_bytes,
                    bio,
                    description,
                    website,
                    social_media,
                    avatar,
                    avatar_updated_at,
                    created_at,
                ),
            )
            # RETURNING id yields the new user row's primary key.
            new_user_id = cursor.fetchone()[0]
            # Store mapping (old org ID -> new user row ID)
            cursor.execute(
                "INSERT INTO _org_id_mapping (old_org_id, new_user_id) VALUES (%s, %s)",
                (org_id, new_user_id),
            )
            print(f" ✓ Migrated organization '{name}' (id {org_id} -> {new_user_id})")
        db.commit()
        print(f" ✓ All {len(orgs)} organizations migrated to User table")
    else:
        print(" - No organization table found, skipping")
    print("\n=== Phase 4: Update Foreign Key references ===")
    # NOTE(review): if the organization table was absent, the TEMP table
    # _org_id_mapping was never created and the UPDATE below would fail -
    # confirm this path is unreachable on such databases.
    # 4a. Update UserOrganization.organization to reference new User IDs
    cursor.execute(
        "UPDATE userorganization SET organization = m.new_user_id "
        "FROM _org_id_mapping m WHERE userorganization.organization = m.old_org_id"
    )
    affected = cursor.rowcount
    db.commit()
    print(f" ✓ Updated {affected} UserOrganization records")
    # 4b. Add owner column to File table (denormalized from repository.owner)
    print(" Adding File.owner_id column...")
    try:
        cursor.execute("ALTER TABLE file ADD COLUMN owner_id INTEGER")
        print(" ✓ Added File.owner_id column")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - File.owner_id already exists")
            db.rollback()
        else:
            raise
    # Update File.owner_id from Repository.owner_id (UPDATE..FROM join)
    cursor.execute(
        """
        UPDATE file SET owner_id = repository.owner_id
        FROM repository
        WHERE repository.full_id = file.repo_full_id
        """
    )
    print(f" ✓ Updated File.owner_id for all files")
    db.commit()
    # 4c. Add owner column to Commit table (repository owner)
    print(" Adding Commit.owner_id column...")
    try:
        cursor.execute("ALTER TABLE commit ADD COLUMN owner_id INTEGER")
        print(" ✓ Added Commit.owner_id column")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - Commit.owner_id already exists")
            db.rollback()
        else:
            raise
    # Update Commit.owner_id from Repository.owner_id (UPDATE..FROM join)
    cursor.execute(
        """
        UPDATE commit SET owner_id = repository.owner_id
        FROM repository
        WHERE repository.full_id = commit.repo_full_id
        """
    )
    print(f" ✓ Updated Commit.owner_id for all commits")
    db.commit()
    # 4d. Add uploader column to StagingUpload table (no backfill source here)
    print(" Adding StagingUpload.uploader_id column...")
    try:
        cursor.execute(
            "ALTER TABLE stagingupload ADD COLUMN uploader_id INTEGER DEFAULT NULL"
        )
        print(" ✓ Added StagingUpload.uploader_id column")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - StagingUpload.uploader_id already exists")
            db.rollback()
        else:
            raise
    db.commit()
    # 4e. Add file FK column to LFSObjectHistory table
    print(" Adding LFSObjectHistory.file_id column...")
    try:
        cursor.execute(
            "ALTER TABLE lfsobjecthistory ADD COLUMN file_id INTEGER DEFAULT NULL"
        )
        print(" ✓ Added LFSObjectHistory.file_id column")
    except Exception as e:
        if "already exists" in str(e).lower():
            print(" - LFSObjectHistory.file_id already exists")
            db.rollback()
        else:
            raise
    # Update LFSObjectHistory.file_id from File table (match on repo + path)
    cursor.execute(
        """
        UPDATE lfsobjecthistory SET file_id = file.id
        FROM file
        WHERE file.repo_full_id = lfsobjecthistory.repo_full_id
        AND file.path_in_repo = lfsobjecthistory.path_in_repo
        """
    )
    print(f" ✓ Updated LFSObjectHistory.file_id for all records")
    db.commit()
    print("\n=== Phase 5: Drop old Organization table ===")
    try:
        # CASCADE removes dependent objects (views, FKs) along with the table.
        cursor.execute("DROP TABLE IF EXISTS organization CASCADE")
        print(" ✓ Dropped Organization table")
        db.commit()
    except Exception as e:
        print(f" - Failed to drop organization table: {e}")
        db.rollback()
    print("\n⚠️ IMPORTANT: Table recreation with Foreign Keys")
    print("⚠️ Peewee ORM will handle ForeignKey constraint creation on next startup")
    print("⚠️ You may need to restart the application for changes to take effect")
    return True
def run():
    """Entry point for migration 008.

    Connects to the database, decides whether the migration still needs to
    run, dispatches to the backend-specific migrator, and always closes the
    connection on exit.

    Returns:
        True if the migration completed or was safely skipped, False on failure.
    """
    db.connect(reuse_if_open=True)
    try:
        # A newer migration already includes these changes -> nothing to do.
        if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
            print("Migration 008: Skipped (superseded by future migration)")
            return True

        if not check_migration_needed():
            # Distinguish "already migrated" from "table not created yet"
            # so the log message is actually helpful.
            cursor = db.cursor()
            if cfg.app.db_backend == "postgres":
                probe_sql = (
                    "SELECT table_name FROM information_schema.tables WHERE table_name='user'"
                )
            else:
                probe_sql = (
                    "SELECT name FROM sqlite_master WHERE type='table' AND name='user'"
                )
            cursor.execute(probe_sql)
            if cursor.fetchone() is None:
                print(
                    "Migration 008: Skipped (user table doesn't exist yet, will be created by init_db)"
                )
            else:
                print("Migration 008: Already applied (User.is_org exists)")
            return True

        print("=" * 70)
        print("Migration 008: Major Database Refactoring")
        print("Merging User/Organization tables + Adding ForeignKey constraints")
        print("=" * 70)

        # Dispatch to the backend-specific implementation.
        if cfg.app.db_backend == "postgres":
            result = migrate_postgres()
        else:
            result = migrate_sqlite()

        if result:
            print("\n" + "=" * 70)
            print("Migration 008: ✓ Completed Successfully")
            print("=" * 70)
            print("\nNext steps:")
            print("1. Restart the application to apply ForeignKey constraints")
            print("2. Test all functionality thoroughly")
            print("3. Monitor logs for any foreign key constraint violations")
        return result
    except Exception as e:
        print(f"\nMigration 008: ✗ Failed - {e}")
        import traceback

        traceback.print_exc()
        return False
    finally:
        db.close()
if __name__ == "__main__":
    # Propagate the migration outcome as the process exit code (0 = success)
    # so wrapper scripts (e.g. docker/startup.py) can gate on it.
    sys.exit(0 if run() else 1)

View File

@@ -8,24 +8,61 @@ This directory contains database migration scripts for KohakuHub.
2. **Sequential execution**: Migrations run in numerical order (001, 002, 003, etc.)
3. **Idempotent**: Safe to run multiple times - already-applied migrations are skipped
4. **Auto-run**: Migrations automatically run on container startup via `docker/startup.py`
5. **Self-healing**: Each migration automatically checks if ANY future migration has been applied
- If migration 005 finds that migration 008 is applied, it skips (changes already included)
- Works automatically for any future migrations (009, 010, etc.)
- No hardcoding - migrations discover and check future migrations dynamically
## Migration Order
| # | Name | Description |
|---|------|-------------|
| 001 | repository_schema | Remove unique constraint from Repository.full_id |
| 002 | user_org_quotas | Add private/public quota fields to User/Organization |
| 003 | commit_tracking | Add Commit table for tracking user commits |
| 004 | repo_quotas | Add quota/used_bytes fields to Repository |
| # | Name | Description | Notes |
|---|------|-------------|-------|
| 001 | repository_schema | Remove unique constraint from Repository.full_id | Skipped if post-008 |
| 002 | user_org_quotas | Add private/public quota fields to User/Organization | Skipped if post-008 |
| 003 | commit_tracking | Add Commit table for tracking user commits | Skipped if post-008 |
| 004 | repo_quotas | Add quota/used_bytes fields to Repository | Skipped if post-008 |
| 005 | profiles_and_invitations | Add profile fields and invitation system | Skipped if post-008 |
| 006 | invitation_multi_use | Add multi-use support to invitations | Skipped if post-008 |
| 007 | avatar_support | Add avatar fields to User/Organization | Skipped if post-008 |
| 008 | foreignkey_refactoring | **BREAKING** Merge User/Organization tables + Add ForeignKeys | Major schema change |
## Migration 008 Schema Refactoring
**Migration 008 is a major schema refactoring that:**
- Merges the Organization table into User table (adds `is_org` flag)
- Converts all integer ID references to proper ForeignKey constraints
- Adds denormalized owner fields for performance
**If you have an existing database:**
- Migrations 001-007 will automatically skip (changes already included in 008)
- Migration 008 requires confirmation (or set `KOHAKU_HUB_AUTO_MIGRATE=true` in Docker)
- **BACKUP YOUR DATABASE BEFORE RUNNING 008**
**For fresh/new databases:**
- Recreate the database from scratch instead of running migrations:
```bash
# Stop services
docker-compose down
# Remove old database data
rm -rf hub-meta/postgres-data/*
# Restart (will auto-create schema)
docker-compose up -d
```
- Fresh databases get the latest schema automatically via `init_db()`
- All migrations will skip (nothing to migrate)
## Creating New Migrations
1. Create a new file: `scripts/db_migrations/00X_name.py`
2. Implement these functions:
- `MIGRATION_NUMBER` - Constant with migration number (e.g., 9)
- `is_applied(db, cfg)` - Check if THIS migration has been applied (for future migrations to detect)
- `check_migration_needed()` - Returns True if migration should run
- `migrate_sqlite()` - SQLite migration logic
- `migrate_postgres()` - PostgreSQL migration logic
- `run()` - Main entry point
- `run()` - Main entry point that uses `should_skip_due_to_future_migrations()`
3. Template:
```python
@@ -34,13 +71,38 @@ This directory contains database migration scripts for KohakuHub.
import sys
import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
# Add db_migrations to path (for _migration_utils)
sys.path.insert(0, os.path.dirname(__file__))
from kohakuhub.db import db
from kohakuhub.config import cfg
from _migration_utils import should_skip_due_to_future_migrations, check_column_exists, check_table_exists
# IMPORTANT: Do NOT import Peewee models (User, Repository, etc.)
# Models may be renamed/deleted in future versions, breaking old migrations.
# Use raw SQL queries instead.
MIGRATION_NUMBER = X # Replace X with actual number
def is_applied(db, cfg):
"""Check if THIS migration has been applied.
This function is called by older migrations to detect if this migration
has already applied their changes. Choose a unique signature column/table.
Returns True if this migration is applied, False otherwise.
Errors should return True (treat as applied, skip older migrations).
"""
# Example: Check if a signature column/table exists
return check_column_exists(db, cfg, "mytable", "mycolumn")
def check_migration_needed():
"""Check if columns/tables exist."""
"""Check if this migration needs to run."""
cursor = db.cursor()
if cfg.app.db_backend == "postgres":
cursor.execute("""
@@ -53,22 +115,31 @@ def check_migration_needed():
columns = [row[1] for row in cursor.fetchall()]
return 'mycolumn' not in columns
def migrate_sqlite():
cursor = db.cursor()
cursor.execute("ALTER TABLE mytable ADD COLUMN mycolumn INTEGER")
db.commit()
def migrate_postgres():
cursor = db.cursor()
cursor.execute("ALTER TABLE mytable ADD COLUMN mycolumn BIGINT")
db.commit()
def run():
db.connect(reuse_if_open=True)
try:
# Check if any future migration has been applied (auto-skip if superseded)
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration 00X: Skipped (superseded by future migration)")
return True
if not check_migration_needed():
print("Migration 00X: Already applied")
return True
print("Migration 00X: Running...")
if cfg.app.db_backend == "postgres":
migrate_postgres()
@@ -78,6 +149,8 @@ def run():
return True
except Exception as e:
print(f"Migration 00X: ✗ Failed - {e}")
import traceback
traceback.print_exc()
return False
finally:
db.close()
@@ -101,8 +174,138 @@ python scripts/run_migrations.py
python scripts/db_migrations/001_repository_schema.py
```
## Best Practices
### For Fresh Databases
**Recommended:** Delete database data and let `init_db()` create the latest schema:
```bash
docker-compose down
rm -rf hub-meta/postgres-data/*
docker-compose up -d
```
This is faster and cleaner than running all migrations sequentially.
### For Existing Databases
1. **Backup first!** Always backup before running migrations
2. Run migrations via the automatic startup process
3. Set `KOHAKU_HUB_AUTO_MIGRATE=true` to skip confirmation prompts in Docker
4. Monitor logs for migration status
### For Development
```bash
# Run all pending migrations
python scripts/run_migrations.py
# Run specific migration
python scripts/db_migrations/001_repository_schema.py
```
## Migration System Design
### Self-Healing Future-Migration Detection
Each migration automatically checks if any **future** migration has been applied before running:
**How it works:**
1. Migration 003 is about to run
2. Checks migrations 008, 007, 006, 005, 004 (newest to oldest)
3. If migration 008's `is_applied()` returns True → skip migration 003
4. If all future migrations return False → run migration 003 normally
**Benefits:**
- No hardcoding of specific migration numbers
- Automatically works when you add migration 009, 010, etc.
- Errors/exceptions in `is_applied()` are treated as "not applied" (safe fallback)
- Makes migrations resilient to major schema refactorings
**Each migration must implement:**
```python
def is_applied(db, cfg):
"""Check if THIS migration has been applied.
Choose a unique signature (table or column) that this migration creates.
Errors should return True (treat as applied to be safe).
"""
return check_column_exists(db, cfg, "mytable", "my_signature_column")
```
### Important Guidelines
#### DO NOT Import Peewee Models
**Never import models like `User`, `Repository`, `Organization`, etc. in migrations!**
```python
# ❌ BAD - Will break if model is renamed/deleted
from kohakuhub.db import db, User, Organization
db.create_tables([User], safe=True)
# ✅ GOOD - Use raw SQL instead
from kohakuhub.db import db
cursor = db.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS user (...)")
```
**Why?**
- Migrations are permanent historical records
- Models may be renamed, deleted, or refactored in future versions
- Importing models creates tight coupling that breaks old migrations
- Example: Migration 002 imported `Organization`, which no longer exists after migration 008
**Use raw SQL for all schema changes:**
- Table creation: `CREATE TABLE IF NOT EXISTS`
- Column addition: `ALTER TABLE ... ADD COLUMN`
- Index creation: `CREATE INDEX IF NOT EXISTS`
## Notes
- Migrations are idempotent - safe to re-run
- Failed migrations will prevent server startup
- Each migration auto-skips if ANY future migration has been applied
- Use raw SQL queries instead of importing Peewee models
- Inside `is_applied()`, internal errors should return True ("applied"); exceptions that escape it are treated as "not applied" by the caller (safe fallback)
- Old migration scripts in `scripts/migrate_*.py` are kept for reference
## Utilities (`_migration_utils.py`)
Common helper functions available to all migrations:
```python
from _migration_utils import (
should_skip_due_to_future_migrations, # Check if future migrations applied
check_table_exists, # Check if table exists
check_column_exists, # Check if column exists
)
# Usage in migration
if should_skip_due_to_future_migrations(MIGRATION_NUMBER, db, cfg):
print("Migration skipped - superseded by future migration")
return True
```
## Troubleshooting
### Error: "column repository_id does not exist"
This error indicates the database is in an inconsistent state (mix of old and new schema).
**Cause:** The database has some tables created with the new schema (post-migration 008), but migrations never ran to update the data properly.
**Solution 1 (Recommended):** Drop database and start fresh:
```bash
docker-compose down
rm -rf hub-meta/postgres-data/*
docker-compose up -d
```
**Solution 2:** Manually fix the inconsistency:
1. Connect to database: `docker exec -it postgres psql -U hub -d hubdb`
2. Check schema: `\d file` and `\d repository`
3. If File table has `repository_id` but no data, drop and recreate:
```sql
DROP TABLE IF EXISTS file CASCADE;
DROP TABLE IF EXISTS repository CASCADE;
-- Restart container to recreate tables
```
**Prevention:** Always run migrations before application starts (handled automatically in Docker)

View File

@@ -0,0 +1,151 @@
#!/usr/bin/env python3
"""
Shared utilities for database migrations.
This module provides common functionality for checking migration status
without importing any Peewee models (to avoid breaking old migrations).
"""
import importlib.util
import os
import sys
from pathlib import Path
def should_skip_due_to_future_migrations(
    current_migration_number: int, db, cfg
) -> bool:
    """Check if any future migration has been applied, indicating this migration should skip.

    Args:
        current_migration_number: The number of the current migration (e.g., 3 for migration 003)
        db: Database connection object
        cfg: Config object with db_backend property

    Returns:
        True if any future migration is applied (skip current migration)
        False if no future migrations found (run current migration)

    How it works:
        1. Discovers all migration files with higher numbers
        2. Checks from newest to oldest if migration is applied
        3. If ANY future migration is applied, current migration should skip
        4. Errors/exceptions while probing are treated as "not applied"
    """
    migrations_dir = Path(__file__).parent

    # Collect (number, path) pairs for migrations newer than the current one.
    future_migrations = []
    for file_path in migrations_dir.glob("*.py"):
        if file_path.name.startswith("_"):
            continue  # Skip utility files like _migration_utils.py
        # Extract migration number (e.g., "003" from "003_commit_tracking.py")
        try:
            number = int(file_path.stem.split("_")[0])
        except (ValueError, IndexError):
            continue  # Skip files without a valid numeric prefix
        if number > current_migration_number:
            future_migrations.append((number, file_path))

    if not future_migrations:
        # No future migrations found, don't skip
        return False

    # Check newest first: the most recent applied migration supersedes all.
    future_migrations.sort(reverse=True)

    for number, file_path in future_migrations:
        # Unique module name so repeated loads don't collide.
        module_name = f"_temp_migration_{number:03d}"
        try:
            # Drop any stale copy left over from a previous attempt.
            sys.modules.pop(module_name, None)

            spec = importlib.util.spec_from_file_location(module_name, file_path)
            module = importlib.util.module_from_spec(spec)
            sys.modules[module_name] = module
            spec.loader.exec_module(module)

            is_applied_fn = getattr(module, "is_applied", None)
            if is_applied_fn is not None:
                try:
                    if is_applied_fn(db, cfg):
                        # Future migration is applied; current one should skip.
                        return True
                except Exception:
                    # Probing a future migration against an old schema is
                    # expected to fail sometimes; treat as "not applied".
                    pass
        except Exception:
            # Module failed to load = treat as not applied, keep checking.
            pass
        finally:
            # Always remove the throwaway module, even on error/return paths
            # (the original leaked it into sys.modules when exec_module raised).
            sys.modules.pop(module_name, None)

    # No future migrations are applied, don't skip
    return False
def check_table_exists(db, table_name: str) -> bool:
    """Return True when *table_name* exists in the database.

    Args:
        db: Database connection object (must expose ``table_exists``).
        table_name: Name of the table to check.

    Returns:
        True if the table exists; False if it does not or the check fails.
    """
    try:
        result = db.table_exists(table_name)
    except Exception:
        result = False
    return result
def check_column_exists(db, cfg, table_name: str, column_name: str) -> bool:
    """Return True when *column_name* exists on *table_name*.

    Args:
        db: Database connection object.
        cfg: Config object with db_backend property.
        table_name: Name of the table.
        column_name: Name of the column to check.

    Returns:
        True if the column exists; False if it does not or the check fails.
    """
    try:
        cursor = db.cursor()
        if cfg.app.db_backend != "postgres":
            # SQLite: PRAGMA statements cannot take bound parameters, so the
            # table name is interpolated directly (callers pass trusted,
            # hard-coded table names — not user input).
            cursor.execute(f"PRAGMA table_info({table_name})")
            return any(row[1] == column_name for row in cursor.fetchall())
        cursor.execute(
            """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name=%s AND column_name=%s
        """,
            (table_name, column_name),
        )
        return cursor.fetchone() is not None
    except Exception:
        return False

View File

@@ -15,7 +15,15 @@ from pathlib import Path
def generate_secret(length: int = 32) -> str:
"""Generate a random secret key."""
"""Generate a random URL-safe secret key.
Args:
length: Number of random bytes (result will be ~1.33x longer due to base64 encoding)
Common values: 32 (→43 chars), 48 (→64 chars)
Returns:
URL-safe base64 encoded string
"""
return secrets.token_urlsafe(length)
@@ -275,6 +283,7 @@ def generate_hub_api_service(config: dict) -> str:
- KOHAKU_HUB_LFS_THRESHOLD_BYTES=1000000
- KOHAKU_HUB_LFS_KEEP_VERSIONS=5
- KOHAKU_HUB_LFS_AUTO_GC=true
- KOHAKU_HUB_AUTO_MIGRATE=true # Auto-confirm database migrations (required for Docker)
## ===== Auth & SMTP Configuration =====
- KOHAKU_HUB_REQUIRE_EMAIL_VERIFICATION=false
@@ -388,35 +397,43 @@ def load_config_file(config_path: Path) -> dict:
config["lakefs_use_postgres"] = lakefs.getboolean("use_postgres", fallback=True)
config["lakefs_db"] = lakefs.get("database", fallback="lakefs")
config["lakefs_encrypt_key"] = lakefs.get(
"encrypt_key", fallback=generate_secret()
"encrypt_key", fallback=generate_secret(32) # 43 chars
)
else:
config["lakefs_use_postgres"] = True
config["lakefs_db"] = "lakefs"
config["lakefs_encrypt_key"] = generate_secret()
config["lakefs_encrypt_key"] = generate_secret(32) # 43 chars
# S3 section
if parser.has_section("s3"):
s3 = parser["s3"]
config["s3_builtin"] = s3.getboolean("builtin", fallback=True)
config["s3_endpoint"] = s3.get("endpoint", fallback="http://minio:9000")
config["s3_access_key"] = s3.get("access_key", fallback="minioadmin")
config["s3_secret_key"] = s3.get("secret_key", fallback="minioadmin")
config["s3_access_key"] = s3.get(
"access_key", fallback=generate_secret(24)
) # 32 chars
config["s3_secret_key"] = s3.get(
"secret_key", fallback=generate_secret(48)
) # 64 chars
config["s3_region"] = s3.get("region", fallback="")
else:
config["s3_builtin"] = True
config["s3_endpoint"] = "http://minio:9000"
config["s3_access_key"] = "minioadmin"
config["s3_secret_key"] = "minioadmin"
config["s3_access_key"] = generate_secret(24) # 32 chars
config["s3_secret_key"] = generate_secret(48) # 64 chars
# Security section
if parser.has_section("security"):
sec = parser["security"]
config["session_secret"] = sec.get("session_secret", fallback=generate_secret())
config["admin_secret"] = sec.get("admin_secret", fallback=generate_secret())
config["session_secret"] = sec.get(
"session_secret", fallback=generate_secret(48)
) # 64 chars
config["admin_secret"] = sec.get(
"admin_secret", fallback=generate_secret(48)
) # 64 chars
else:
config["session_secret"] = generate_secret()
config["admin_secret"] = generate_secret()
config["session_secret"] = generate_secret(48) # 64 chars
config["admin_secret"] = generate_secret(48) # 64 chars
# Network section
if parser.has_section("network"):
@@ -467,9 +484,10 @@ builtin = true
# secret_key = your-secret-key
# region = us-east-1
# If builtin = true, you can customize MinIO credentials:
access_key = minioadmin
secret_key = minioadmin
# If builtin = true, MinIO credentials are auto-generated (recommended)
# You can override by uncommenting and setting custom values:
# access_key = your-custom-access-key
# secret_key = your-custom-secret-key
[security]
# Session and admin secrets (auto-generated if not specified)
@@ -600,8 +618,21 @@ def interactive_config() -> dict:
config["s3_builtin"] = ask_yes_no("Use built-in MinIO container?", default=True)
if config["s3_builtin"]:
config["s3_access_key"] = ask_string("MinIO access key", default="minioadmin")
config["s3_secret_key"] = ask_string("MinIO secret key", default="minioadmin")
# Generate secure random credentials for MinIO
default_access_key = generate_secret(24) # 32 chars
default_secret_key = generate_secret(48) # 64 chars
print(f"Generated MinIO access key: {default_access_key}")
print(f"Generated MinIO secret key: {default_secret_key}")
use_generated = ask_yes_no("Use generated MinIO credentials?", default=True)
if use_generated:
config["s3_access_key"] = default_access_key
config["s3_secret_key"] = default_secret_key
else:
config["s3_access_key"] = ask_string("MinIO access key")
config["s3_secret_key"] = ask_string("MinIO secret key")
config["s3_endpoint"] = "http://minio:9000"
else:
config["s3_endpoint"] = ask_string("S3 endpoint URL")
@@ -613,7 +644,7 @@ def interactive_config() -> dict:
# Security Configuration
print("--- Security Configuration ---")
default_session_secret = generate_secret()
default_session_secret = generate_secret(48) # 64 chars for session encryption
print(f"Generated session secret: {default_session_secret}")
use_generated = ask_yes_no("Use generated session secret?", default=True)
@@ -628,7 +659,7 @@ def interactive_config() -> dict:
if same_as_session:
config["admin_secret"] = config["session_secret"]
else:
default_admin_secret = generate_secret()
default_admin_secret = generate_secret(48) # 64 chars for admin token
print(f"Generated admin secret: {default_admin_secret}")
use_generated_admin = ask_yes_no("Use generated admin secret?", default=True)
@@ -638,7 +669,7 @@ def interactive_config() -> dict:
config["admin_secret"] = ask_string("Admin secret token")
# LakeFS encryption key
config["lakefs_encrypt_key"] = generate_secret()
config["lakefs_encrypt_key"] = generate_secret(32) # 43 chars
# Network configuration
print()

View File

@@ -78,15 +78,13 @@ def run_migrations():
print(f"Database URL: {cfg.app.database_url}")
print()
# Initialize database (create tables if they don't exist)
print("Initializing database...")
init_db()
print("✓ Database initialized\n")
# Discover migrations
migrations = discover_migrations()
if not migrations:
print("No migrations found in db_migrations/")
print("\nInitializing database (creating tables)...")
init_db()
print("✓ Database initialized\n")
return True
print(f"Found {len(migrations)} migration(s):\n")
@@ -122,6 +120,19 @@ def run_migrations():
print()
# Initialize database AFTER migrations (create tables/indexes if needed)
if all_success:
print("\nFinalizing database schema (ensuring all tables/indexes exist)...")
try:
init_db()
print("✓ Database schema finalized\n")
except Exception as e:
print(f"✗ Failed to finalize database schema: {e}")
import traceback
traceback.print_exc()
all_success = False
# Summary
print("=" * 70)
if all_success: