Files
KohakuHub/scripts/generate_docker_compose.py
2026-01-22 23:29:07 +08:00

1559 lines
56 KiB
Python

#!/usr/bin/env python3
"""
Docker Compose Generator for KohakuHub
This script generates a docker-compose.yml file based on user preferences.
Can read configuration from kohakuhub.conf file for automation.
"""
import argparse
import configparser
import os
import re
import secrets
import shutil
import sys
import tomllib
from datetime import datetime
from pathlib import Path
def generate_secret(length: int = 32) -> str:
    """Generate a random URL-safe secret key.

    Args:
        length: Number of random bytes to draw. The encoded result is
            roughly 1.33x longer (base64): 32 -> 43 chars, 48 -> 64 chars.

    Returns:
        A URL-safe base64 encoded random string.
    """
    return secrets.token_urlsafe(length)
def ask_yes_no(prompt: str, default: bool = True) -> bool:
    """Ask a yes/no question on stdin; empty input selects the default."""
    # Capitalize the default choice in the hint, shell-style.
    hint = "Y/n" if default else "y/N"
    while True:
        answer = input(f"{prompt} [{hint}]: ").strip().lower()
        if not answer:
            return default
        if answer in ("y", "yes"):
            return True
        if answer in ("n", "no"):
            return False
        print("Please answer 'y' or 'n'")
def ask_string(prompt: str, default: str = "") -> str:
    """Ask for a string on stdin.

    With a default, empty input returns the default; without one, the
    question repeats until a non-empty answer is given.
    """
    if default:
        answer = input(f"{prompt} [{default}]: ").strip()
        return answer or default
    while True:
        answer = input(f"{prompt}: ").strip()
        if answer:
            return answer
        print("This field is required")
def ask_int(prompt: str, default: int) -> int:
    """Ask for an integer on stdin; empty input returns the default."""
    while True:
        raw = input(f"{prompt} [{default}]: ").strip()
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            print("Please enter a valid number")
def generate_postgres_service(config: dict) -> str:
    """Generate the PostgreSQL service block, or "" when not built-in."""
    # Guard clause: nothing to emit when an external server is used.
    if not config["postgres_builtin"]:
        return ""
    return f"""  postgres:
    image: postgres:15
    container_name: postgres
    restart: always
    environment:
      - POSTGRES_USER={config["postgres_user"]}
      - POSTGRES_PASSWORD={config["postgres_password"]}
      - POSTGRES_DB={config["postgres_db"]}
    ports:
      - "25432:5432" # Optional: for external access
    volumes:
      - ./hub-meta/postgres-data:/var/lib/postgresql/data
"""
def generate_minio_service(config: dict) -> str:
    """Generate the MinIO service block, or "" when MinIO is not selected."""
    # Only emitted for the built-in MinIO provider.
    if not (config["s3_builtin"] and config.get("s3_provider") == "minio"):
        return ""
    return f"""  minio:
    image: quay.io/minio/minio:latest
    container_name: minio
    command: server /data --console-address ":29000"
    environment:
      - MINIO_ROOT_USER={config["s3_access_key"]}
      - MINIO_ROOT_PASSWORD={config["s3_secret_key"]}
    ports:
      - "29001:9000" # S3 API
      - "29000:29000" # Web Console
    volumes:
      - ./hub-storage/minio-data:/data
      - ./hub-meta/minio-data:/root/.minio
"""
def generate_garage_service(config: dict) -> str:
    """Generate the Garage S3 service block, or "" when Garage is not selected.

    Secrets come from the config when present; fresh ones are generated
    only when missing.

    Args:
        config: Flat config dict (see load_config_file / migrate flow).

    Returns:
        YAML fragment for the garage service, or an empty string.
    """
    if not (config["s3_builtin"] and config.get("s3_provider") == "garage"):
        return ""
    # NOTE: dict.get(key, default) would evaluate the generator even when
    # the key exists, silently discarding entropy on every call — evaluate
    # the fallbacks lazily instead.
    garage_rpc_secret = config.get("garage_rpc_secret") or secrets.token_hex(32)
    garage_admin_token = config.get("garage_admin_token") or generate_secret(32)
    garage_metrics_token = config.get("garage_metrics_token") or generate_secret(32)
    return f"""  garage:
    image: dxflrs/garage:v2.1.0
    container_name: garage
    restart: unless-stopped
    ports:
      - "39000:39000" # S3 API
      - "39001:39001" # RPC/Admin API
      - "39002:39002" # S3 Web
      - "39003:39003" # Admin API
    environment:
      - RUST_LOG=garage=info
      - GARAGE_RPC_SECRET={garage_rpc_secret}
      - GARAGE_ADMIN_TOKEN={garage_admin_token}
      - GARAGE_METRICS_TOKEN={garage_metrics_token}
    volumes:
      - ./docker/garage/garage.toml:/etc/garage.toml
      - ./hub-storage/garage-meta:/var/lib/garage/meta
      - ./hub-storage/garage-data:/var/lib/garage/data
"""
def generate_lakefs_service(config: dict) -> str:
    """Generate the LakeFS service block of the compose file.

    Wires LakeFS to the chosen metadata database (built-in/external
    PostgreSQL, or local SQLite) and the chosen S3 blockstore
    (built-in MinIO/Garage, or an external endpoint).
    """
    # Built-in services LakeFS must wait for.
    deps = []
    if config["s3_builtin"]:
        provider = config.get("s3_provider")
        if provider == "minio":
            deps.append("minio")
        elif provider == "garage":
            deps.append("garage")
    if config["postgres_builtin"] and config["lakefs_use_postgres"]:
        deps.append("postgres")
    depends_on_str = ""
    if deps:
        depends_on_str = "    depends_on:\n" + "".join(
            f"      - {dep}\n" for dep in deps
        )

    # Metadata database: PostgreSQL (built-in or external) or local SQLite.
    if config["lakefs_use_postgres"]:
        if config["postgres_builtin"]:
            pg_host, pg_port = "postgres", 5432
        else:
            pg_host, pg_port = config["postgres_host"], config["postgres_port"]
        lakefs_db_config = f"""      - LAKEFS_DATABASE_TYPE=postgres
      - LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING=postgres://{config["postgres_user"]}:{config["postgres_password"]}@{pg_host}:{pg_port}/{config["lakefs_db"]}?sslmode=disable"""
        # Environment consumed by the database init script.
        init_env_vars = f"""      - POSTGRES_HOST={pg_host}
      - POSTGRES_PORT={pg_port}
      - POSTGRES_USER={config["postgres_user"]}
      - POSTGRES_PASSWORD={config["postgres_password"]}
      - POSTGRES_DB={config["postgres_db"]}
      - LAKEFS_DB={config["lakefs_db"]}"""
    else:
        lakefs_db_config = """      - LAKEFS_DATABASE_TYPE=local
      - LAKEFS_DATABASE_LOCAL_PATH=/var/lakefs/data/metadata.db"""
        init_env_vars = ""

    # S3 blockstore configuration.
    if config["s3_builtin"]:
        if config.get("s3_provider") == "garage":
            s3_endpoint = "http://garage:39000"
            s3_region = "garage"  # Garage uses custom region name
        else:  # minio
            s3_endpoint = "http://minio:9000"
            s3_region = "us-east-1"  # MinIO works with us-east-1
        force_path_style = "true"
        s3_bucket = "hub-storage"
    else:
        s3_endpoint = config["s3_endpoint"]
        # Use path-style for all non-AWS endpoints (MinIO, CloudFlare R2,
        # custom S3); only AWS S3 (*.amazonaws.com) uses virtual-hosted style.
        force_path_style = "false" if "amazonaws.com" in s3_endpoint.lower() else "true"
        s3_region = config.get("s3_region", "us-east-1")
        s3_bucket = config.get("s3_bucket") or "hub-storage"

    # Entrypoint + extra volumes for database initialization.
    entrypoint_config = ""
    volumes_config = """      - ./hub-meta/lakefs-data:/var/lakefs/data
      - ./hub-meta/lakefs-cache:/lakefs/data/cache"""
    if config["lakefs_use_postgres"]:
        entrypoint_config = """    entrypoint: ["/bin/sh", "/scripts/lakefs-entrypoint.sh"]
    command: ["run"]"""
        volumes_config += """
      - ./docker/lakefs/lakefs-entrypoint.sh:/scripts/lakefs-entrypoint.sh:ro
      - ./docker/lakefs/init-databases.sh:/scripts/init-databases.sh:ro"""

    # Extra bridge network when postgres or s3 live in another compose stack.
    lakefs_networks_str = ""
    if config.get("external_network") and (
        not config["postgres_builtin"] or not config["s3_builtin"]
    ):
        lakefs_networks_str = f"""    networks:
      - default
      - {config["external_network"]}
"""

    return f"""  lakefs:
    build:
      context: ./docker/lakefs
    container_name: lakefs
{entrypoint_config}
    environment:
{lakefs_db_config}
{init_env_vars}
      - LAKEFS_BLOCKSTORE_TYPE=s3
      - LAKEFS_BLOCKSTORE_S3_ENDPOINT={s3_endpoint}
      - LAKEFS_BLOCKSTORE_S3_BUCKET={s3_bucket}
      - LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE={force_path_style}
      - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID={config["s3_access_key"]}
      - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY={config["s3_secret_key"]}
      - LAKEFS_BLOCKSTORE_S3_REGION={s3_region}
      - LAKEFS_AUTH_ENCRYPT_SECRET_KEY={config["lakefs_encrypt_key"]}
      - LAKEFS_LOGGING_FORMAT=text
      - LAKEFS_LISTEN_ADDRESS=0.0.0.0:28000
    ports:
      - "28000:28000" # LakeFS admin UI (optional)
    user: "${{UID}}:${{GID}}"
{depends_on_str}    volumes:
{volumes_config}
{lakefs_networks_str}"""
def generate_hub_api_service(config: dict) -> str:
    """Generate the hub-api service block of the compose file.

    Args:
        config: Flat config dict (see load_config_file / migrate flow).

    Returns:
        YAML fragment for the hub-api service.
    """
    depends_on = ["lakefs"]
    if config["postgres_builtin"]:
        depends_on.insert(0, "postgres")
    if config["s3_builtin"]:
        if config.get("s3_provider") == "minio":
            depends_on.append("minio")
        elif config.get("s3_provider") == "garage":
            depends_on.append("garage")
    depends_on_str = "    depends_on:\n"
    for dep in depends_on:
        depends_on_str += f"      - {dep}\n"

    # Extra bridge network when postgres or s3 live in another compose stack.
    networks_str = ""
    if config.get("external_network") and (
        not config["postgres_builtin"] or not config["s3_builtin"]
    ):
        networks_str = f"""    networks:
      - default
      - {config["external_network"]}
"""

    # Database configuration
    if config["postgres_builtin"]:
        db_url = f"postgresql://{config['postgres_user']}:{config['postgres_password']}@postgres:5432/{config['postgres_db']}"
    else:
        db_url = f"postgresql://{config['postgres_user']}:{config['postgres_password']}@{config['postgres_host']}:{config['postgres_port']}/{config['postgres_db']}"

    # S3 configuration
    if config["s3_builtin"]:
        if config.get("s3_provider") == "garage":
            s3_endpoint_internal = "http://garage:39000"
            s3_endpoint_public = "http://127.0.0.1:39000"
            s3_region = "garage"  # Garage uses custom region name
            # Garage: MUST use s3v4 (only signature version supported)
            s3_sig_version_line = (
                "      - KOHAKU_HUB_S3_SIGNATURE_VERSION=s3v4 # Required for Garage"
            )
        else:  # minio
            s3_endpoint_internal = "http://minio:9000"
            s3_endpoint_public = "http://127.0.0.1:29001"
            s3_region = "us-east-1"  # MinIO works with us-east-1
            # MinIO: Don't set signature_version (uses default/s3v2-compatible)
            s3_sig_version_line = "      # - KOHAKU_HUB_S3_SIGNATURE_VERSION=s3v4 # Uncomment for R2/AWS S3 (leave commented for MinIO)"
    else:
        s3_endpoint_internal = config["s3_endpoint"]
        s3_endpoint_public = config["s3_endpoint"]
        s3_region = config.get("s3_region", "us-east-1")
        # External S3: Use configured value or default to s3v4
        s3_sig_version = config.get("s3_signature_version", "s3v4")
        if s3_sig_version:
            s3_sig_version_line = (
                f"      - KOHAKU_HUB_S3_SIGNATURE_VERSION={s3_sig_version}"
            )
        else:
            s3_sig_version_line = (
                "      # - KOHAKU_HUB_S3_SIGNATURE_VERSION=s3v4 # Uncomment if needed"
            )

    # BUGFIX: "s3_bucket" may be absent (configs produced by load_config_file
    # never set it); default to "hub-storage" like generate_lakefs_service.
    s3_bucket = config.get("s3_bucket") or "hub-storage"

    # No Garage-specific config needed (manual setup)
    garage_config_section = ""

    return f"""  hub-api:
    build: .
    container_name: hub-api
    restart: always
    ports:
      - "48888:48888" # Internal API port (optional, for debugging)
{depends_on_str}    environment:
      ## ===== CRITICAL: Endpoint Configuration (MUST CHANGE) =====
      ## These determine how users access your KohakuHub instance
      - KOHAKU_HUB_BASE_URL=http://127.0.0.1:28080 # Change to your public URL (e.g., https://hub.example.com)
      - KOHAKU_HUB_S3_PUBLIC_ENDPOINT={s3_endpoint_public} # Change to your S3 public URL
      ## ===== CRITICAL: Security Configuration (MUST CHANGE) =====
      - KOHAKU_HUB_SESSION_SECRET={config["session_secret"]}
      - KOHAKU_HUB_ADMIN_SECRET_TOKEN={config["admin_secret"]}
      - KOHAKU_HUB_DATABASE_KEY={config["database_key"]}
      ## ===== Performance Configuration =====
      - KOHAKU_HUB_WORKERS=4 # Number of worker processes (1-8, recommend: CPU cores)
      ## ===== Database Configuration =====
      - KOHAKU_HUB_DB_BACKEND=postgres
      - KOHAKU_HUB_DATABASE_URL={db_url}
      ## ===== S3 Storage Configuration =====
      - KOHAKU_HUB_S3_ENDPOINT={s3_endpoint_internal}
      - KOHAKU_HUB_S3_ACCESS_KEY={config["s3_access_key"]}
      - KOHAKU_HUB_S3_SECRET_KEY={config["s3_secret_key"]}
      - KOHAKU_HUB_S3_BUCKET={s3_bucket}
      - KOHAKU_HUB_S3_REGION={s3_region} # auto (recommended), us-east-1, or your AWS region
{s3_sig_version_line}
      ## ===== LakeFS Configuration =====
      - KOHAKU_HUB_LAKEFS_ENDPOINT=http://lakefs:28000
      - KOHAKU_HUB_LAKEFS_REPO_NAMESPACE=hf
      # LakeFS credentials auto-generated on first start
      ## ===== Application Configuration =====
      - KOHAKU_HUB_SITE_NAME=KohakuHub
      - KOHAKU_HUB_LFS_THRESHOLD_BYTES=1000000
      - KOHAKU_HUB_LFS_MULTIPART_THRESHOLD_BYTES=100_000_000 # 100MB - use multipart for files larger than this
      - KOHAKU_HUB_LFS_MULTIPART_CHUNK_SIZE_BYTES=50_000_000 # 50MB - size of each part (min 5MB except last)
      - KOHAKU_HUB_LFS_KEEP_VERSIONS=5
      - KOHAKU_HUB_LFS_AUTO_GC=true
      - KOHAKU_HUB_AUTO_MIGRATE=true # Auto-confirm database migrations (required for Docker)
      - KOHAKU_HUB_LOG_LEVEL=INFO
      - KOHAKU_HUB_LOG_FORMAT=terminal
      - KOHAKU_HUB_LOG_DIR=logs/
      ## ===== Auth & SMTP Configuration =====
      - KOHAKU_HUB_REQUIRE_EMAIL_VERIFICATION=false
      - KOHAKU_HUB_INVITATION_ONLY=false # Set to true to require invitation for registration
      - KOHAKU_HUB_SESSION_EXPIRE_HOURS=168
      - KOHAKU_HUB_TOKEN_EXPIRE_DAYS=365
      - KOHAKU_HUB_ADMIN_ENABLED=true
      # SMTP (Optional - for email verification)
      - KOHAKU_HUB_SMTP_ENABLED=false
      - KOHAKU_HUB_SMTP_HOST=smtp.gmail.com
      - KOHAKU_HUB_SMTP_PORT=587
      - KOHAKU_HUB_SMTP_USERNAME=
      - KOHAKU_HUB_SMTP_PASSWORD=
      - KOHAKU_HUB_SMTP_FROM=noreply@kohakuhub.local
      - KOHAKU_HUB_SMTP_TLS=true
      ## ===== Storage Quota Configuration (Optional) =====
      - KOHAKU_HUB_DEFAULT_USER_PRIVATE_QUOTA_BYTES=10_000_000
      - KOHAKU_HUB_DEFAULT_USER_PUBLIC_QUOTA_BYTES=100_000_000
      - KOHAKU_HUB_DEFAULT_ORG_PRIVATE_QUOTA_BYTES=10_000_000
      - KOHAKU_HUB_DEFAULT_ORG_PUBLIC_QUOTA_BYTES=100_000_000{garage_config_section}
    volumes:
      - ./hub-meta/hub-api:/hub-api-creds
{networks_str}"""
def generate_hub_ui_service() -> str:
    """Generate the hub-ui (nginx) service block; static configuration."""
    service = """  hub-ui:
    image: nginx:alpine
    container_name: hub-ui
    restart: always
    ports:
      - "28080:80" # Public web interface
    volumes:
      - ./src/kohaku-hub-ui/dist:/usr/share/nginx/html
      - ./src/kohaku-hub-admin/dist:/usr/share/nginx/html-admin
      - ./docker/nginx/default.conf:/etc/nginx/conf.d/default.conf
    depends_on:
      - hub-api
"""
    return service
def generate_docker_compose(config: dict) -> str:
    """Assemble the complete docker-compose.yml content from service blocks."""
    # UI and API first, then the selected S3 backend, LakeFS, and postgres.
    services = [generate_hub_ui_service(), generate_hub_api_service(config)]
    if config["s3_builtin"]:
        if config.get("s3_provider") == "garage":
            services.append(generate_garage_service(config))
        else:
            services.append(generate_minio_service(config))
    services.append(generate_lakefs_service(config))
    if config["postgres_builtin"]:
        services.append(generate_postgres_service(config))

    header = """# docker-compose.yml - KohakuHub Configuration
# Generated by KohakuHub docker-compose generator
# Customize for your deployment
services:
"""
    content = header + "\n".join(services)

    # Network configuration
    content += "\nnetworks:\n  default:\n    name: hub-net\n"
    if config.get("external_network"):
        content += f"""  {config["external_network"]}:
    external: true
"""
    return content
def load_config_file(config_path: Path) -> dict:
    """Load configuration from an INI file (kohakuhub.conf).

    Every section is optional; missing sections or keys fall back to the
    same defaults the interactive flow uses, and secrets are freshly
    generated when not supplied.

    Args:
        config_path: Path to the INI configuration file.

    Returns:
        Flat config dict consumed by the generate_* functions.

    Exits:
        With status 1 when the file does not exist.
    """
    if not config_path.exists():
        print(f"Error: Config file not found: {config_path}")
        sys.exit(1)

    parser = configparser.ConfigParser()
    parser.read(config_path, encoding="utf-8")
    config = {}

    # PostgreSQL section
    if parser.has_section("postgresql"):
        pg = parser["postgresql"]
        config["postgres_builtin"] = pg.getboolean("builtin", fallback=True)
        config["postgres_host"] = pg.get("host", fallback="postgres")
        config["postgres_port"] = pg.getint("port", fallback=5432)
        config["postgres_user"] = pg.get("user", fallback="hub")
        config["postgres_password"] = pg.get("password", fallback="hubpass")
        config["postgres_db"] = pg.get("database", fallback="kohakuhub")
    else:
        config["postgres_builtin"] = True
        config["postgres_user"] = "hub"
        config["postgres_password"] = "hubpass"
        config["postgres_db"] = "kohakuhub"
        config["postgres_host"] = "postgres"
        config["postgres_port"] = 5432

    # LakeFS section
    if parser.has_section("lakefs"):
        lakefs = parser["lakefs"]
        config["lakefs_use_postgres"] = lakefs.getboolean("use_postgres", fallback=True)
        config["lakefs_db"] = lakefs.get("database", fallback="lakefs")
        config["lakefs_encrypt_key"] = lakefs.get(
            "encrypt_key",
            fallback=generate_secret(32),  # 43 chars
        )
    else:
        config["lakefs_use_postgres"] = True
        config["lakefs_db"] = "lakefs"
        config["lakefs_encrypt_key"] = generate_secret(32)  # 43 chars

    # S3 section
    if parser.has_section("s3"):
        s3 = parser["s3"]
        config["s3_builtin"] = s3.getboolean("builtin", fallback=True)
        config["s3_provider"] = s3.get(
            "provider", fallback="minio"
        )  # minio (default) or garage
        # Set defaults based on provider
        if config["s3_provider"] == "garage":
            default_endpoint = "http://garage:3900"
            default_region = "garage"
            default_sig_version = "s3v4"  # Garage requires s3v4
        else:  # minio
            default_endpoint = "http://minio:9000"
            default_region = "us-east-1"
            default_sig_version = ""  # MinIO uses default
        config["s3_endpoint"] = s3.get("endpoint", fallback=default_endpoint)
        config["s3_access_key"] = s3.get(
            "access_key", fallback=generate_secret(24)
        )  # 32 chars
        config["s3_secret_key"] = s3.get(
            "secret_key", fallback=generate_secret(48)
        )  # 64 chars
        # BUGFIX: the bucket was never read, so generate_hub_api_service
        # crashed with KeyError("s3_bucket") on configs loaded from file.
        config["s3_bucket"] = s3.get("bucket", fallback="hub-storage")
        config["s3_region"] = s3.get("region", fallback=default_region)
        config["s3_signature_version"] = s3.get(
            "signature_version", fallback=default_sig_version
        )
    else:
        # BUGFIX: the old defaults declared provider=minio but pointed the
        # endpoint/region/signature at Garage; use consistent MinIO defaults.
        config["s3_builtin"] = True
        config["s3_provider"] = "minio"  # Default to MinIO (works out of box)
        config["s3_endpoint"] = "http://minio:9000"
        config["s3_access_key"] = generate_secret(24)  # 32 chars
        config["s3_secret_key"] = generate_secret(48)  # 64 chars
        config["s3_bucket"] = "hub-storage"
        config["s3_region"] = "us-east-1"
        config["s3_signature_version"] = ""  # MinIO uses default signing

    # Security section
    if parser.has_section("security"):
        sec = parser["security"]
        config["session_secret"] = sec.get(
            "session_secret", fallback=generate_secret(48)
        )  # 64 chars
        config["admin_secret"] = sec.get(
            "admin_secret", fallback=generate_secret(48)
        )  # 64 chars
        config["database_key"] = sec.get(
            "database_key", fallback=generate_secret(32)
        )  # 43 chars
        # Garage secrets (if using Garage)
        config["garage_rpc_secret"] = sec.get(
            "garage_rpc_secret", fallback=secrets.token_hex(32)
        )  # 64 hex chars
        config["garage_admin_token"] = sec.get(
            "garage_admin_token", fallback=generate_secret(32)
        )  # Admin API token
        config["garage_metrics_token"] = sec.get(
            "garage_metrics_token", fallback=generate_secret(32)
        )  # Metrics API token
    else:
        config["session_secret"] = generate_secret(48)  # 64 chars
        config["admin_secret"] = generate_secret(48)  # 64 chars
        config["database_key"] = generate_secret(32)  # 43 chars for encryption
        config["garage_rpc_secret"] = secrets.token_hex(32)  # 64 hex chars for Garage
        config["garage_admin_token"] = generate_secret(32)
        config["garage_metrics_token"] = generate_secret(32)

    # Network section
    if parser.has_section("network"):
        net = parser["network"]
        config["external_network"] = net.get("external_network", fallback="")
    else:
        config["external_network"] = ""

    return config
def generate_config_template(output_path: Path):
    """Write a template kohakuhub.conf to output_path and print usage hints."""
    template = """# KohakuHub Configuration Template
# Use this file to automate docker-compose.yml generation
# Usage: python scripts/generate_docker_compose.py --config kohakuhub.conf
[postgresql]
# Use built-in PostgreSQL container (true) or external server (false)
builtin = true
# If builtin = false, specify connection details:
# host = your-postgres-host.com
# port = 5432
# PostgreSQL credentials
user = hub
password = hubpass
database = kohakuhub
[lakefs]
# Use PostgreSQL for LakeFS (true) or SQLite (false)
use_postgres = true
# LakeFS database name (separate from hub-api database)
database = lakefs
# LakeFS encryption key (auto-generated if not specified)
# encrypt_key = your-secret-key-here
[s3]
# Use built-in S3 container (true) or external S3 (false)
builtin = true
# S3 Provider: minio (default, auto-setup) or garage (manual setup, no CVEs)
provider = minio
# If builtin = false, specify S3 endpoint and credentials:
# endpoint = https://your-s3-endpoint.com
# access_key = your-access-key
# secret_key = your-secret-key
# bucket = hub-storage
# region = us-east-1 # us-east-1 (default), auto for R2, garage for Garage, or specific AWS region
# signature_version = s3v4 # s3v4 for Garage/R2/AWS S3, leave empty for MinIO
# If builtin = true, credentials are auto-generated (recommended)
# You can override by uncommenting and setting custom values:
# access_key = your-custom-access-key
# secret_key = your-custom-secret-key
# For Garage:
# region = garage
# signature_version = s3v4 # Required for Garage
# For MinIO:
# region = us-east-1
# signature_version = # Leave empty for MinIO (uses default)
[security]
# Session and admin secrets (auto-generated if not specified)
# session_secret = your-session-secret-here
# admin_secret = your-admin-secret-here
# database_key = your-database-encryption-key-here # For encrypting external fallback tokens
[network]
# External bridge network (optional)
# Use this if PostgreSQL or S3 are in different Docker Compose setups
# Create the network first: docker network create shared-network
# external_network = shared-network
"""
    output_path.write_text(template, encoding="utf-8")
    # Same lines as before, emitted as a single print call.
    print(
        f"[OK] Generated configuration template: {output_path}\n"
        "\n"
        "Edit this file with your settings, then run:\n"
        f" python scripts/generate_docker_compose.py --config {output_path}"
    )
def migrate_existing_config(docker_compose_path: Path, config_toml_path: Path) -> dict:
    """Migrate existing configuration files interactively.

    Reads existing values and only prompts for new fields.

    Args:
        docker_compose_path: Path to docker-compose.yml
        config_toml_path: Path to config.toml

    Returns:
        Config dict with migrated values
    """
    config = {}

    # Read environment entries from the existing docker-compose.yml.
    # BUGFIX: the old regex only captured KOHAKU_HUB_* variables, so the
    # LAKEFS_*/GARAGE_* secrets this function promises to preserve could
    # never be found and were silently regenerated on every migration.
    existing_env = {}
    if docker_compose_path.exists():
        try:
            with open(docker_compose_path, "r", encoding="utf-8") as f:
                content = f.read()
            for line in content.split("\n"):
                match = re.match(
                    r"\s*- ([A-Z][A-Z0-9_]*)=(.+?)(?:\s+#.*)?$", line.strip()
                )
                if match:
                    key, value = match.groups()
                    existing_env[key] = value.strip()
            print(f"✓ Loaded {len(existing_env)} settings from docker-compose.yml")
        except Exception as e:
            print(f"⚠ Failed to read docker-compose.yml: {e}")

    # Read existing config.toml
    existing_toml = {}
    if config_toml_path.exists():
        try:
            with open(config_toml_path, "rb") as f:
                existing_toml = tomllib.load(f)
            print("✓ Loaded settings from config.toml")
        except Exception as e:
            print(f"⚠ Failed to read config.toml: {e}")

    print()
    print("=" * 60)
    print("Migration Mode - Only New Fields Will Be Asked")
    print("=" * 60)
    print()

    def get_existing(env_key: str, toml_path: str | None = None):
        """Look up a value in docker-compose env, then a dotted config.toml path."""
        if env_key in existing_env:
            return existing_env[env_key]
        if toml_path:
            keys = toml_path.split(".")
            val = existing_toml
            for key in keys:
                if isinstance(val, dict) and key in val:
                    val = val[key]
                else:
                    return None
            return val
        return None

    def get_scalar(env_key: str, toml_path: str | None = None):
        """Like get_existing, but treat a dict (partial TOML path) as missing."""
        value = get_existing(env_key, toml_path)
        return None if isinstance(value, dict) else value

    # PostgreSQL Configuration
    print("--- PostgreSQL Configuration ---")
    config["postgres_builtin"] = get_existing("KOHAKU_HUB_DB_BACKEND") != "sqlite"
    print(f"Using: {'Built-in PostgreSQL' if config['postgres_builtin'] else 'SQLite'}")

    if config["postgres_builtin"]:
        # Parse DATABASE_URL: postgresql://user:pass@host:port/db
        db_url = get_existing("KOHAKU_HUB_DATABASE_URL", "app.database_url")
        parsed = False
        if isinstance(db_url, str) and db_url.startswith("postgresql://"):
            match = re.match(r"postgresql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)", db_url)
            if match:
                user, password, host, port, db = match.groups()
                config["postgres_user"] = user
                config["postgres_password"] = password
                config["postgres_host"] = host
                config["postgres_port"] = int(port)
                config["postgres_db"] = db
                print(f" User: {user}")
                print(f" Database: {db}")
                parsed = True
        if not parsed:
            # Fallback defaults
            config["postgres_user"] = "hub"
            config["postgres_password"] = "hubpass"
            config["postgres_host"] = "postgres"
            config["postgres_port"] = 5432
            config["postgres_db"] = "kohakuhub"
    else:
        config["postgres_user"] = ""
        config["postgres_password"] = ""
        config["postgres_host"] = ""
        config["postgres_port"] = 5432
        config["postgres_db"] = ""

    # LakeFS Configuration
    config["lakefs_use_postgres"] = True  # Most installations use postgres
    config["lakefs_db"] = "kohakuhub_lakefs"

    # S3 Configuration
    print("\n--- S3 Configuration ---")
    s3_endpoint = get_scalar("KOHAKU_HUB_S3_ENDPOINT", "s3.endpoint")

    # Detect provider from endpoint
    if s3_endpoint:
        if "minio" in s3_endpoint:
            config["s3_builtin"] = True
            config["s3_provider"] = "minio"
        elif "garage" in s3_endpoint or ":3900" in s3_endpoint:
            config["s3_builtin"] = True
            config["s3_provider"] = "garage"
        else:
            config["s3_builtin"] = False
            config["s3_provider"] = "external"
    else:
        # Default to Garage (no CVE)
        config["s3_builtin"] = True
        config["s3_provider"] = "garage"
    print(
        f"Using: {'Built-in ' + config['s3_provider'].title() if config['s3_builtin'] else 'External S3'}"
    )

    config["s3_access_key"] = (
        get_scalar("KOHAKU_HUB_S3_ACCESS_KEY", "s3.access_key") or generate_secret(24)
    )
    config["s3_secret_key"] = (
        get_scalar("KOHAKU_HUB_S3_SECRET_KEY", "s3.secret_key") or generate_secret(48)
    )
    config["s3_bucket"] = get_scalar("KOHAKU_HUB_S3_BUCKET", "s3.bucket") or "hub-storage"

    # Set endpoint based on provider
    if not s3_endpoint:
        s3_endpoint = (
            "http://garage:3900"
            if config["s3_provider"] == "garage"
            else "http://minio:9000"
        )
    config["s3_endpoint"] = s3_endpoint

    # Set region based on provider
    config["s3_region"] = get_scalar("KOHAKU_HUB_S3_REGION", "s3.region") or (
        "garage" if config["s3_provider"] == "garage" else "us-east-1"
    )

    # Set signature version
    existing_sig = get_scalar("KOHAKU_HUB_S3_SIGNATURE_VERSION", "s3.signature_version")
    if existing_sig:
        config["s3_signature_version"] = existing_sig
    else:
        config["s3_signature_version"] = (
            "s3v4" if config["s3_provider"] == "garage" else ""
        )

    # Security Configuration
    print("\n--- Security Configuration ---")
    config["session_secret"] = get_scalar(
        "KOHAKU_HUB_SESSION_SECRET", "auth.session_secret"
    )
    config["admin_secret"] = get_scalar(
        "KOHAKU_HUB_ADMIN_SECRET_TOKEN", "admin.secret_token"
    )

    # NEW FIELD: database_key
    config["database_key"] = get_scalar("KOHAKU_HUB_DATABASE_KEY", "app.database_key")
    if not config["database_key"]:
        print("\n🆕 New field: DATABASE_KEY (for encrypting external fallback tokens)")
        default_db_key = generate_secret(32)
        print(f" Generated: {default_db_key}")
        use_generated = ask_yes_no("Use generated database key?", default=True)
        config["database_key"] = (
            default_db_key if use_generated else ask_string("Database encryption key")
        )
    else:
        print(" Database key: (exists)")

    # Preserve existing secrets (DO NOT regenerate!)
    if not config["session_secret"]:
        print("\n⚠ Session secret missing - generating new one")
        config["session_secret"] = generate_secret(48)
    else:
        print(" Session secret: (exists)")
    if not config["admin_secret"]:
        print("\n⚠ Admin secret missing - generating new one")
        config["admin_secret"] = generate_secret(48)
    else:
        print(" Admin secret: (exists)")

    # LakeFS encryption key - MUST preserve existing value when possible.
    # BUGFIX: the generated compose names this LAKEFS_AUTH_ENCRYPT_SECRET_KEY;
    # check that first, then the legacy LAKEFS_ENCRYPT_SECRET_KEY name.
    config["lakefs_encrypt_key"] = get_scalar(
        "LAKEFS_AUTH_ENCRYPT_SECRET_KEY"
    ) or get_scalar("LAKEFS_ENCRYPT_SECRET_KEY")
    if not config["lakefs_encrypt_key"]:
        print("\n⚠ LakeFS encryption key missing - generating new one")
        print(" WARNING: This will make existing LakeFS data inaccessible!")
        config["lakefs_encrypt_key"] = generate_secret(32)
    else:
        print(" LakeFS encrypt key: (exists - PRESERVED)")

    # Garage secrets - MUST preserve existing values or generate only if missing
    config["garage_rpc_secret"] = get_scalar("GARAGE_RPC_SECRET")
    if not config["garage_rpc_secret"]:
        print("\n⚠ Garage RPC secret missing - generating new one")
        config["garage_rpc_secret"] = secrets.token_hex(32)
    else:
        print(" Garage RPC secret: (exists - PRESERVED)")
    config["garage_admin_token"] = get_scalar("GARAGE_ADMIN_TOKEN") or generate_secret(32)
    config["garage_metrics_token"] = get_scalar("GARAGE_METRICS_TOKEN") or generate_secret(32)

    # Network
    config["external_network"] = ""

    print("\n✓ Migration complete - all existing values preserved")
    print("✓ New fields added")

    # Write updated values back to docker-compose.yml IN PLACE
    if docker_compose_path.exists():
        update_docker_compose_inplace(
            docker_compose_path,
            {
                "KOHAKU_HUB_DATABASE_KEY": config["database_key"],
                "LAKEFS_ENCRYPT_SECRET_KEY": config["lakefs_encrypt_key"],
                "LAKEFS_BLOCKSTORE_S3_BUCKET": config["s3_bucket"],
            },
        )

    # Write updated config.toml IN PLACE
    if config_toml_path.exists():
        update_config_toml_inplace(
            config_toml_path,
            {
                "app.database_key": config["database_key"],
                "s3.bucket": config["s3_bucket"],
                "fallback.require_auth": False,
            },
        )

    return config
def update_docker_compose_inplace(filepath: Path, new_vars: dict):
    """Update docker-compose.yml in place, adding new environment variables.

    Variables already present in the hub-api environment section are updated
    (inline comments preserved); missing ones are appended at the end of the
    section.

    BUGFIX: the previous state machine could never enter the environment
    section via its second branch (it required the flag to already be True),
    relied on a fragile 10-line lookback, and never left the section, so
    volume entries were scanned as env vars.

    Args:
        filepath: Path to docker-compose.yml
        new_vars: Dict of {ENV_VAR: value} to add/update
    """
    with open(filepath, "r", encoding="utf-8") as f:
        lines = f.readlines()

    output_lines = []
    in_hub_api = False  # inside the hub-api service definition
    in_env = False  # inside hub-api's environment list
    added_vars = set()

    for i, line in enumerate(lines):
        stripped = line.strip()

        if "hub-api:" in line:
            in_hub_api = True
            in_env = False
            output_lines.append(line)
            continue

        if in_hub_api and not in_env and stripped.startswith("environment:"):
            in_env = True
            output_lines.append(line)
            continue

        if in_env and stripped.startswith("- "):
            indent = len(line) - len(line.lstrip())
            replaced = None
            for var_name, var_value in new_vars.items():
                if var_name in line:
                    # Update existing variable, keeping any trailing comment.
                    comment_match = re.search(r"(#.+)$", line)
                    comment = " " + comment_match.group(1) if comment_match else ""
                    replaced = f"{' ' * indent}- {var_name}={var_value}{comment}\n"
                    added_vars.add(var_name)
                    break
            output_lines.append(replaced if replaced is not None else line)

            # End of the env list: next line is neither an entry nor a comment.
            next_stripped = lines[i + 1].strip() if i + 1 < len(lines) else ""
            if not (next_stripped.startswith("- ") or next_stripped.startswith("#")):
                for var_name, var_value in new_vars.items():
                    if var_name not in added_vars:
                        output_lines.append(f"{' ' * indent}- {var_name}={var_value}\n")
                        added_vars.add(var_name)
                in_env = False
            continue

        output_lines.append(line)

    with open(filepath, "w", encoding="utf-8") as f:
        f.writelines(output_lines)
    print(f"✓ Updated {filepath} with {len(added_vars)} new variables")
def update_config_toml_inplace(filepath: Path, new_fields: dict):
"""Update config.toml in place, adding new fields.
Args:
filepath: Path to config.toml
new_fields: Dict of {"section.key": value} to add
"""
try:
with open(filepath, "rb") as f:
existing = tomllib.load(f)
except (FileNotFoundError, tomllib.TOMLDecodeError):
existing = {}
# Add new fields
for path, value in new_fields.items():
keys = path.split(".")
current = existing
for key in keys[:-1]:
if key not in current:
current[key] = {}
current = current[key]
# Only add if doesn't exist
if keys[-1] not in current:
current[keys[-1]] = value
# Write back (simple TOML format)
lines = []
for section in [
"s3",
"lakefs",
"smtp",
"auth",
"admin",
"app",
"quota",
"fallback",
]:
if section in existing:
lines.append(f"[{section}]")
for key, val in existing[section].items():
if isinstance(val, bool):
lines.append(f"{key} = {str(val).lower()}")
elif isinstance(val, (int, float)):
lines.append(f"{key} = {val}")
elif isinstance(val, str):
lines.append(f'{key} = "{val}"')
else:
lines.append(f'{key} = "{val}"')
lines.append("")
with open(filepath, "w", encoding="utf-8") as f:
f.write("\n".join(lines))
print(f"✓ Updated {filepath}")
def main():
    """CLI entry point for the docker-compose generator.

    Supports three modes:
      * ``--generate-config``: write a template kohakuhub.conf and exit.
      * ``--config PATH``: non-interactive generation from a config file.
      * no flags: interactive wizard; if docker-compose.yml / config.toml
        already exist, a migration mode is offered that backs them up and
        preserves existing values.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description="Generate docker-compose.yml for KohakuHub"
    )
    parser.add_argument(
        "--config",
        "-c",
        type=Path,
        help="Path to configuration file (kohakuhub.conf)",
    )
    parser.add_argument(
        "--generate-config",
        action="store_true",
        help="Generate a template configuration file",
    )
    args = parser.parse_args()
    # Generate template if requested
    if args.generate_config:
        # Template is written next to the repo root (parent of scripts/).
        # NOTE(review): generate_config_template is defined elsewhere in this file.
        template_path = Path(__file__).parent.parent / "kohakuhub.conf"
        generate_config_template(template_path)
        return
    print("=" * 60)
    print("KohakuHub Docker Compose Generator")
    print("=" * 60)
    print()
    # Check for existing configuration files in the repo root
    repo_root = Path(__file__).parent.parent
    existing_docker_compose = repo_root / "docker-compose.yml"
    existing_config_toml = repo_root / "config.toml"
    has_existing_config = (
        existing_docker_compose.exists() or existing_config_toml.exists()
    )
    if has_existing_config and not args.config:
        print("🔍 Found existing configuration files:")
        if existing_docker_compose.exists():
            print(f"{existing_docker_compose}")
        if existing_config_toml.exists():
            print(f"{existing_config_toml}")
        print()
        use_migrate = ask_yes_no(
            "Use migration mode? (preserves existing values, only asks for new fields)",
            default=True,
        )
        print()
        if use_migrate:
            # Create timestamped backups before touching anything
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            if existing_docker_compose.exists():
                backup_path = repo_root / f"docker-compose.yml.backup.{timestamp}"
                shutil.copy2(existing_docker_compose, backup_path)
                print(f"✓ Created backup: {backup_path}")
            if existing_config_toml.exists():
                backup_path = repo_root / f"config.toml.backup.{timestamp}"
                shutil.copy2(existing_config_toml, backup_path)
                print(f"✓ Created backup: {backup_path}")
            print()
            # Load existing config and migrate (only asks for missing fields)
            config = migrate_existing_config(
                existing_docker_compose, existing_config_toml
            )
        else:
            print("⚠ Starting fresh configuration (existing files will be overwritten)")
            print()
            config = interactive_config()
    elif args.config:
        # Non-interactive mode: read everything from the given config file
        print(f"Loading configuration from: {args.config}")
        print()
        config = load_config_file(args.config)
        # Show loaded configuration
        print("Loaded configuration:")
        print(
            f" PostgreSQL: {'Built-in' if config['postgres_builtin'] else 'External'}"
        )
        if not config["postgres_builtin"]:
            print(f" Host: {config['postgres_host']}:{config['postgres_port']}")
            print(f" Database: {config['postgres_db']}")
        print(
            f" LakeFS: {'PostgreSQL' if config['lakefs_use_postgres'] else 'SQLite'}"
        )
        if config["lakefs_use_postgres"]:
            print(f" Database: {config['lakefs_db']}")
        print(f" S3: {'Built-in MinIO' if config['s3_builtin'] else 'External S3'}")
        if not config["s3_builtin"]:
            print(f" Endpoint: {config['s3_endpoint']}")
        print()
    else:
        # Interactive mode - fresh config
        config = interactive_config()
    # Generate and write files
    generate_and_write_files(config)
def interactive_config() -> dict:
    """Run interactive configuration.

    Walks the user through PostgreSQL, LakeFS, S3, security, and network
    prompts in order, generating secure random defaults for every secret.

    Returns:
        dict of settings (postgres_*, lakefs_*, s3_*, session_secret,
        admin_secret, database_key, lakefs_encrypt_key, garage_* tokens,
        external_network) consumed by the file generators.
    """
    config = {}
    # PostgreSQL Configuration
    print("--- PostgreSQL Configuration ---")
    config["postgres_builtin"] = ask_yes_no(
        "Use built-in PostgreSQL container?", default=True
    )
    if config["postgres_builtin"]:
        config["postgres_user"] = ask_string("PostgreSQL username", default="hub")
        config["postgres_password"] = ask_string(
            "PostgreSQL password", default="hubpass"
        )
        config["postgres_db"] = ask_string(
            "PostgreSQL database name for hub-api", default="kohakuhub"
        )
        # Fixed in-network address of the bundled container
        config["postgres_host"] = "postgres"
        config["postgres_port"] = 5432
    else:
        config["postgres_host"] = ask_string("PostgreSQL host")
        config["postgres_port"] = ask_int("PostgreSQL port", default=5432)
        config["postgres_user"] = ask_string("PostgreSQL username")
        config["postgres_password"] = ask_string("PostgreSQL password")
        config["postgres_db"] = ask_string(
            "PostgreSQL database name for hub-api", default="kohakuhub"
        )
    # LakeFS database configuration
    print()
    print("--- LakeFS Database Configuration ---")
    config["lakefs_use_postgres"] = ask_yes_no(
        "Use PostgreSQL for LakeFS? (No = use local SQLite)", default=True
    )
    if config["lakefs_use_postgres"]:
        config["lakefs_db"] = ask_string(
            "PostgreSQL database name for LakeFS", default="lakefs"
        )
    else:
        # SQLite mode: no separate database name needed
        config["lakefs_db"] = None
    print()
    # S3 Configuration
    print("--- S3 Storage Configuration ---")
    config["s3_builtin"] = ask_yes_no("Use built-in S3 container?", default=True)
    if config["s3_builtin"]:
        print()
        print("Available S3 providers:")
        print(" 1. MinIO (default, works out of box, has unresolved CVEs)")
        print(" 2. Garage (lightweight, no CVEs, requires manual setup)")
        while True:
            choice = input("Choose S3 provider [1]: ").strip()
            if not choice or choice == "1":
                config["s3_provider"] = "minio"
                break
            elif choice == "2":
                config["s3_provider"] = "garage"
                break
            else:
                print("Please choose 1 or 2")
        # Generate secure random credentials
        default_access_key = generate_secret(24)  # 32 chars
        default_secret_key = generate_secret(48)  # 64 chars
        provider_name = config["s3_provider"].title()
        print(f"\nGenerated {provider_name} access key: {default_access_key}")
        print(f"Generated {provider_name} secret key: {default_secret_key}")
        use_generated = ask_yes_no(
            f"Use generated {provider_name} credentials?", default=True
        )
        if use_generated:
            config["s3_access_key"] = default_access_key
            config["s3_secret_key"] = default_secret_key
        else:
            config["s3_access_key"] = ask_string(f"{provider_name} access key")
            config["s3_secret_key"] = ask_string(f"{provider_name} secret key")
        # Set provider-specific defaults
        if config["s3_provider"] == "garage":
            config["s3_endpoint"] = "http://garage:3900"
            config["s3_region"] = "garage"
            config["s3_signature_version"] = "s3v4"  # Garage requires s3v4
        else:  # minio
            config["s3_endpoint"] = "http://minio:9000"
            config["s3_region"] = "us-east-1"
            config["s3_signature_version"] = ""  # MinIO uses default (don't set)
    else:
        config["s3_provider"] = "external"
        config["s3_endpoint"] = ask_string("S3 endpoint URL")
        config["s3_access_key"] = ask_string("S3 access key")
        config["s3_secret_key"] = ask_string("S3 secret key")
        config["s3_region"] = ask_string("S3 region", default="us-east-1")
        # Ask about signature version for external S3
        print()
        print("Signature version:")
        print(" - (empty): Use default (for MinIO compatibility)")
        print(" - s3v4: Cloudflare R2, AWS S3, Garage (recommended for R2/AWS/Garage)")
        sig_input = ask_string(
            "S3 signature version (s3v4 or leave empty)", default="s3v4"
        )
        # The literal answer "none" is treated as "unset"
        config["s3_signature_version"] = (
            sig_input if sig_input.lower() != "none" else ""
        )
    print()
    # Security Configuration
    print("--- Security Configuration ---")
    default_session_secret = generate_secret(48)  # 64 chars for session encryption
    print(f"Generated session secret: {default_session_secret}")
    use_generated = ask_yes_no("Use generated session secret?", default=True)
    if use_generated:
        config["session_secret"] = default_session_secret
    else:
        config["session_secret"] = ask_string("Session secret key")
    print()
    same_as_session = ask_yes_no("Use same secret for admin token?", default=False)
    if same_as_session:
        config["admin_secret"] = config["session_secret"]
    else:
        default_admin_secret = generate_secret(48)  # 64 chars for admin token
        print(f"Generated admin secret: {default_admin_secret}")
        use_generated_admin = ask_yes_no("Use generated admin secret?", default=True)
        if use_generated_admin:
            config["admin_secret"] = default_admin_secret
        else:
            config["admin_secret"] = ask_string("Admin secret token")
    # Database encryption key (for external tokens)
    print()
    default_database_key = generate_secret(32)  # 43 chars for Fernet encryption
    print(f"Generated database encryption key: {default_database_key}")
    use_generated_db = ask_yes_no("Use generated database key?", default=True)
    if use_generated_db:
        config["database_key"] = default_database_key
    else:
        config["database_key"] = ask_string("Database encryption key")
    # LakeFS encryption key (never prompted; always generated)
    config["lakefs_encrypt_key"] = generate_secret(32)  # 43 chars
    # Garage secrets — generated even when Garage isn't the chosen provider;
    # harmless if unused by the compose template.
    config["garage_rpc_secret"] = secrets.token_hex(32)  # 64 hex chars
    config["garage_admin_token"] = generate_secret(32)
    config["garage_metrics_token"] = generate_secret(32)
    # Network configuration
    print()
    print("--- Network Configuration ---")
    use_external_network = False
    # Only relevant when at least one service lives outside this compose file
    if not config["postgres_builtin"] or not config["s3_builtin"]:
        use_external_network = ask_yes_no(
            "Use external Docker network for cross-compose communication?",
            default=False,
        )
    if use_external_network:
        config["external_network"] = ask_string(
            "External network name", default="shared-network"
        )
        print()
        print(f"Note: Make sure the network exists:")
        print(f" docker network create {config['external_network']}")
    else:
        config["external_network"] = ""
    return config
def generate_config_toml(config: dict) -> str:
    """Render config.toml content for running the dev server on the host.

    Built-in services are reached through their host-mapped localhost ports
    (postgres 25432, MinIO 29001, Garage 39000, LakeFS 28000); external
    services keep the endpoints the user supplied.
    """
    # Database URL seen from the host machine.
    creds = f"{config['postgres_user']}:{config['postgres_password']}"
    if config["postgres_builtin"]:
        db_url = f"postgresql://{creds}@localhost:25432/{config['postgres_db']}"
    else:
        host_port = f"{config['postgres_host']}:{config['postgres_port']}"
        db_url = f"postgresql://{creds}@{host_port}/{config['postgres_db']}"

    # S3 endpoints/region seen from the host machine.
    if not config["s3_builtin"]:
        endpoint_internal = config["s3_endpoint"]
        endpoint_public = config["s3_endpoint"]
        region = config.get("s3_region", "us-east-1")
    elif config.get("s3_provider") == "garage":
        endpoint_internal = endpoint_public = "http://localhost:39000"
        region = "garage"
    else:  # minio
        endpoint_internal = endpoint_public = "http://localhost:29001"
        region = "us-east-1"

    # Assemble the file from three parts: header+[s3], the optional
    # signature_version line, then the remaining sections.
    parts = [
        f"""# KohakuHub Configuration File (TOML)
# Generated by KohakuHub docker-compose generator
# Use this for local development server
[s3]
endpoint = "{endpoint_internal}"
public_endpoint = "{endpoint_public}"
access_key = "{config["s3_access_key"]}"
secret_key = "{config["s3_secret_key"]}"
bucket = "hub-storage"
region = "{region}"
force_path_style = true
"""
    ]

    # signature_version is required for Garage, R2, and AWS S3 but must be
    # omitted for MinIO (which falls back to its own default).
    if config.get("s3_signature_version"):
        parts.append(f'signature_version = "{config["s3_signature_version"]}"\n')
    else:
        parts.append("# signature_version not set (MinIO uses s3v2 by default)\n")

    parts.append(
        f"""
[lakefs]
endpoint = "http://localhost:28000"
repo_namespace = "hf"
# Credentials auto-generated on first start
[smtp]
enabled = false
host = "smtp.gmail.com"
port = 587
username = ""
password = ""
from_email = "noreply@kohakuhub.local"
use_tls = true
[auth]
require_email_verification = false
invitation_only = false
session_secret = "{config["session_secret"]}"
session_expire_hours = 168 # 7 days
token_expire_days = 365
[admin]
enabled = true
secret_token = "{config["admin_secret"]}"
[quota]
default_user_private_quota_bytes = 10_000_000 # 10MB
default_user_public_quota_bytes = 100_000_000 # 100MB
default_org_private_quota_bytes = 10_000_000 # 10MB
default_org_public_quota_bytes = 100_000_000 # 100MB
[fallback]
enabled = true
cache_ttl_seconds = 300
timeout_seconds = 10
max_concurrent_requests = 5
require_auth = false # Set true to require authentication for fallback access
[app]
base_url = "http://localhost:48888" # Dev server URL
api_base = "/api"
db_backend = "postgres"
database_url = "{db_url}"
database_key = "{config["database_key"]}" # For encrypting external fallback tokens
# LFS Configuration (sizes in decimal: 1MB = 1,000,000 bytes)
lfs_threshold_bytes = 5_000_000 # 5MB - files larger use LFS
lfs_multipart_threshold_bytes = 100_000_000 # 100MB - files larger use multipart upload
lfs_multipart_chunk_size_bytes = 50_000_000 # 50MB - size of each part (min 5MB except last)
lfs_keep_versions = 5 # Keep last K versions of each LFS file
lfs_auto_gc = true # Automatically delete old LFS objects on commit
# Download tracking settings
download_time_bucket_seconds = 900 # 15 minutes - session deduplication window
download_session_cleanup_threshold = 100 # Trigger cleanup when sessions > this
download_keep_sessions_days = 30 # Keep sessions from last N days
debug_log_payloads = false
site_name = "KohakuHub"
"""
    )
    return "".join(parts)
def generate_and_write_files(config: dict):
    """Generate docker-compose.yml and config.toml, then print a summary.

    Side effects: writes both files into the repository root (parent of
    this script's directory) and prints a configuration summary plus
    next-step instructions.
    """
    print()
    print("=" * 60)
    print("Generating docker-compose.yml and config.toml...")
    print("=" * 60)
    # Generate docker-compose content
    # NOTE(review): generate_docker_compose is defined elsewhere in this file.
    compose_content = generate_docker_compose(config)
    # Write docker-compose.yml
    compose_path = Path(__file__).parent.parent / "docker-compose.yml"
    compose_path.write_text(compose_content, encoding="utf-8")
    print()
    print(f"[OK] Successfully generated: {compose_path}")
    # Generate and write config.toml
    config_content = generate_config_toml(config)
    config_path = Path(__file__).parent.parent / "config.toml"
    config_path.write_text(config_content, encoding="utf-8")
    print(f"[OK] Successfully generated: {config_path}")
    if config["lakefs_use_postgres"]:
        print(
            "[OK] Database initialization scripts will run automatically when LakeFS starts"
        )
        print(" - scripts/init-databases.sh")
        print(" - scripts/lakefs-entrypoint.sh")
    print()
    print("Configuration Summary:")
    print("-" * 60)
    print(f"PostgreSQL: {'Built-in' if config['postgres_builtin'] else 'Custom'}")
    if config["postgres_builtin"]:
        print(f" Hub-API Database: {config['postgres_db']}")
        if config["lakefs_use_postgres"]:
            print(f" LakeFS Database: {config['lakefs_db']}")
    else:
        print(f" Host: {config['postgres_host']}:{config['postgres_port']}")
        print(f" Hub-API Database: {config['postgres_db']}")
        if config["lakefs_use_postgres"]:
            print(f" LakeFS Database: {config['lakefs_db']}")
    print(
        f"LakeFS Database Backend: {'PostgreSQL' if config['lakefs_use_postgres'] else 'SQLite'}"
    )
    if config["s3_builtin"]:
        provider_name = config.get("s3_provider", "minio").title()
        print(f"S3 Storage: Built-in {provider_name}")
    else:
        print(f"S3 Storage: Custom S3")
        print(f" Endpoint: {config['s3_endpoint']}")
    if config.get("external_network"):
        print(f"External Network: {config['external_network']}")
    # Only a prefix of each secret is printed, so logs don't leak them.
    print(f"Session Secret: {config['session_secret'][:20]}...")
    print(f"Admin Secret: {config['admin_secret'][:20]}...")
    print("-" * 60)
    print()
    print("Next steps:")
    step_num = 1
    if config.get("external_network"):
        print(f"{step_num}. Create external network if not exists:")
        print(f" docker network create {config['external_network']}")
        step_num += 1
    print()
    print(f"{step_num}. Review the generated files:")
    print(" - docker-compose.yml (for Docker deployment)")
    print(" - config.toml (for local dev server)")
    step_num += 1
    print()
    print("For Docker deployment:")
    print(f"{step_num}. Build frontend: npm run build --prefix ./src/kohaku-hub-ui")
    step_num += 1
    print(f"{step_num}. Start services: docker-compose up -d")
    print()
    if config["lakefs_use_postgres"]:
        print(" Note: Databases will be created automatically on first startup:")
        print(f" - {config['postgres_db']} (hub-api)")
        print(f" - {config['lakefs_db']} (LakeFS)")
        print()
    step_num += 1
    print(f"{step_num}. Access at: http://localhost:28080")
    print()
    print("For local dev server:")
    # NOTE(review): step_num is not incremented before this section, so the
    # number printed here repeats the "Access at" step above — confirm intended.
    print(
        f"{step_num}. Start infrastructure: docker-compose up -d postgres minio lakefs"
    )
    step_num += 1
    print(
        f"{step_num}. Run dev server: uvicorn kohakuhub.main:app --reload --port 48888"
    )
    step_num += 1
    print(f"{step_num}. Access at: http://localhost:48888")
    print()
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C during an interactive prompt: exit quietly but non-zero.
        print("\n\nAborted by user")
        sys.exit(1)
    except Exception as e:
        # Broad catch is deliberate for a CLI: show a short message instead
        # of a traceback while still signalling failure via the exit code.
        print(f"\n\nError: {e}")
        sys.exit(1)