Files
KohakuHub/scripts/generate_docker_compose.py
2025-10-20 12:04:49 +08:00

928 lines
32 KiB
Python

#!/usr/bin/env python3
"""
Docker Compose Generator for KohakuHub
This script generates a docker-compose.yml file based on user preferences.
Can read configuration from kohakuhub.conf file for automation.
"""
import argparse
import configparser
import os
import secrets
import sys
from pathlib import Path
def generate_secret(length: int = 32) -> str:
    """Return a freshly generated random secret that is safe to embed in URLs.

    Args:
        length: Number of random bytes to draw. Because the bytes are base64
            encoded, the returned text is roughly 1.33x longer
            (32 bytes -> 43 chars, 48 bytes -> 64 chars).

    Returns:
        URL-safe base64 encoded string.
    """
    token = secrets.token_urlsafe(length)
    return token
def ask_yes_no(prompt: str, default: bool = True) -> bool:
    """Prompt on stdin until the user gives a yes/no answer.

    Args:
        prompt: Question text shown to the user.
        default: Value returned when the user just presses Enter; it also
            decides which choice is capitalised in the ``[Y/n]`` hint.

    Returns:
        True for "y"/"yes", False for "n"/"no" (case-insensitive), or
        ``default`` for an empty reply.
    """
    hint = "Y/n" if default else "y/N"
    answers = {"y": True, "yes": True, "n": False, "no": False}
    while True:
        reply = input(f"{prompt} [{hint}]: ").strip().lower()
        if reply == "":
            return default
        if reply in answers:
            return answers[reply]
        # Anything else is rejected and the question is asked again.
        print("Please answer 'y' or 'n'")
def ask_string(prompt: str, default: str = "") -> str:
    """Prompt on stdin for a line of text.

    Args:
        prompt: Question text shown to the user.
        default: Value used when the reply is empty. A falsy default (the
            empty string) makes the field mandatory.

    Returns:
        The stripped reply, or ``default`` when the reply is empty and a
        default was supplied.
    """
    if not default:
        # Mandatory field: keep asking until something non-blank is entered.
        while True:
            answer = input(f"{prompt}: ").strip()
            if answer:
                return answer
            print("This field is required")

    answer = input(f"{prompt} [{default}]: ").strip()
    return answer or default
def ask_int(prompt: str, default: int) -> int:
    """Prompt on stdin until the user enters an integer.

    Args:
        prompt: Question text shown to the user.
        default: Value returned when the reply is empty.

    Returns:
        The parsed integer, or ``default`` for an empty reply.
    """
    while True:
        raw = input(f"{prompt} [{default}]: ").strip()
        if raw == "":
            return default
        try:
            number = int(raw)
        except ValueError:
            # Not an integer: report it and ask again.
            print("Please enter a valid number")
        else:
            return number
def generate_postgres_service(config: dict) -> str:
    """Render the compose entry for the built-in PostgreSQL container.

    Args:
        config: Generator settings. Reads ``postgres_builtin`` and, when it is
            true, ``postgres_user``, ``postgres_password`` and ``postgres_db``.

    Returns:
        A YAML fragment meant to be placed under a top-level ``services:``
        key (service name at two spaces, its keys at four, list items at
        six), or an empty string when PostgreSQL is not built in.
    """
    if not config["postgres_builtin"]:
        return ""

    # The leading whitespace inside this literal is significant: YAML nesting
    # is expressed purely through indentation.
    return f"""  postgres:
    image: postgres:15
    container_name: postgres
    restart: always
    environment:
      - POSTGRES_USER={config['postgres_user']}
      - POSTGRES_PASSWORD={config['postgres_password']}
      - POSTGRES_DB={config['postgres_db']}
    ports:
      - "25432:5432" # Optional: for external access
    volumes:
      - ./hub-meta/postgres-data:/var/lib/postgresql/data
"""
def generate_minio_service(config: dict) -> str:
    """Render the compose entry for the built-in MinIO container.

    Args:
        config: Generator settings. Reads ``s3_builtin`` and, when it is true,
            ``s3_access_key`` and ``s3_secret_key`` (used as the MinIO root
            user and password).

    Returns:
        A YAML fragment meant to be placed under a top-level ``services:``
        key (service name at two spaces, its keys at four, list items at
        six), or an empty string when S3 storage is not built in.
    """
    if not config["s3_builtin"]:
        return ""

    # The leading whitespace inside this literal is significant: YAML nesting
    # is expressed purely through indentation.
    return f"""  minio:
    image: quay.io/minio/minio:latest
    container_name: minio
    command: server /data --console-address ":29000"
    environment:
      - MINIO_ROOT_USER={config['s3_access_key']}
      - MINIO_ROOT_PASSWORD={config['s3_secret_key']}
    ports:
      - "29001:9000" # S3 API
      - "29000:29000" # Web Console
    volumes:
      - ./hub-storage/minio-data:/data
      - ./hub-meta/minio-data:/root/.minio
"""
def generate_lakefs_service(config: dict) -> str:
    """Render the compose entry for the LakeFS container.

    Args:
        config: Generator settings. Reads ``s3_builtin``,
            ``postgres_builtin``, ``lakefs_use_postgres``, ``s3_access_key``,
            ``s3_secret_key`` and ``lakefs_encrypt_key``. When LakeFS uses
            PostgreSQL it also reads ``postgres_user``, ``postgres_password``,
            ``postgres_db`` and ``lakefs_db`` (plus ``postgres_host`` and
            ``postgres_port`` for an external server). For external S3 it
            reads ``s3_endpoint`` and the optional ``s3_region``. The optional
            ``external_network`` attaches the service to a second network.

    Returns:
        A YAML fragment meant to be placed under a top-level ``services:``
        key (service name at two spaces, its keys at four, list items at
        six). Optional parts that are not needed leave an empty line behind.
    """
    use_postgres = config["lakefs_use_postgres"]

    # depends_on may only name services that this compose file defines.
    depends_on = []
    if config["s3_builtin"]:
        depends_on.append("minio")
    if config["postgres_builtin"] and use_postgres:
        depends_on.append("postgres")
    depends_on_str = ""
    if depends_on:
        depends_on_str = "    depends_on:\n" + "".join(
            f"      - {dep}\n" for dep in depends_on
        )

    # LakeFS metadata store: PostgreSQL (built-in or external) or a local file.
    if use_postgres:
        if config["postgres_builtin"]:
            pg_host, pg_port = "postgres", 5432
        else:
            pg_host, pg_port = config["postgres_host"], config["postgres_port"]
        lakefs_db_config = (
            "      - LAKEFS_DATABASE_TYPE=postgres\n"
            "      - LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING="
            f"postgres://{config['postgres_user']}:{config['postgres_password']}"
            f"@{pg_host}:{pg_port}/{config['lakefs_db']}?sslmode=disable"
        )
        # Extra variables for the init script mounted further down.
        init_env_vars = (
            f"      - POSTGRES_HOST={pg_host}\n"
            f"      - POSTGRES_PORT={pg_port}\n"
            f"      - POSTGRES_USER={config['postgres_user']}\n"
            f"      - POSTGRES_PASSWORD={config['postgres_password']}\n"
            f"      - POSTGRES_DB={config['postgres_db']}\n"
            f"      - LAKEFS_DB={config['lakefs_db']}"
        )
    else:
        lakefs_db_config = (
            "      - LAKEFS_DATABASE_TYPE=local\n"
            "      - LAKEFS_DATABASE_LOCAL_PATH=/var/lakefs/data/metadata.db"
        )
        init_env_vars = ""

    # S3 blockstore configuration
    if config["s3_builtin"]:
        s3_endpoint = "http://minio:9000"
        force_path_style = "true"
        s3_region = "us-east-1"  # MinIO works with us-east-1
    else:
        s3_endpoint = config["s3_endpoint"]
        # Use path-style for all non-AWS endpoints (MinIO, CloudFlare R2,
        # custom S3); only AWS S3 (*.amazonaws.com) uses virtual-hosted style.
        force_path_style = (
            "false" if "amazonaws.com" in s3_endpoint.lower() else "true"
        )
        s3_region = config.get("s3_region", "us-east-1")

    # Entrypoint override and extra mounts for database initialization
    entrypoint_config = ""
    volumes_config = (
        "      - ./hub-meta/lakefs-data:/var/lakefs/data\n"
        "      - ./hub-meta/lakefs-cache:/lakefs/data/cache"
    )
    if use_postgres:
        entrypoint_config = (
            '    entrypoint: ["/bin/sh", "/scripts/lakefs-entrypoint.sh"]\n'
            '    command: ["run"]'
        )
        volumes_config += (
            "\n      - ./scripts/lakefs-entrypoint.sh:/scripts/lakefs-entrypoint.sh:ro"
            "\n      - ./scripts/init-databases.sh:/scripts/init-databases.sh:ro"
        )

    # Join the external network only when something actually lives outside
    # this compose file (external postgres or external s3).
    lakefs_networks_str = ""
    if config.get("external_network") and (
        not config["postgres_builtin"] or not config["s3_builtin"]
    ):
        lakefs_networks_str = (
            "    networks:\n"
            "      - default\n"
            f"      - {config['external_network']}\n"
        )

    # ``${{UID}}`` renders as the literal ``${UID}`` in the generated file.
    return f"""  lakefs:
    image: treeverse/lakefs:latest
    container_name: lakefs
{entrypoint_config}
    environment:
{lakefs_db_config}
{init_env_vars}
      - LAKEFS_BLOCKSTORE_TYPE=s3
      - LAKEFS_BLOCKSTORE_S3_ENDPOINT={s3_endpoint}
      - LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE={force_path_style}
      - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID={config['s3_access_key']}
      - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY={config['s3_secret_key']}
      - LAKEFS_BLOCKSTORE_S3_REGION={s3_region}
      - LAKEFS_AUTH_ENCRYPT_SECRET_KEY={config['lakefs_encrypt_key']}
      - LAKEFS_LOGGING_FORMAT=text
      - LAKEFS_LISTEN_ADDRESS=0.0.0.0:28000
    ports:
      - "28000:28000" # LakeFS admin UI (optional)
    user: "${{UID}}:${{GID}}"
{depends_on_str}    volumes:
{volumes_config}
{lakefs_networks_str}"""
def generate_hub_api_service(config: dict) -> str:
    """Render the compose entry for the hub-api container.

    Args:
        config: Generator settings. Reads ``postgres_builtin``,
            ``s3_builtin``, ``postgres_user``, ``postgres_password``,
            ``postgres_db``, ``s3_access_key``, ``s3_secret_key``,
            ``session_secret`` and ``admin_secret``. For an external
            PostgreSQL it also reads ``postgres_host`` and ``postgres_port``;
            for external S3 it reads ``s3_endpoint`` and the optional
            ``s3_region`` / ``s3_signature_version``. The optional
            ``external_network`` attaches the service to a second network.

    Returns:
        A YAML fragment meant to be placed under a top-level ``services:``
        key (service name at two spaces, its keys at four, list items at
        six).
    """
    # depends_on may only name services that this compose file defines.
    depends_on = ["lakefs"]
    if config["postgres_builtin"]:
        depends_on.insert(0, "postgres")
    if config["s3_builtin"]:
        depends_on.append("minio")
    depends_on_str = "    depends_on:\n" + "".join(
        f"      - {dep}\n" for dep in depends_on
    )

    # Join the external network only when something actually lives outside
    # this compose file (external postgres or external s3).
    networks_str = ""
    if config.get("external_network") and (
        not config["postgres_builtin"] or not config["s3_builtin"]
    ):
        networks_str = (
            "    networks:\n"
            "      - default\n"
            f"      - {config['external_network']}\n"
        )

    # Database configuration: the built-in server is reached by service name.
    if config["postgres_builtin"]:
        pg_host, pg_port = "postgres", 5432
    else:
        pg_host, pg_port = config["postgres_host"], config["postgres_port"]
    db_url = (
        f"postgresql://{config['postgres_user']}:{config['postgres_password']}"
        f"@{pg_host}:{pg_port}/{config['postgres_db']}"
    )

    # S3 configuration
    if config["s3_builtin"]:
        s3_endpoint_internal = "http://minio:9000"
        s3_endpoint_public = "http://127.0.0.1:29001"
        s3_region = "us-east-1"  # MinIO works with us-east-1
        # MinIO: the signature version is left unset, so only a commented-out
        # hint is emitted.
        s3_sig_version_line = "      # - KOHAKU_HUB_S3_SIGNATURE_VERSION=s3v4 # Uncomment for R2/AWS S3 (leave commented for MinIO)"
    else:
        s3_endpoint_internal = config["s3_endpoint"]
        s3_endpoint_public = config["s3_endpoint"]
        s3_region = config.get("s3_region", "us-east-1")
        # External S3: use the configured value, defaulting to s3v4; an
        # explicitly empty value falls back to a commented-out hint.
        s3_sig_version = config.get("s3_signature_version", "s3v4")
        if s3_sig_version:
            s3_sig_version_line = (
                f"      - KOHAKU_HUB_S3_SIGNATURE_VERSION={s3_sig_version}"
            )
        else:
            s3_sig_version_line = (
                "      # - KOHAKU_HUB_S3_SIGNATURE_VERSION=s3v4 # Uncomment if needed"
            )

    return f"""  hub-api:
    build: .
    container_name: hub-api
    restart: always
    ports:
      - "48888:48888" # Internal API port (optional, for debugging)
{depends_on_str}    environment:
      ## ===== CRITICAL: Endpoint Configuration (MUST CHANGE) =====
      ## These determine how users access your KohakuHub instance
      - KOHAKU_HUB_BASE_URL=http://127.0.0.1:28080 # Change to your public URL (e.g., https://hub.example.com)
      - KOHAKU_HUB_S3_PUBLIC_ENDPOINT={s3_endpoint_public} # Change to your S3 public URL
      ## ===== CRITICAL: Security Configuration (MUST CHANGE) =====
      - KOHAKU_HUB_SESSION_SECRET={config['session_secret']}
      - KOHAKU_HUB_ADMIN_SECRET_TOKEN={config['admin_secret']}
      ## ===== Performance Configuration =====
      - KOHAKU_HUB_WORKERS=4 # Number of worker processes (1-8, recommend: CPU cores)
      ## ===== Database Configuration =====
      - KOHAKU_HUB_DB_BACKEND=postgres
      - KOHAKU_HUB_DATABASE_URL={db_url}
      ## ===== S3 Storage Configuration =====
      - KOHAKU_HUB_S3_ENDPOINT={s3_endpoint_internal}
      - KOHAKU_HUB_S3_ACCESS_KEY={config['s3_access_key']}
      - KOHAKU_HUB_S3_SECRET_KEY={config['s3_secret_key']}
      - KOHAKU_HUB_S3_BUCKET=hub-storage
      - KOHAKU_HUB_S3_REGION={s3_region} # auto (recommended), us-east-1, or your AWS region
{s3_sig_version_line}
      ## ===== LakeFS Configuration =====
      - KOHAKU_HUB_LAKEFS_ENDPOINT=http://lakefs:28000
      - KOHAKU_HUB_LAKEFS_REPO_NAMESPACE=hf
      # LakeFS credentials auto-generated on first start
      ## ===== Application Configuration =====
      - KOHAKU_HUB_SITE_NAME=KohakuHub
      - KOHAKU_HUB_LFS_THRESHOLD_BYTES=1000000
      - KOHAKU_HUB_LFS_KEEP_VERSIONS=5
      - KOHAKU_HUB_LFS_AUTO_GC=true
      - KOHAKU_HUB_AUTO_MIGRATE=true # Auto-confirm database migrations (required for Docker)
      ## ===== Auth & SMTP Configuration =====
      - KOHAKU_HUB_REQUIRE_EMAIL_VERIFICATION=false
      - KOHAKU_HUB_INVITATION_ONLY=false # Set to true to require invitation for registration
      - KOHAKU_HUB_SESSION_EXPIRE_HOURS=168
      - KOHAKU_HUB_TOKEN_EXPIRE_DAYS=365
      - KOHAKU_HUB_ADMIN_ENABLED=true
      # SMTP (Optional - for email verification)
      - KOHAKU_HUB_SMTP_ENABLED=false
      - KOHAKU_HUB_SMTP_HOST=smtp.gmail.com
      - KOHAKU_HUB_SMTP_PORT=587
      - KOHAKU_HUB_SMTP_USERNAME=
      - KOHAKU_HUB_SMTP_PASSWORD=
      - KOHAKU_HUB_SMTP_FROM=noreply@kohakuhub.local
      - KOHAKU_HUB_SMTP_TLS=true
      ## ===== Storage Quota Configuration (Optional) =====
      - KOHAKU_HUB_DEFAULT_USER_PRIVATE_QUOTA_BYTES=10_000_000
      - KOHAKU_HUB_DEFAULT_USER_PUBLIC_QUOTA_BYTES=100_000_000
      - KOHAKU_HUB_DEFAULT_ORG_PRIVATE_QUOTA_BYTES=10_000_000
      - KOHAKU_HUB_DEFAULT_ORG_PUBLIC_QUOTA_BYTES=100_000_000
    volumes:
      - ./hub-meta/hub-api:/hub-api-creds
{networks_str}"""
def generate_hub_ui_service() -> str:
    """Render the compose entry for the nginx container serving the web UI.

    Returns:
        A static YAML fragment meant to be placed under a top-level
        ``services:`` key (service name at two spaces, its keys at four,
        list items at six).
    """
    # The leading whitespace inside this literal is significant: YAML nesting
    # is expressed purely through indentation.
    return """  hub-ui:
    image: nginx:alpine
    container_name: hub-ui
    restart: always
    ports:
      - "28080:80" # Public web interface
    volumes:
      - ./src/kohaku-hub-ui/dist:/usr/share/nginx/html
      - ./src/kohaku-hub-admin/dist:/usr/share/nginx/html-admin
      - ./docker/nginx/default.conf:/etc/nginx/conf.d/default.conf
    depends_on:
      - hub-api
"""
def generate_docker_compose(config: dict) -> str:
    """Assemble the complete docker-compose.yml text.

    Args:
        config: Generator settings. ``s3_builtin`` and ``postgres_builtin``
            decide whether the MinIO / PostgreSQL services are included; the
            optional ``external_network`` adds an ``external: true`` network
            declaration. The dict is also passed on to the per-service
            generators.

    Returns:
        The full compose file content: header comment, ``services:`` section
        and ``networks:`` section.
    """
    # Services in output order. Each fragment is inserted verbatim, so it
    # must already carry the indentation needed to nest under ``services:``.
    services = [
        generate_hub_ui_service(),
        generate_hub_api_service(config),
    ]
    if config["s3_builtin"]:
        services.append(generate_minio_service(config))
    services.append(generate_lakefs_service(config))
    if config["postgres_builtin"]:
        services.append(generate_postgres_service(config))

    content = (
        "# docker-compose.yml - KohakuHub Configuration\n"
        "# Generated by KohakuHub docker-compose generator\n"
        "# Customize for your deployment\n"
        "services:\n"
    )
    content += "\n".join(services)

    # Network configuration: entries must be nested under ``networks:`` and
    # their attributes nested one level deeper.
    content += "\nnetworks:\n  default:\n    name: hub-net\n"

    # Declare the pre-existing external network if one was specified.
    external_network = config.get("external_network")
    if external_network:
        content += f"  {external_network}:\n    external: true\n"
    return content
def load_config_file(config_path: Path) -> dict:
    """Load generator settings from an INI file.

    Recognised sections are ``postgresql``, ``lakefs``, ``s3``, ``security``
    and ``network``. A missing section or key falls back to the built-in
    default; missing secrets and keys are generated with ``generate_secret``.

    Args:
        config_path: Path of the INI file to read.

    Returns:
        Flat dict of settings (``postgres_*``, ``lakefs_*``, ``s3_*``,
        ``session_secret``, ``admin_secret``, ``external_network``).

    Raises:
        SystemExit: With status 1 when ``config_path`` does not exist.
    """
    if not config_path.exists():
        print(f"Error: Config file not found: {config_path}")
        sys.exit(1)

    # interpolation=None: values are taken literally. With the default
    # BasicInterpolation a password or secret containing '%' would raise
    # InterpolationSyntaxError when read.
    parser = configparser.ConfigParser(interpolation=None)
    parser.read(config_path, encoding="utf-8")

    # Stand-in for sections absent from the file, so every lookup below can
    # rely on ``fallback`` alone. It lives in its own parser so that values
    # from the file's [DEFAULT] section do not leak into absent sections.
    blank = configparser.ConfigParser(interpolation=None)
    blank.add_section("empty")

    def section(name: str):
        return parser[name] if parser.has_section(name) else blank["empty"]

    def secret(sec, key: str, nbytes: int) -> str:
        # Generate a replacement only when the key is really missing.
        value = sec.get(key)
        return generate_secret(nbytes) if value is None else value

    config = {}

    # PostgreSQL section
    pg = section("postgresql")
    config["postgres_builtin"] = pg.getboolean("builtin", fallback=True)
    config["postgres_host"] = pg.get("host", fallback="postgres")
    config["postgres_port"] = pg.getint("port", fallback=5432)
    config["postgres_user"] = pg.get("user", fallback="hub")
    config["postgres_password"] = pg.get("password", fallback="hubpass")
    config["postgres_db"] = pg.get("database", fallback="kohakuhub")

    # LakeFS section
    lakefs = section("lakefs")
    config["lakefs_use_postgres"] = lakefs.getboolean("use_postgres", fallback=True)
    config["lakefs_db"] = lakefs.get("database", fallback="lakefs")
    config["lakefs_encrypt_key"] = secret(lakefs, "encrypt_key", 32)  # 43 chars

    # S3 section
    s3 = section("s3")
    config["s3_builtin"] = s3.getboolean("builtin", fallback=True)
    config["s3_endpoint"] = s3.get("endpoint", fallback="http://minio:9000")
    config["s3_access_key"] = secret(s3, "access_key", 24)  # 32 chars
    config["s3_secret_key"] = secret(s3, "secret_key", 48)  # 64 chars
    config["s3_region"] = s3.get("region", fallback="us-east-1")
    # Empty for MinIO (default), s3v4 for R2/AWS S3
    config["s3_signature_version"] = s3.get(
        "signature_version", fallback="" if config["s3_builtin"] else "s3v4"
    )

    # Security section
    sec = section("security")
    config["session_secret"] = secret(sec, "session_secret", 48)  # 64 chars
    config["admin_secret"] = secret(sec, "admin_secret", 48)  # 64 chars

    # Network section
    config["external_network"] = section("network").get(
        "external_network", fallback=""
    )

    return config
def generate_config_template(output_path: Path):
    """Write a commented template configuration file and print usage hints.

    Every explanatory comment in the template sits on its own line.
    configparser does not strip inline comments by default, so a trailing
    ``# ...`` on a ``key = value`` line would become part of the value once
    the user uncomments that line.

    Args:
        output_path: Destination file; written as UTF-8, replacing any
            existing content.
    """
    template = """# KohakuHub Configuration Template
# Use this file to automate docker-compose.yml generation
# Usage: python scripts/generate_docker_compose.py --config kohakuhub.conf

[postgresql]
# Use built-in PostgreSQL container (true) or external server (false)
builtin = true

# If builtin = false, specify connection details:
# host = your-postgres-host.com
# port = 5432

# PostgreSQL credentials
user = hub
password = hubpass
database = kohakuhub

[lakefs]
# Use PostgreSQL for LakeFS (true) or SQLite (false)
use_postgres = true

# LakeFS database name (separate from hub-api database)
database = lakefs

# LakeFS encryption key (auto-generated if not specified)
# encrypt_key = your-secret-key-here

[s3]
# Use built-in MinIO container (true) or external S3 (false)
builtin = true

# If builtin = false, specify S3 endpoint and credentials:
# endpoint = https://your-s3-endpoint.com
# access_key = your-access-key
# secret_key = your-secret-key
# Region: us-east-1 (default), auto for R2, or specific AWS region
# region = us-east-1
# Signature version: s3v4 for R2/AWS S3, leave empty for MinIO
# signature_version = s3v4

# If builtin = true, MinIO credentials are auto-generated (recommended)
# You can override by uncommenting and setting custom values:
# access_key = your-custom-access-key
# secret_key = your-custom-secret-key
# region = us-east-1
# Leave the signature version empty for MinIO (uses default)
# signature_version =

[security]
# Session and admin secrets (auto-generated if not specified)
# session_secret = your-session-secret-here
# admin_secret = your-admin-secret-here

[network]
# External bridge network (optional)
# Use this if PostgreSQL or S3 are in different Docker Compose setups
# Create the network first: docker network create shared-network
# external_network = shared-network
"""
    output_path.write_text(template, encoding="utf-8")
    print(f"[OK] Generated configuration template: {output_path}")
    print()
    print("Edit this file with your settings, then run:")
    print(f" python scripts/generate_docker_compose.py --config {output_path}")
def main():
    """Command-line entry point.

    ``--generate-config`` writes a template ``kohakuhub.conf`` next to the
    ``scripts`` directory and returns. Otherwise the settings come from the
    file given with ``--config`` / ``-c`` or, without it, from interactive
    prompts, and the output files are generated from them.
    """
    cli = argparse.ArgumentParser(
        description="Generate docker-compose.yml for KohakuHub"
    )
    cli.add_argument(
        "--config",
        "-c",
        type=Path,
        help="Path to configuration file (kohakuhub.conf)",
    )
    cli.add_argument(
        "--generate-config",
        action="store_true",
        help="Generate a template configuration file",
    )
    args = cli.parse_args()

    # Template mode: write the sample config and stop.
    if args.generate_config:
        generate_config_template(Path(__file__).parent.parent / "kohakuhub.conf")
        return

    banner = "=" * 60
    print(banner)
    print("KohakuHub Docker Compose Generator")
    print(banner)
    print()

    if not args.config:
        # Interactive mode
        config = interactive_config()
    else:
        print(f"Loading configuration from: {args.config}")
        print()
        config = load_config_file(args.config)

        # Echo what was loaded so the user can double-check it.
        pg_external = not config["postgres_builtin"]
        s3_external = not config["s3_builtin"]
        lakefs_on_pg = config["lakefs_use_postgres"]

        print("Loaded configuration:")
        print(f" PostgreSQL: {'External' if pg_external else 'Built-in'}")
        if pg_external:
            print(f" Host: {config['postgres_host']}:{config['postgres_port']}")
        print(f" Database: {config['postgres_db']}")
        print(f" LakeFS: {'PostgreSQL' if lakefs_on_pg else 'SQLite'}")
        if lakefs_on_pg:
            print(f" Database: {config['lakefs_db']}")
        print(f" S3: {'External S3' if s3_external else 'Built-in MinIO'}")
        if s3_external:
            print(f" Endpoint: {config['s3_endpoint']}")
        print()

    # Generate and write files
    generate_and_write_files(config)
def interactive_config() -> dict:
    """Collect all generator settings by prompting the user on stdin.

    Questions are asked in this order: PostgreSQL, LakeFS database, S3
    storage, security secrets, network.

    Returns:
        Flat dict with the keys ``postgres_builtin``, ``postgres_host``,
        ``postgres_port``, ``postgres_user``, ``postgres_password``,
        ``postgres_db``, ``lakefs_use_postgres``, ``lakefs_db`` (``None`` when
        LakeFS uses SQLite), ``lakefs_encrypt_key``, ``s3_builtin``,
        ``s3_endpoint``, ``s3_access_key``, ``s3_secret_key``, ``s3_region``,
        ``s3_signature_version``, ``session_secret``, ``admin_secret`` and
        ``external_network``.
    """
    config = {}

    # PostgreSQL Configuration
    print("--- PostgreSQL Configuration ---")
    config["postgres_builtin"] = ask_yes_no(
        "Use built-in PostgreSQL container?", default=True
    )
    if config["postgres_builtin"]:
        config["postgres_user"] = ask_string("PostgreSQL username", default="hub")
        config["postgres_password"] = ask_string(
            "PostgreSQL password", default="hubpass"
        )
        config["postgres_db"] = ask_string(
            "PostgreSQL database name for hub-api", default="kohakuhub"
        )
        config["postgres_host"] = "postgres"
        config["postgres_port"] = 5432
    else:
        config["postgres_host"] = ask_string("PostgreSQL host")
        config["postgres_port"] = ask_int("PostgreSQL port", default=5432)
        config["postgres_user"] = ask_string("PostgreSQL username")
        config["postgres_password"] = ask_string("PostgreSQL password")
        config["postgres_db"] = ask_string(
            "PostgreSQL database name for hub-api", default="kohakuhub"
        )

    # LakeFS database configuration
    print()
    print("--- LakeFS Database Configuration ---")
    config["lakefs_use_postgres"] = ask_yes_no(
        "Use PostgreSQL for LakeFS? (No = use local SQLite)", default=True
    )
    if config["lakefs_use_postgres"]:
        config["lakefs_db"] = ask_string(
            "PostgreSQL database name for LakeFS", default="lakefs"
        )
    else:
        config["lakefs_db"] = None
    print()

    # S3 Configuration
    print("--- S3 Storage Configuration ---")
    config["s3_builtin"] = ask_yes_no("Use built-in MinIO container?", default=True)
    if config["s3_builtin"]:
        # Generate secure random credentials for MinIO
        default_access_key = generate_secret(24)  # 32 chars
        default_secret_key = generate_secret(48)  # 64 chars
        print(f"Generated MinIO access key: {default_access_key}")
        print(f"Generated MinIO secret key: {default_secret_key}")
        use_generated = ask_yes_no("Use generated MinIO credentials?", default=True)
        if use_generated:
            config["s3_access_key"] = default_access_key
            config["s3_secret_key"] = default_secret_key
        else:
            config["s3_access_key"] = ask_string("MinIO access key")
            config["s3_secret_key"] = ask_string("MinIO secret key")
        config["s3_endpoint"] = "http://minio:9000"
        config["s3_region"] = "us-east-1"
        config["s3_signature_version"] = ""  # MinIO uses default (don't set)
    else:
        config["s3_endpoint"] = ask_string("S3 endpoint URL")
        config["s3_access_key"] = ask_string("S3 access key")
        config["s3_secret_key"] = ask_string("S3 secret key")
        config["s3_region"] = ask_string("S3 region", default="us-east-1")

        # Ask about signature version for external S3. An empty reply
        # selects the default "s3v4", so the word "none" is the only way to
        # leave the signature version unset; the prompt has to say so.
        print()
        print("Signature version:")
        print(" - none: Use default (for MinIO compatibility)")
        print(" - s3v4: Cloudflare R2, AWS S3 (recommended for R2/AWS)")
        sig_input = ask_string(
            "S3 signature version (s3v4, or 'none' to use the default)",
            default="s3v4",
        )
        config["s3_signature_version"] = (
            "" if sig_input.lower() == "none" else sig_input
        )
    print()

    # Security Configuration
    print("--- Security Configuration ---")
    default_session_secret = generate_secret(48)  # 64 chars for session encryption
    print(f"Generated session secret: {default_session_secret}")
    use_generated = ask_yes_no("Use generated session secret?", default=True)
    if use_generated:
        config["session_secret"] = default_session_secret
    else:
        config["session_secret"] = ask_string("Session secret key")
    print()

    same_as_session = ask_yes_no("Use same secret for admin token?", default=False)
    if same_as_session:
        config["admin_secret"] = config["session_secret"]
    else:
        default_admin_secret = generate_secret(48)  # 64 chars for admin token
        print(f"Generated admin secret: {default_admin_secret}")
        use_generated_admin = ask_yes_no("Use generated admin secret?", default=True)
        if use_generated_admin:
            config["admin_secret"] = default_admin_secret
        else:
            config["admin_secret"] = ask_string("Admin secret token")

    # LakeFS encryption key (always generated, never prompted for)
    config["lakefs_encrypt_key"] = generate_secret(32)  # 43 chars

    # Network configuration: only relevant when something is external.
    print()
    print("--- Network Configuration ---")
    use_external_network = False
    if not config["postgres_builtin"] or not config["s3_builtin"]:
        use_external_network = ask_yes_no(
            "Use external Docker network for cross-compose communication?",
            default=False,
        )
    if use_external_network:
        config["external_network"] = ask_string(
            "External network name", default="shared-network"
        )
        print()
        print("Note: Make sure the network exists:")
        print(f" docker network create {config['external_network']}")
    else:
        config["external_network"] = ""

    return config
def _toml_string(value) -> str:
    """Return *value* as a quoted TOML basic string with escapes applied."""
    import json  # local import keeps this helper self-contained

    # JSON string escaping (\" \\ \n \t \uXXXX ...) is also valid inside a
    # TOML basic string; DEL is the one control character JSON leaves as-is.
    return json.dumps(str(value), ensure_ascii=False).replace("\x7f", "\\u007f")


def generate_config_toml(config: dict) -> str:
    """Render config.toml for running the hub as a local dev server.

    Built-in services are addressed through their published localhost ports
    (PostgreSQL 25432, MinIO 29001); external ones use the configured
    host/endpoint. All user-supplied values are escaped so that quotes or
    backslashes in passwords and secrets cannot break the TOML syntax.

    Args:
        config: Generator settings. Reads ``postgres_builtin``,
            ``postgres_user``, ``postgres_password``, ``postgres_db``,
            ``s3_builtin``, ``s3_access_key``, ``s3_secret_key``,
            ``session_secret`` and ``admin_secret``; ``postgres_host`` /
            ``postgres_port`` for an external database; ``s3_endpoint`` and
            the optional ``s3_region`` for external S3; and the optional
            ``s3_signature_version``.

    Returns:
        The TOML document as a string.
    """
    # Adapt endpoints for localhost dev server
    if config["postgres_builtin"]:
        pg_host, pg_port = "localhost", 25432
    else:
        pg_host, pg_port = config["postgres_host"], config["postgres_port"]
    db_url = (
        f"postgresql://{config['postgres_user']}:{config['postgres_password']}"
        f"@{pg_host}:{pg_port}/{config['postgres_db']}"
    )

    # S3 configuration for dev
    if config["s3_builtin"]:
        s3_endpoint_internal = "http://localhost:29001"
        s3_endpoint_public = "http://localhost:29001"
        s3_region = "us-east-1"
    else:
        s3_endpoint_internal = config["s3_endpoint"]
        s3_endpoint_public = config["s3_endpoint"]
        s3_region = config.get("s3_region", "us-east-1")

    toml_content = f"""# KohakuHub Configuration File (TOML)
# Generated by KohakuHub docker-compose generator
# Use this for local development server

[app]
base_url = "http://localhost:48888" # Dev server URL
api_base = "/api"
site_name = "KohakuHub"
workers = 1 # Single worker for dev

[database]
backend = "postgres"
url = {_toml_string(db_url)}
auto_migrate = true # Auto-confirm migrations

[s3]
endpoint = {_toml_string(s3_endpoint_internal)}
public_endpoint = {_toml_string(s3_endpoint_public)}
access_key = {_toml_string(config['s3_access_key'])}
secret_key = {_toml_string(config['s3_secret_key'])}
bucket = "hub-storage"
region = {_toml_string(s3_region)}
"""
    # Add signature_version only if set (for external S3)
    if config.get("s3_signature_version"):
        toml_content += (
            f"signature_version = {_toml_string(config['s3_signature_version'])}\n"
        )

    toml_content += f"""
[lakefs]
endpoint = "http://localhost:28000"
repo_namespace = "hf"
# Credentials auto-generated on first start

[lfs]
threshold_bytes = 1_000_000 # 1MB
keep_versions = 5
auto_gc = true

[auth]
session_secret = {_toml_string(config['session_secret'])}
session_expire_hours = 168 # 7 days
token_expire_days = 365
require_email_verification = false
invitation_only = false

[admin]
enabled = true
secret_token = {_toml_string(config['admin_secret'])}

[smtp]
enabled = false
host = "smtp.gmail.com"
port = 587
username = ""
password = ""
from = "noreply@kohakuhub.local"
tls = true

[quota]
default_user_private_bytes = 10_000_000 # 10MB
default_user_public_bytes = 100_000_000 # 100MB
default_org_private_bytes = 10_000_000 # 10MB
default_org_public_bytes = 100_000_000 # 100MB
"""
    return toml_content
def generate_and_write_files(config: dict):
    """Write docker-compose.yml and config.toml, then print a summary.

    Both files are written (UTF-8) into the parent of the directory that
    contains this script, replacing any existing files. Afterwards a
    configuration summary and "next steps" instructions are printed.

    Args:
        config: Generator settings as produced by ``load_config_file`` or
            ``interactive_config``.
    """
    project_root = Path(__file__).parent.parent
    uses_pg_for_lakefs = config["lakefs_use_postgres"]

    print()
    print("=" * 60)
    print("Generating docker-compose.yml and config.toml...")
    print("=" * 60)

    # Generate and write docker-compose.yml
    compose_path = project_root / "docker-compose.yml"
    compose_path.write_text(generate_docker_compose(config), encoding="utf-8")
    print()
    print(f"[OK] Successfully generated: {compose_path}")

    # Generate and write config.toml
    config_path = project_root / "config.toml"
    config_path.write_text(generate_config_toml(config), encoding="utf-8")
    print(f"[OK] Successfully generated: {config_path}")

    if uses_pg_for_lakefs:
        print(
            "[OK] Database initialization scripts will run automatically when LakeFS starts"
        )
        print(" - scripts/init-databases.sh")
        print(" - scripts/lakefs-entrypoint.sh")

    print()
    print("Configuration Summary:")
    print("-" * 60)
    print(f"PostgreSQL: {'Built-in' if config['postgres_builtin'] else 'Custom'}")
    if not config["postgres_builtin"]:
        print(f" Host: {config['postgres_host']}:{config['postgres_port']}")
    print(f" Hub-API Database: {config['postgres_db']}")
    if uses_pg_for_lakefs:
        print(f" LakeFS Database: {config['lakefs_db']}")
    print(
        f"LakeFS Database Backend: {'PostgreSQL' if uses_pg_for_lakefs else 'SQLite'}"
    )
    print(f"S3 Storage: {'Built-in MinIO' if config['s3_builtin'] else 'Custom S3'}")
    if not config["s3_builtin"]:
        print(f" Endpoint: {config['s3_endpoint']}")
    if config.get("external_network"):
        print(f"External Network: {config['external_network']}")
    # Only a prefix of each secret is echoed.
    print(f"Session Secret: {config['session_secret'][:20]}...")
    print(f"Admin Secret: {config['admin_secret'][:20]}...")
    print("-" * 60)

    print()
    print("Next steps:")
    step_num = 1
    if config.get("external_network"):
        print(f"{step_num}. Create external network if not exists:")
        print(f" docker network create {config['external_network']}")
        step_num += 1
        print()
    print(f"{step_num}. Review the generated files:")
    print(" - docker-compose.yml (for Docker deployment)")
    print(" - config.toml (for local dev server)")
    step_num += 1

    print()
    print("For Docker deployment:")
    print(f"{step_num}. Build frontend: npm run build --prefix ./src/kohaku-hub-ui")
    step_num += 1
    print(f"{step_num}. Start services: docker-compose up -d")
    print()
    if uses_pg_for_lakefs:
        print(" Note: Databases will be created automatically on first startup:")
        print(f" - {config['postgres_db']} (hub-api)")
        print(f" - {config['lakefs_db']} (LakeFS)")
        print()
    step_num += 1
    print(f"{step_num}. Access at: http://localhost:28080")

    # Only services that are built in can be started by name; external
    # PostgreSQL / S3 are not part of the infrastructure command.
    infra_services = []
    if config["postgres_builtin"]:
        infra_services.append("postgres")
    if config["s3_builtin"]:
        infra_services.append("minio")
    infra_services.append("lakefs")

    print()
    print("For local dev server:")
    print(
        f"{step_num}. Start infrastructure: docker-compose up -d {' '.join(infra_services)}"
    )
    step_num += 1
    print(
        f"{step_num}. Run dev server: uvicorn kohakuhub.main:app --reload --port 48888"
    )
    step_num += 1
    print(f"{step_num}. Access at: http://localhost:48888")
    print()
if __name__ == "__main__":
    # Script entry point: report Ctrl-C and any unexpected error as a short
    # message and finish with exit status 1 instead of showing a traceback.
    failed = False
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nAborted by user")
        failed = True
    except Exception as e:
        print(f"\n\nError: {e}")
        failed = True
    if failed:
        sys.exit(1)