Add Firecrawl Docker Compose Template -#422 (#423)

* fix: run api and workers in separate containers to resolve port conflict. Previously, the API and worker services ran together and tried to bind to the same port, causing repeated EADDRINUSE errors and container crashes. This update splits the API, queue worker, extract worker, and nuq workers into individual service containers, each with a unique port and process. This fixes the API not starting, stabilizes the deployment, and enables concurrent service operation.

* updated the meta.json for the build issue

* updated the meta.json for the logo path

* Update blueprints/firecrawl/docker-compose.yml

---------

Co-authored-by: Mauricio Siu <47042324+Siumauricio@users.noreply.github.com>
This commit is contained in:
Harikrishnan Dhanasekaran
2025-10-05 09:33:48 +05:30
committed by GitHub
parent eb2f470443
commit 1e75877fb9
4 changed files with 225 additions and 0 deletions

View File

@@ -0,0 +1,138 @@
name: firecrawl

# NOTE(review): leading indentation was lost when this file was extracted;
# the structure below is reconstructed from docker-compose conventions and
# the visible key order — confirm against the upstream blueprint.

# Container settings shared by every Firecrawl application container.
x-common-service: &common-service
  image: ghcr.io/firecrawl/firecrawl:latest
  ulimits:
    # Crawling opens many concurrent sockets; raise the fd limit.
    nofile:
      soft: 65535
      hard: 65535
  extra_hosts:
    # Let containers reach services running on the Docker host.
    - "host.docker.internal:host-gateway"

# Environment shared by the API and worker containers. Every value is
# overridable from the deployment environment, with in-cluster defaults.
x-common-env: &common-env
  REDIS_URL: ${REDIS_URL:-redis://redis:6379}
  REDIS_RATE_LIMIT_URL: ${REDIS_RATE_LIMIT_URL:-redis://redis:6379}
  PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
  NUQ_DATABASE_URL: ${NUQ_DATABASE_URL:-postgres://postgres:postgres@nuq-postgres:5432/postgres}
  USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-}
  OPENAI_API_KEY: ${OPENAI_API_KEY:-}
  OPENAI_BASE_URL: ${OPENAI_BASE_URL:-}
  MODEL_NAME: ${MODEL_NAME:-}
  MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-}
  OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
  SLACK_WEBHOOK_URL: ${SLACK_WEBHOOK_URL:-}
  BULL_AUTH_KEY: ${BULL_AUTH_KEY:-}
  TEST_API_KEY: ${TEST_API_KEY:-}
  POSTHOG_API_KEY: ${POSTHOG_API_KEY:-}
  POSTHOG_HOST: ${POSTHOG_HOST:-}
  SUPABASE_ANON_TOKEN: ${SUPABASE_ANON_TOKEN:-}
  SUPABASE_URL: ${SUPABASE_URL:-}
  SUPABASE_SERVICE_TOKEN: ${SUPABASE_SERVICE_TOKEN:-}
  SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL:-}
  SERPER_API_KEY: ${SERPER_API_KEY:-}
  SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY:-}
  LOGGING_LEVEL: ${LOGGING_LEVEL:-INFO}
  PROXY_SERVER: ${PROXY_SERVER:-}
  PROXY_USERNAME: ${PROXY_USERNAME:-}
  PROXY_PASSWORD: ${PROXY_PASSWORD:-}
  NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}
  SEARXNG_ENDPOINT: ${SEARXNG_ENDPOINT:-}
  SEARXNG_ENGINES: ${SEARXNG_ENGINES:-}
  SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-}

services:
  # Headless-browser microservice used for JS-rendered scraping.
  playwright-service:
    image: ghcr.io/firecrawl/playwright-service:latest
    shm_size: "1g"  # Chromium needs a large /dev/shm
    restart: unless-stopped
    environment:
      PORT: "3000"
      PROXY_SERVER: ${PROXY_SERVER:-}
      PROXY_USERNAME: ${PROXY_USERNAME:-}
      PROXY_PASSWORD: ${PROXY_PASSWORD:-}
      BLOCK_MEDIA: ${BLOCK_MEDIA:-}
      NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}

  # Public HTTP API. Runs alone in its container so it never contends with
  # the workers for a port (the EADDRINUSE fix this commit delivers).
  api:
    <<: *common-service
    restart: unless-stopped
    ports:
      # Container port only; the host port is assigned dynamically.
      - "3002"
    environment:
      <<: *common-env
      HOST: "0.0.0.0"
      PORT: "3002"
      WORKER_PORT: "3005"
      ENV: local
    depends_on:
      redis:
        condition: service_started
      playwright-service:
        condition: service_started
      nuq-postgres:
        condition: service_healthy
    command: node --import ./dist/src/otel.js dist/src/index.js

  # Queue worker — separate container/port from the API.
  worker:
    <<: *common-service
    restart: unless-stopped
    environment:
      <<: *common-env
      HOST: "0.0.0.0"
      PORT: "3005"
      ENV: local
    depends_on:
      redis:
        condition: service_started
      nuq-postgres:
        condition: service_healthy
    command: node --import ./dist/src/otel.js dist/src/services/queue-worker.js

  # Extraction worker — separate container/port from the API.
  extract-worker:
    <<: *common-service
    restart: unless-stopped
    environment:
      <<: *common-env
      HOST: "0.0.0.0"
      PORT: "3004"
      ENV: local
    depends_on:
      redis:
        condition: service_started
      nuq-postgres:
        condition: service_healthy
    command: node --import ./dist/src/otel.js dist/src/services/extract-worker.js

  # Shared queue/rate-limit store.
  redis:
    image: redis:alpine
    # Added for consistency with the other services so a redis crash
    # does not take the stack down permanently.
    restart: unless-stopped
    command: redis-server --bind 0.0.0.0

  # Postgres instance backing the NUQ job queue; built from the
  # firecrawl repo's nuq-postgres Dockerfile.
  nuq-postgres:
    build:
      context: "https://github.com/firecrawl/firecrawl.git#main:apps/nuq-postgres"
      dockerfile: Dockerfile
    restart: unless-stopped
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: postgres
    volumes:
      - nuq_pg_data:/var/lib/postgresql/data
    healthcheck:
      # $$ keeps ${...} expansion inside the container, not in compose.
      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
      start_period: 30s
      interval: 10s
      timeout: 5s
      retries: 10
    # NOTE(review): in the extracted text only this service attaches to the
    # named networks; the other services would then land on the compose
    # "default" network and could not reach nuq-postgres. Confirm against
    # the upstream blueprint whether `networks` belongs on the shared
    # anchor / every service instead.
    networks:
      - backend
      - dokploy-network

networks:
  backend:
    driver: bridge
  # Dokploy's pre-existing ingress network.
  dokploy-network:
    external: true

volumes:
  nuq_pg_data:

View File

@@ -0,0 +1,3 @@
<svg width="50" height="72" viewBox="0 0 50 72" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M41.7154 23.1929C38.9531 24.0129 36.8707 25.8677 35.3457 27.8826C35.0183 28.3151 34.3358 27.9901 34.4658 27.4601C37.3856 15.4534 33.5283 5.47401 21.5039 0.561817C20.894 0.311833 20.259 0.859299 20.419 1.49926C25.8887 23.4604 2.88236 21.608 5.78971 46.504C5.83971 46.9314 5.35973 47.2239 5.00975 46.9739C3.9198 46.1915 2.70237 44.5591 1.86741 43.4116C1.62242 43.0742 1.09245 43.1692 0.979951 43.5716C0.314984 45.9765 0 48.2413 0 50.4912C0 59.2407 4.49727 66.9427 11.3044 71.4074C11.6944 71.6624 12.1944 71.2974 12.0619 70.8499C11.7119 69.675 11.5144 68.4351 11.4994 67.1527C11.4994 66.3652 11.5494 65.5603 11.6719 64.8103C11.9569 62.9254 12.6119 61.1305 13.7118 59.4957C17.4841 53.8335 25.0462 48.3638 23.8388 40.9368C23.7613 40.4668 24.3163 40.1569 24.6663 40.4793C29.9935 45.3465 31.0485 51.8936 30.1735 57.7658C30.0985 58.2757 30.7385 58.5482 31.061 58.1482C31.8759 57.1283 32.8709 56.2334 33.9533 55.5609C34.2233 55.3934 34.5833 55.5209 34.6858 55.8209C35.2882 57.5733 36.1832 59.2182 37.0281 60.8631C38.0381 62.8404 38.5756 65.0978 38.4906 67.4877C38.4481 68.6501 38.2556 69.775 37.9331 70.8449C37.7956 71.2974 38.2906 71.6749 38.6881 71.4149C45.5002 66.9502 50 59.2482 50 50.4937C50 47.4514 49.4675 44.4691 48.4601 41.6743C46.3477 35.8121 40.988 31.4099 42.3429 23.7704C42.4079 23.4054 42.0704 23.0879 41.7154 23.1929Z" fill="#FA5D19" style="fill:#FA5D19;fill:color(display-p3 0.9816 0.3634 0.0984);fill-opacity:1;"/>
</svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

@@ -0,0 +1,65 @@
# Dokploy template definition for Firecrawl.
# ${...} values in [variables] are resolved by Dokploy at deploy time
# (e.g. ${domain} -> generated domain, ${password:32} -> random secret).
[variables]
main_domain = "${domain}"
openai_api_key = "${OPENAI_API_KEY}"
openai_base_url = "${OPENAI_BASE_URL}"
ollama_base_url = "${OLLAMA_BASE_URL}"
model_name = "${MODEL_NAME}"
model_embedding_name = "${MODEL_EMBEDDING_NAME}"
proxy_server = "${PROXY_SERVER}"
proxy_username = "${PROXY_USERNAME}"
proxy_password = "${PROXY_PASSWORD}"
searxng_endpoint = "${SEARXNG_ENDPOINT}"
searxng_engines = "${SEARXNG_ENGINES}"
searxng_categories = "${SEARXNG_CATEGORIES}"
supabase_anon_token = "${SUPABASE_ANON_TOKEN}"
supabase_url = "${SUPABASE_URL}"
supabase_service_token = "${SUPABASE_SERVICE_TOKEN}"
test_api_key = "${TEST_API_KEY}"
bull_auth_key = "${password:32}"
llamaparse_api_key = "${LLAMAPARSE_API_KEY}"
slack_webhook_url = "${SLACK_WEBHOOK_URL}"
posthog_api_key = "${POSTHOG_API_KEY}"
posthog_host = "${POSTHOG_HOST}"
max_cpu = "${MAX_CPU}"
max_ram = "${MAX_RAM}"

[config]
# Environment handed to the compose deployment.
env = [
    "PORT=3002",
    "HOST=0.0.0.0",
    "USE_DB_AUTHENTICATION=false",
    "BULL_AUTH_KEY=${bull_auth_key}",
    "PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape",
    "REDIS_URL=redis://redis:6379",
    "REDIS_RATE_LIMIT_URL=redis://redis:6379",
    "OPENAI_API_KEY=${openai_api_key}",
    "OPENAI_BASE_URL=${openai_base_url}",
    "OLLAMA_BASE_URL=${ollama_base_url}",
    "MODEL_NAME=${model_name}",
    "MODEL_EMBEDDING_NAME=${model_embedding_name}",
    "PROXY_SERVER=${proxy_server}",
    "PROXY_USERNAME=${proxy_username}",
    "PROXY_PASSWORD=${proxy_password}",
    "SEARXNG_ENDPOINT=${searxng_endpoint}",
    "SEARXNG_ENGINES=${searxng_engines}",
    "SEARXNG_CATEGORIES=${searxng_categories}",
    "SUPABASE_ANON_TOKEN=${supabase_anon_token}",
    "SUPABASE_URL=${supabase_url}",
    "SUPABASE_SERVICE_TOKEN=${supabase_service_token}",
    "TEST_API_KEY=${test_api_key}",
    "LLAMAPARSE_API_KEY=${llamaparse_api_key}",
    "SLACK_WEBHOOK_URL=${slack_webhook_url}",
    "POSTHOG_API_KEY=${posthog_api_key}",
    "POSTHOG_HOST=${posthog_host}",
    # Fix: reference the declared max_cpu/max_ram variables instead of
    # hard-coding 0.8, which left both variables unused.
    # NOTE(review): the previous hard-coded default was 0.8 — if Dokploy
    # resolves undefined ${MAX_CPU}/${MAX_RAM} to empty, a default may
    # need to be supplied elsewhere; confirm.
    "MAX_CPU=${max_cpu}",
    "MAX_RAM=${max_ram}"
]
mounts = []

# Route the generated domain to the API container's port 3002.
[[config.domains]]
serviceName = "api"
port = 3002
host = "${main_domain}"
path = "/"

View File

@@ -1985,6 +1985,25 @@
"self-hosted"
]
},
{
"id": "firecrawl",
"name": "Firecrawl",
"version": "latest",
"description": "Firecrawl is an API service that takes a URL, crawls it, and converts it into clean markdown or structured data. It can crawl all accessible subpages and provide clean data for each.",
"logo": "firecrawl.svg",
"links": {
"github": "https://github.com/firecrawl/firecrawl",
"website": "https://firecrawl.dev",
"docs": "https://github.com/firecrawl/firecrawl"
},
"tags": [
"api",
"crawler",
"scraping",
"data-extraction",
"llm"
]
},
{
"id": "fivem",
"name": "FiveM Server",