From d4dbe8d0fe291e1f4449b598991ee6116cca0585 Mon Sep 17 00:00:00 2001
From: harmsolo13
Date: Wed, 29 Apr 2026 18:13:26 +0930
Subject: [PATCH] fix: replace celery container's inherited curl-based healthcheck
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The celery container builds from flowsint-api/Dockerfile, which carries a
HEALTHCHECK directive that does `curl -f http://localhost:5001/health`.
The API container has an HTTP server on 5001 — celery doesn't, it's a
worker. So the inherited healthcheck always fails and `docker ps` shows
celery as (unhealthy) even when the worker is actively processing jobs.

This is cosmetic noise today but bites in two real ways: (1) restart
tooling that keys off health can't react correctly to a real celery
failure, because a working worker and a broken one both report
(unhealthy), (2) any service that adds
`depends_on: celery: condition: service_healthy` will refuse to start.

Fix: add a service-level healthcheck on celery in both compose files
(prod and dev) that uses celery's own `inspect ping` primitive against
the worker's broker. A compose-level healthcheck overrides the
Dockerfile-level one, so no Dockerfile change is needed.

Smoke-tested locally: container goes from (unhealthy) to (healthy)
within ~30s of restart with no other changes.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 docker-compose.dev.yml  | 8 ++++++++
 docker-compose.prod.yml | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index c8026c7..c158303 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -123,6 +123,14 @@ services:
       - REDIS_URL=redis://redis:6379/0
       - SKIP_MIGRATIONS=true
       - AUTH_SECRET=${AUTH_SECRET}
+    healthcheck:
+      # Celery has no HTTP server — Dockerfile's curl-based healthcheck always fails.
+      # Use celery's own ping primitive instead.
+      test: ["CMD-SHELL", "celery -A flowsint_core.core.celery inspect ping -d celery@$$HOSTNAME || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
     depends_on:
       postgres:
         condition: service_healthy
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 62e12cc..3128c5e 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -123,6 +123,14 @@ services:
       - REDIS_URL=redis://redis:6379/0
       - SKIP_MIGRATIONS=true
       - AUTH_SECRET=${AUTH_SECRET}
+    healthcheck:
+      # Celery has no HTTP server — Dockerfile's curl-based healthcheck always fails.
+      # Use celery's own ping primitive instead.
+      test: ["CMD-SHELL", "celery -A flowsint_core.core.celery inspect ping -d celery@$$HOSTNAME || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
     depends_on:
       postgres:
         condition: service_healthy