From 5abcf55aa7dbee86e3396191acad9c95617788cc Mon Sep 17 00:00:00 2001
From: Thomas SharedInbox
Date: Sun, 24 May 2026 02:10:25 +0200
Subject: [PATCH] fix(ci): override DOCKER_TLS_CERTDIR via docker run options and improve Docker discovery

The act runner on Codeberg may not apply the services.env block to the
DinD container, so DOCKER_TLS_CERTDIR defaults to /certs and dockerd
starts with TLS on port 2376 instead of 2375. Fix by passing
--env DOCKER_TLS_CERTDIR= directly via options: so it is always applied
at docker run time.

Also:
- Try the host Docker socket (DooD) first before DinD; many self-hosted
  runners mount /var/run/docker.sock and this is simpler and more reliable.
- Remove the workflow-level DOCKER_HOST override; let the step discover
  and export the correct value instead of pre-forcing tcp://docker:2375.
- Retry DinD by hostname up to 60 s before falling back to scanning.
- Add DNS resolution check (getent hosts docker) and a port 2376 probe
  that surfaces the TLS-still-enabled diagnostic message clearly.
- Only export a DNS- or scan-discovered endpoint once docker info answers
  on it; an open TCP port alone is not proof of a usable daemon.
- Improve final diagnostics (IPs, DNS, socket path) to aid future debugging.

Co-Authored-By: Claude Sonnet 4.6
---
 .forgejo/workflows/ci.yml | 74 +++++++++++++++++++++++++++------------
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml
index 09914f3..bde57ab 100644
--- a/.forgejo/workflows/ci.yml
+++ b/.forgejo/workflows/ci.yml
@@ -16,10 +16,10 @@ jobs:
         image: docker:27-dind
         env:
           DOCKER_TLS_CERTDIR: ""
-        options: --privileged
-
-    env:
-      DOCKER_HOST: tcp://docker:2375
+        # Pass --env directly to docker run to override the image-level default
+        # (DOCKER_TLS_CERTDIR=/certs) regardless of whether the env: field above
+        # is honoured by the act runner version on this host.
+        options: --privileged --env DOCKER_TLS_CERTDIR=
 
     steps:
       - uses: actions/checkout@v4
@@ -47,33 +47,63 @@ jobs:
             echo "Remote Dagger engine configured, no local Docker needed."
             exit 0
           fi
-          # Try the configured DOCKER_HOST first
-          if docker info >/dev/null 2>&1; then
-            echo "Docker available at $DOCKER_HOST"
-            exit 0
+
+          # 1. Host Docker socket (DooD) — available when runner mounts /var/run/docker.sock
+          if [ -S /var/run/docker.sock ]; then
+            if DOCKER_HOST=unix:///var/run/docker.sock docker info >/dev/null 2>&1; then
+              echo "Docker available via host socket."
+              echo "DOCKER_HOST=unix:///var/run/docker.sock" >> "$GITHUB_ENV"
+              exit 0
+            fi
           fi
-          # Codeberg's act runner may not register the service container with the
-          # 'docker' DNS alias. Scan all local /24 subnets (one per runner
-          # interface) for any host accepting connections on port 2375.
-          echo "Docker unreachable via hostname; scanning network for DinD..." >&2
-          FOUND=""
+
+          # 2. DinD service hostname — retry up to 60 s to allow the inner daemon to start
+          for attempt in $(seq 1 6); do
+            if DOCKER_HOST=tcp://docker:2375 docker info >/dev/null 2>&1; then
+              echo "Docker available at tcp://docker:2375"
+              echo "DOCKER_HOST=tcp://docker:2375" >> "$GITHUB_ENV"
+              exit 0
+            fi
+            echo "DinD not ready (attempt $attempt/6), waiting 10 s..." >&2
+            sleep 10
+          done
+
+          # 3. DNS-based discovery: resolve 'docker'; accept it only if docker info answers on the IP
+          DOCKER_IP=$(getent hosts docker 2>/dev/null | awk '{print $1; exit}')
+          if [ -n "$DOCKER_IP" ]; then
+            echo "docker resolves to $DOCKER_IP" >&2
+            if (timeout 2 bash -c "echo >/dev/tcp/${DOCKER_IP}/2375" 2>/dev/null) && DOCKER_HOST="tcp://${DOCKER_IP}:2375" timeout 10 docker info >/dev/null 2>&1; then
+              echo "Found Docker daemon at $DOCKER_IP:2375"
+              echo "DOCKER_HOST=tcp://$DOCKER_IP:2375" >> "$GITHUB_ENV"
+              exit 0
+            fi
+            if (timeout 2 bash -c "echo >/dev/tcp/${DOCKER_IP}/2376" 2>/dev/null); then
+              echo "ERROR: Docker at $DOCKER_IP answers on 2376 (TLS) but not 2375. TLS was not disabled." >&2
+            fi
+          fi
+
+          # 4. Scan all local /24 subnets; a host with port 2375 open must also answer docker info
+          echo "Scanning network for DinD on port 2375..." >&2
           for MY_IP in $(hostname -I); do
             case "$MY_IP" in 127.*) continue ;; esac
             PREFIX=$(echo "$MY_IP" | cut -d. -f1-3)
             for i in $(seq 1 254); do
               ip="${PREFIX}.${i}"
-              if (timeout 0.3 bash -c "echo >/dev/tcp/${ip}/2375" 2>/dev/null); then
-                FOUND="$ip"
-                break 2
+              if (timeout 0.2 bash -c "echo >/dev/tcp/${ip}/2375" 2>/dev/null) && DOCKER_HOST="tcp://${ip}:2375" timeout 10 docker info >/dev/null 2>&1; then
+                echo "Found Docker daemon at $ip:2375"
+                echo "DOCKER_HOST=tcp://$ip:2375" >> "$GITHUB_ENV"
+                exit 0
               fi
             done
           done
-          if [ -z "$FOUND" ]; then
-            echo "ERROR: Could not locate Docker daemon on the network" >&2
-            exit 1
-          fi
-          echo "Found Docker daemon at $FOUND:2375"
-          echo "DOCKER_HOST=tcp://$FOUND:2375" >> "$GITHUB_ENV"
+
+          # Diagnostics before giving up
+          echo "=== Docker discovery diagnostics ===" >&2
+          echo "hostname -I: $(hostname -I)" >&2
+          echo "docker DNS: $(getent hosts docker 2>/dev/null || echo 'not resolved')" >&2
+          echo "docker.sock: $(ls -la /var/run/docker.sock 2>/dev/null || echo 'not present')" >&2
+          echo "ERROR: Could not locate Docker daemon" >&2
+          exit 1
 
       - name: Prune Dagger cache before check
         env: