fix(ci): override DOCKER_TLS_CERTDIR via docker run options and improve Docker discovery

The act runner on Codeberg may not apply the services.env block to the
DinD container, so DOCKER_TLS_CERTDIR defaults to /certs and dockerd
starts with TLS on port 2376 instead of 2375. Fix by passing
`--env DOCKER_TLS_CERTDIR=` directly in the service's `options:` field so
it is always applied at docker run time.

Also:
- Try the host Docker socket (DooD) before DinD; many self-hosted
  runners mount /var/run/docker.sock, which is simpler and more reliable.
- Remove the workflow-level DOCKER_HOST override; let the step discover
  and export the correct value instead of pre-forcing tcp://docker:2375.
- Retry DinD by hostname (6 attempts, 10 s apart, about 60 s) before
  falling back to the DNS probe and subnet scan.
- Add DNS resolution check (getent hosts docker) and a port 2376 probe
  that surfaces the TLS-still-enabled diagnostic message clearly.
- Improve final diagnostics (IPs, DNS, socket path) to aid future debugging.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Thomas SharedInbox
2026-05-24 02:10:25 +02:00
co-authored by Claude Sonnet 4.6
parent 68dcee6968
commit 5abcf55aa7
+52 -22
View File
@@ -16,10 +16,10 @@ jobs:
image: docker:27-dind
env:
DOCKER_TLS_CERTDIR: ""
options: --privileged
env:
DOCKER_HOST: tcp://docker:2375
# Pass --env directly to docker run to override the image-level default
# (DOCKER_TLS_CERTDIR=/certs) regardless of whether the env: field above
# is honoured by the act runner version on this host.
options: --privileged --env DOCKER_TLS_CERTDIR=
steps:
- uses: actions/checkout@v4
@@ -47,33 +47,63 @@ jobs:
echo "Remote Dagger engine configured, no local Docker needed."
exit 0
fi
# Try the configured DOCKER_HOST first
if docker info >/dev/null 2>&1; then
echo "Docker available at $DOCKER_HOST"
exit 0
# 1. Host Docker socket (DooD) — available when runner mounts /var/run/docker.sock
if [ -S /var/run/docker.sock ]; then
if DOCKER_HOST=unix:///var/run/docker.sock docker info >/dev/null 2>&1; then
echo "Docker available via host socket."
echo "DOCKER_HOST=unix:///var/run/docker.sock" >> "$GITHUB_ENV"
exit 0
fi
fi
# Codeberg's act runner may not register the service container with the
# 'docker' DNS alias. Scan all local /24 subnets (one per runner
# interface) for any host accepting connections on port 2375.
echo "Docker unreachable via hostname; scanning network for DinD..." >&2
FOUND=""
# 2. DinD service hostname — retry up to 60 s to allow the inner daemon to start
for attempt in $(seq 1 6); do
if DOCKER_HOST=tcp://docker:2375 docker info >/dev/null 2>&1; then
echo "Docker available at tcp://docker:2375"
echo "DOCKER_HOST=tcp://docker:2375" >> "$GITHUB_ENV"
exit 0
fi
echo "DinD not ready (attempt $attempt/6), waiting 10 s..." >&2
sleep 10
done
# 3. DNS-based discovery: resolve 'docker' and probe it directly
DOCKER_IP=$(getent hosts docker 2>/dev/null | awk '{print $1; exit}')
if [ -n "$DOCKER_IP" ]; then
echo "docker resolves to $DOCKER_IP" >&2
if (timeout 2 bash -c "echo >/dev/tcp/${DOCKER_IP}/2375" 2>/dev/null); then
echo "Found Docker daemon at $DOCKER_IP:2375"
echo "DOCKER_HOST=tcp://$DOCKER_IP:2375" >> "$GITHUB_ENV"
exit 0
fi
if (timeout 2 bash -c "echo >/dev/tcp/${DOCKER_IP}/2376" 2>/dev/null); then
echo "ERROR: Docker at $DOCKER_IP answers on 2376 (TLS) but not 2375. TLS was not disabled." >&2
fi
fi
# 4. Scan all local /24 subnets for any host on port 2375
echo "Scanning network for DinD on port 2375..." >&2
for MY_IP in $(hostname -I); do
case "$MY_IP" in 127.*) continue ;; esac
PREFIX=$(echo "$MY_IP" | cut -d. -f1-3)
for i in $(seq 1 254); do
ip="${PREFIX}.${i}"
if (timeout 0.3 bash -c "echo >/dev/tcp/${ip}/2375" 2>/dev/null); then
FOUND="$ip"
break 2
if (timeout 0.2 bash -c "echo >/dev/tcp/${ip}/2375" 2>/dev/null); then
echo "Found Docker daemon at $ip:2375"
echo "DOCKER_HOST=tcp://$ip:2375" >> "$GITHUB_ENV"
exit 0
fi
done
done
if [ -z "$FOUND" ]; then
echo "ERROR: Could not locate Docker daemon on the network" >&2
exit 1
fi
echo "Found Docker daemon at $FOUND:2375"
echo "DOCKER_HOST=tcp://$FOUND:2375" >> "$GITHUB_ENV"
# Diagnostics before giving up
echo "=== Docker discovery diagnostics ===" >&2
echo "hostname -I: $(hostname -I)" >&2
echo "docker DNS: $(getent hosts docker 2>/dev/null || echo 'not resolved')" >&2
echo "docker.sock: $(ls -la /var/run/docker.sock 2>/dev/null || echo 'not present')" >&2
echo "ERROR: Could not locate Docker daemon" >&2
exit 1
- name: Prune Dagger cache before check
env: