fix(ci): override DOCKER_TLS_CERTDIR via docker run options and improve Docker discovery
The act runner on Codeberg may not apply the services.env block to the DinD container, so DOCKER_TLS_CERTDIR defaults to /certs and dockerd starts with TLS on port 2376 instead of plain TCP on port 2375. Fix this by passing --env DOCKER_TLS_CERTDIR= directly via the service's "options:" field, so that it is always applied at docker run time.

Also:
- Try the host Docker socket (DooD) before DinD; many self-hosted runners mount /var/run/docker.sock, and this is simpler and more reliable.
- Remove the workflow-level DOCKER_HOST override; let the step discover and export the correct value instead of pre-forcing tcp://docker:2375.
- Retry DinD by hostname for up to 60 s before falling back to scanning.
- Add a DNS resolution check (getent hosts docker) and a port 2376 probe that clearly surfaces the "TLS still enabled" diagnostic message.
- Improve the final diagnostics (IPs, DNS, socket path) to aid future debugging.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
co-authored by
Claude Sonnet 4.6
parent
68dcee6968
commit
5abcf55aa7
+52
-22
@@ -16,10 +16,10 @@ jobs:
|
||||
image: docker:27-dind
|
||||
env:
|
||||
DOCKER_TLS_CERTDIR: ""
|
||||
options: --privileged
|
||||
|
||||
env:
|
||||
DOCKER_HOST: tcp://docker:2375
|
||||
# Pass --env directly to docker run to override the image-level default
|
||||
# (DOCKER_TLS_CERTDIR=/certs) regardless of whether the env: field above
|
||||
# is honoured by the act runner version on this host.
|
||||
options: --privileged --env DOCKER_TLS_CERTDIR=
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -47,33 +47,63 @@ jobs:
|
||||
echo "Remote Dagger engine configured, no local Docker needed."
|
||||
exit 0
|
||||
fi
|
||||
# Try the configured DOCKER_HOST first
|
||||
if docker info >/dev/null 2>&1; then
|
||||
echo "Docker available at $DOCKER_HOST"
|
||||
exit 0
|
||||
|
||||
# 1. Host Docker socket (DooD) — available when runner mounts /var/run/docker.sock
|
||||
if [ -S /var/run/docker.sock ]; then
|
||||
if DOCKER_HOST=unix:///var/run/docker.sock docker info >/dev/null 2>&1; then
|
||||
echo "Docker available via host socket."
|
||||
echo "DOCKER_HOST=unix:///var/run/docker.sock" >> "$GITHUB_ENV"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
# Codeberg's act runner may not register the service container with the
|
||||
# 'docker' DNS alias. Scan all local /24 subnets (one per runner
|
||||
# interface) for any host accepting connections on port 2375.
|
||||
echo "Docker unreachable via hostname; scanning network for DinD..." >&2
|
||||
FOUND=""
|
||||
|
||||
# 2. DinD service hostname — retry up to 60 s to allow the inner daemon to start
|
||||
for attempt in $(seq 1 6); do
|
||||
if DOCKER_HOST=tcp://docker:2375 docker info >/dev/null 2>&1; then
|
||||
echo "Docker available at tcp://docker:2375"
|
||||
echo "DOCKER_HOST=tcp://docker:2375" >> "$GITHUB_ENV"
|
||||
exit 0
|
||||
fi
|
||||
echo "DinD not ready (attempt $attempt/6), waiting 10 s..." >&2
|
||||
sleep 10
|
||||
done
|
||||
|
||||
# 3. DNS-based discovery: resolve 'docker' and probe it directly
|
||||
DOCKER_IP=$(getent hosts docker 2>/dev/null | awk '{print $1; exit}')
|
||||
if [ -n "$DOCKER_IP" ]; then
|
||||
echo "docker resolves to $DOCKER_IP" >&2
|
||||
if (timeout 2 bash -c "echo >/dev/tcp/${DOCKER_IP}/2375" 2>/dev/null); then
|
||||
echo "Found Docker daemon at $DOCKER_IP:2375"
|
||||
echo "DOCKER_HOST=tcp://$DOCKER_IP:2375" >> "$GITHUB_ENV"
|
||||
exit 0
|
||||
fi
|
||||
if (timeout 2 bash -c "echo >/dev/tcp/${DOCKER_IP}/2376" 2>/dev/null); then
|
||||
echo "ERROR: Docker at $DOCKER_IP answers on 2376 (TLS) but not 2375. TLS was not disabled." >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
# 4. Scan all local /24 subnets for any host on port 2375
|
||||
echo "Scanning network for DinD on port 2375..." >&2
|
||||
for MY_IP in $(hostname -I); do
|
||||
case "$MY_IP" in 127.*) continue ;; esac
|
||||
PREFIX=$(echo "$MY_IP" | cut -d. -f1-3)
|
||||
for i in $(seq 1 254); do
|
||||
ip="${PREFIX}.${i}"
|
||||
if (timeout 0.3 bash -c "echo >/dev/tcp/${ip}/2375" 2>/dev/null); then
|
||||
FOUND="$ip"
|
||||
break 2
|
||||
if (timeout 0.2 bash -c "echo >/dev/tcp/${ip}/2375" 2>/dev/null); then
|
||||
echo "Found Docker daemon at $ip:2375"
|
||||
echo "DOCKER_HOST=tcp://$ip:2375" >> "$GITHUB_ENV"
|
||||
exit 0
|
||||
fi
|
||||
done
|
||||
done
|
||||
if [ -z "$FOUND" ]; then
|
||||
echo "ERROR: Could not locate Docker daemon on the network" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "Found Docker daemon at $FOUND:2375"
|
||||
echo "DOCKER_HOST=tcp://$FOUND:2375" >> "$GITHUB_ENV"
|
||||
|
||||
# Diagnostics before giving up
|
||||
echo "=== Docker discovery diagnostics ===" >&2
|
||||
echo "hostname -I: $(hostname -I)" >&2
|
||||
echo "docker DNS: $(getent hosts docker 2>/dev/null || echo 'not resolved')" >&2
|
||||
echo "docker.sock: $(ls -la /var/run/docker.sock 2>/dev/null || echo 'not present')" >&2
|
||||
echo "ERROR: Could not locate Docker daemon" >&2
|
||||
exit 1
|
||||
|
||||
- name: Prune Dagger cache before check
|
||||
env:
|
||||
|
||||
Reference in New Issue
Block a user