deploy: give up and open issue after 5 failures on same commit

Tracks consecutive failure count in .fail_count. On the 5th failure
for the same SHA, creates a Prio/High + State/Ready Codeberg issue.
Before creating, checks local .last_issue_sha and queries Codeberg
open issues to avoid duplicates.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Thomas SharedInbox
2026-05-23 11:37:57 +02:00
co-authored by Claude Sonnet 4.6
parent c259d2dabe
commit 57902e8218
2 changed files with 40 additions and 18 deletions
+1
View File
@@ -120,3 +120,4 @@ dagger-certs
.viminfo
/go
.last_deployed_sha
.fail_count
+39 -18
View File
@@ -1,8 +1,8 @@
#!/usr/bin/env python3
"""
Cron deploy script for sharedinbox website.
Runs every 15 minutes; skips if origin/main has not changed since last successful deploy.
If last deploy failed and main still hasn't changed, creates a Codeberg issue.
Runs every 5 minutes; skips if origin/main has not changed since last successful deploy.
Gives up and creates a Codeberg issue after 5 consecutive failures on the same commit.
"""
import subprocess
import sys
@@ -12,9 +12,11 @@ from pathlib import Path
# Directory containing this script; every state file below lives next to it.
REPO_DIR = Path(__file__).parent.resolve()
# Holds the SHA written after a successful deploy.
SHA_FILE = REPO_DIR / '.last_deployed_sha'
# Holds the SHA written when a deploy attempt fails.
FAILED_SHA_FILE = REPO_DIR / '.last_failed_sha'
# Holds the number of failed attempts recorded for the SHA in FAILED_SHA_FILE.
FAIL_COUNT_FILE = REPO_DIR / '.fail_count'
# Holds the combined output of the most recent failed deploy.
ERROR_FILE = REPO_DIR / '.last_deploy_error'
# Holds the SHA for which an issue was already created (local duplicate guard).
ISSUE_SHA_FILE = REPO_DIR / '.last_issue_sha'
# Once the failure count for a commit reaches this value, main() stops
# deploying it and creates an issue instead.
MAX_FAILURES = 5
# Repository slug passed to `tea --repo` and used to build Codeberg URLs.
REPO = 'guettli/sharedinbox'
# Base URL used to build commit and source links.
CODEBERG = 'https://codeberg.org'
@@ -30,24 +32,42 @@ def read(path: Path) -> str:
return path.read_text().strip() if path.exists() else ''
def create_issue(failed_sha: str) -> None:
def read_int(path: Path) -> int:
    """Return the integer stored in *path*, falling back to 0.

    The file content is obtained through ``read()``. Anything that cannot be
    parsed as an integer (including the empty string ``read()`` returns for a
    missing file) yields 0 instead of raising.
    """
    try:
        # Keep read() inside the try: any ValueError it raises is treated
        # the same way as unparsable content.
        count = int(read(path))
    except ValueError:
        count = 0
    return count
def issue_exists_for(sha: str) -> bool:
    """Check Codeberg for an open issue referencing this commit SHA.

    Lists up to 50 open issues of REPO via the ``tea`` CLI and looks for the
    first 8 characters of *sha* anywhere in the listing.

    Args:
        sha: Full commit SHA; only its first 8 characters are matched.

    Returns:
        True if the short SHA appears in the open-issue listing. False if it
        does not, if *sha* is empty, or if ``tea`` exits with a non-zero
        status (a warning is printed to stderr in that case).
    """
    short_sha = sha[:8]
    if not short_sha:
        # '' is a substring of every string, so an empty SHA would otherwise
        # always be reported as "issue exists".
        return False

    result = subprocess.run(
        ['tea', 'issue', 'list', '--repo', REPO, '--state', 'open',
         '--limit', '50', '--output', 'simple'],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        # Make the failed lookup visible instead of silently treating an
        # empty or partial listing as a real "no match".
        print(
            f'tea issue list failed (exit {result.returncode}); '
            'assuming no open issue exists.',
            file=sys.stderr,
        )
        return False
    return short_sha in result.stdout
def create_issue(failed_sha: str, fail_count: int) -> None:
error_output = read(ERROR_FILE)
tail = '\n'.join(error_output.splitlines()[-40:]) if error_output else '(no output captured)'
commit_url = f'{CODEBERG}/{REPO}/commit/{failed_sha}'
script_url = f'{CODEBERG}/{REPO}/src/branch/main/deploy_cron.py'
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
title = f'Deploy failed on {failed_sha[:8]} main needs a fix'
title = f'Deploy failed {fail_count}x on {failed_sha[:8]} — needs fix'
body = f"""\
## Deploy failure — action needed
The automated deploy cron has been failing on commit \
[{failed_sha[:8]}]({commit_url}) and `main` has not advanced since the failure.
The automated deploy cron failed **{fail_count} times** on commit \
[{failed_sha[:8]}]({commit_url}) and has stopped retrying.
| | |
|---|---|
| **Detected** | {timestamp} |
| **Failing commit** | [{failed_sha}]({commit_url}) |
| **Failures** | {fail_count} / {MAX_FAILURES} |
| **Deploy script** | [deploy_cron.py]({script_url}) |
| **Log file** | `~/si-deploy-cron/deploy.log` |
@@ -59,7 +79,7 @@ The automated deploy cron has been failing on commit \
### Next steps
Push a fix to `main` — the cron runs every 15 min and will retry automatically.
Push a fix to `main` — the cron (every 5 min) will retry automatically on the next commit.
"""
result = subprocess.run(
@@ -82,22 +102,24 @@ def main():
last_sha = read(SHA_FILE)
last_failed = read(FAILED_SHA_FILE)
fail_count = read_int(FAIL_COUNT_FILE) if remote_sha == last_failed else 0
last_issue = read(ISSUE_SHA_FILE)
if remote_sha == last_sha:
print(f'No changes since {remote_sha[:8]}, skipping.')
return
if remote_sha == last_failed:
if remote_sha != last_issue:
print(f'{remote_sha[:8]} failed before and main has not changed — creating issue.')
create_issue(remote_sha)
if fail_count >= MAX_FAILURES:
if remote_sha != last_issue and not issue_exists_for(remote_sha):
print(f'{remote_sha[:8]} failed {fail_count}x — creating issue.')
create_issue(remote_sha, fail_count)
ISSUE_SHA_FILE.write_text(remote_sha + '\n')
else:
print(f'{remote_sha[:8]} still failing, issue already open, skipping.')
print(f'{remote_sha[:8]} failed {fail_count}x, issue already exists, skipping.')
return
print(f'Deploying {remote_sha[:8]} (was {last_sha[:8] or "none"})...')
attempt = fail_count + 1
print(f'Deploying {remote_sha[:8]} (attempt {attempt}/{MAX_FAILURES}, was {last_sha[:8] or "none"})...')
git('pull', '--ff-only', 'origin', 'main')
result = subprocess.run(
@@ -109,16 +131,15 @@ def main():
print(combined, end='')
if result.returncode != 0:
print(f'Deploy failed (exit {result.returncode})', file=sys.stderr)
print(f'Deploy failed (exit {result.returncode}), attempt {attempt}/{MAX_FAILURES}', file=sys.stderr)
FAILED_SHA_FILE.write_text(remote_sha + '\n')
FAIL_COUNT_FILE.write_text(str(attempt) + '\n')
ERROR_FILE.write_text(combined)
ISSUE_SHA_FILE.unlink(missing_ok=True)
sys.exit(1)
SHA_FILE.write_text(remote_sha + '\n')
FAILED_SHA_FILE.unlink(missing_ok=True)
ERROR_FILE.unlink(missing_ok=True)
ISSUE_SHA_FILE.unlink(missing_ok=True)
for f in (FAILED_SHA_FILE, FAIL_COUNT_FILE, ERROR_FILE, ISSUE_SHA_FILE):
f.unlink(missing_ok=True)
print('Deploy complete.')