diff --git a/.gitignore b/.gitignore
index 1f125c2..de47e6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -120,3 +120,4 @@ dagger-certs
 .viminfo
 /go
 .last_deployed_sha
+.fail_count
diff --git a/deploy_cron.py b/deploy_cron.py
index 89bb97e..1c50c91 100644
--- a/deploy_cron.py
+++ b/deploy_cron.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 """
 Cron deploy script for sharedinbox website.
-Runs every 15 minutes; skips if origin/main has not changed since last successful deploy.
-If last deploy failed and main still hasn't changed, creates a Codeberg issue.
+Runs every 5 minutes; skips if origin/main has not changed since last successful deploy.
+Gives up and creates a Codeberg issue after 5 consecutive failures on the same commit.
 """
 import subprocess
 import sys
@@ -12,9 +12,11 @@ from pathlib import Path
 REPO_DIR = Path(__file__).parent.resolve()
 SHA_FILE = REPO_DIR / '.last_deployed_sha'
 FAILED_SHA_FILE = REPO_DIR / '.last_failed_sha'
+FAIL_COUNT_FILE = REPO_DIR / '.fail_count'
 ERROR_FILE = REPO_DIR / '.last_deploy_error'
 ISSUE_SHA_FILE = REPO_DIR / '.last_issue_sha'
 
+MAX_FAILURES = 5
 REPO = 'guettli/sharedinbox'
 CODEBERG = 'https://codeberg.org'
 
@@ -30,24 +32,42 @@ def read(path: Path) -> str:
     return path.read_text().strip() if path.exists() else ''
 
 
-def create_issue(failed_sha: str) -> None:
+def read_int(path: Path) -> int:
+    try:
+        return int(read(path))
+    except ValueError:
+        return 0
+
+
+def issue_exists_for(sha: str) -> bool:
+    """Check Codeberg for an open issue referencing this commit SHA."""
+    result = subprocess.run(
+        ['tea', 'issue', 'list', '--repo', REPO, '--state', 'open',
+         '--limit', '50', '--output', 'simple'],
+        capture_output=True, text=True,
+    )
+    return sha[:8] in result.stdout
+
+
+def create_issue(failed_sha: str, fail_count: int) -> None:
     error_output = read(ERROR_FILE)
     tail = '\n'.join(error_output.splitlines()[-40:]) if error_output else '(no output captured)'
     commit_url = f'{CODEBERG}/{REPO}/commit/{failed_sha}'
     script_url = f'{CODEBERG}/{REPO}/src/branch/main/deploy_cron.py'
     timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
 
-    title = f'Deploy failed on {failed_sha[:8]} — main needs a fix'
+    title = f'Deploy failed {fail_count}x on {failed_sha[:8]} — needs fix'
     body = f"""\
 ## Deploy failure — action needed
 
-The automated deploy cron has been failing on commit \
-[{failed_sha[:8]}]({commit_url}) and `main` has not advanced since the failure.
+The automated deploy cron failed **{fail_count} times** on commit \
+[{failed_sha[:8]}]({commit_url}) and has stopped retrying.
 
 | | |
 |---|---|
 | **Detected** | {timestamp} |
 | **Failing commit** | [{failed_sha}]({commit_url}) |
+| **Failures** | {fail_count} / {MAX_FAILURES} |
 | **Deploy script** | [deploy_cron.py]({script_url}) |
 | **Log file** | `~/si-deploy-cron/deploy.log` |
 
@@ -59,7 +79,7 @@ The automated deploy cron has been failing on commit \
 
 ### Next steps
 
-Push a fix to `main` — the cron runs every 15 min and will retry automatically.
+Push a fix to `main` — the cron (every 5 min) will retry automatically on the next commit.
 """
 
     result = subprocess.run(
@@ -82,22 +102,24 @@ def main():
 
     last_sha = read(SHA_FILE)
     last_failed = read(FAILED_SHA_FILE)
+    fail_count = read_int(FAIL_COUNT_FILE) if remote_sha == last_failed else 0
     last_issue = read(ISSUE_SHA_FILE)
 
     if remote_sha == last_sha:
         print(f'No changes since {remote_sha[:8]}, skipping.')
         return
 
-    if remote_sha == last_failed:
-        if remote_sha != last_issue:
-            print(f'{remote_sha[:8]} failed before and main has not changed — creating issue.')
-            create_issue(remote_sha)
+    if fail_count >= MAX_FAILURES:
+        if remote_sha != last_issue and not issue_exists_for(remote_sha):
+            print(f'{remote_sha[:8]} failed {fail_count}x — creating issue.')
+            create_issue(remote_sha, fail_count)
             ISSUE_SHA_FILE.write_text(remote_sha + '\n')
         else:
-            print(f'{remote_sha[:8]} still failing, issue already open, skipping.')
+            print(f'{remote_sha[:8]} failed {fail_count}x, issue already exists, skipping.')
         return
 
-    print(f'Deploying {remote_sha[:8]} (was {last_sha[:8] or "none"})...')
+    attempt = fail_count + 1
+    print(f'Deploying {remote_sha[:8]} (attempt {attempt}/{MAX_FAILURES}, was {last_sha[:8] or "none"})...')
     git('pull', '--ff-only', 'origin', 'main')
 
     result = subprocess.run(
@@ -109,16 +131,15 @@ def main():
     print(combined, end='')
 
     if result.returncode != 0:
-        print(f'Deploy failed (exit {result.returncode})', file=sys.stderr)
+        print(f'Deploy failed (exit {result.returncode}), attempt {attempt}/{MAX_FAILURES}', file=sys.stderr)
         FAILED_SHA_FILE.write_text(remote_sha + '\n')
+        FAIL_COUNT_FILE.write_text(str(attempt) + '\n')
         ERROR_FILE.write_text(combined)
-        ISSUE_SHA_FILE.unlink(missing_ok=True)
         sys.exit(1)
 
     SHA_FILE.write_text(remote_sha + '\n')
-    FAILED_SHA_FILE.unlink(missing_ok=True)
-    ERROR_FILE.unlink(missing_ok=True)
-    ISSUE_SHA_FILE.unlink(missing_ok=True)
+    for f in (FAILED_SHA_FILE, FAIL_COUNT_FILE, ERROR_FILE, ISSUE_SHA_FILE):
+        f.unlink(missing_ok=True)
     print('Deploy complete.')
 
 