fix(ci): replace continue-on-error with conditional step execution (#172)

Remove all continue-on-error usages from CI workflows:
- deploy.yml: replace continue-on-error on SSH deploy steps with `if: secrets.SSH_PRIVATE_KEY != ''` so steps are skipped (not failed) when the secret is absent
- windows-nightly.yml: remove continue-on-error from job and steps (job is already disabled via `if: false`)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
perf: parallelize APK deploy and reduce fetch-depth in deploy.yml (#171) (#188)
2026-05-23 19:03:04 +02:00 · 2026-05-23 18:55:08 +02:00 · 2026-05-23 18:35:15 +02:00 · 2026-05-23 17:42:20 +02:00 · 2026-05-23 17:25:08 +02:00 · 2026-05-23 17:10:11 +02:00
6 changed files with 149 additions and 133 deletions
@@ -14,7 +14,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
-          fetch-depth: 50
+          fetch-depth: 1

      - name: Check runner tools
        run: |
@@ -49,7 +49,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
-          fetch-depth: 50
+          fetch-depth: 1

      - name: Check runner tools
        run: |
@@ -73,12 +73,36 @@ jobs:
          DAGGER_NO_NAG: "1"
        run: task publish-android

+      - name: Cleanup TLS credentials
+        if: always()
+        run: rm -rf /tmp/dagger-tls /tmp/stunnel-dagger.conf /tmp/stunnel.pid
+
+  deploy-apk:
+    name: Build & Deploy APK to Server
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Check runner tools
+        run: |
+          command -v dagger  >/dev/null 2>&1 || { echo "ERROR: dagger is not installed in the runner image. Add it to .forgejo/Dockerfile."; exit 1; }
+          command -v task    >/dev/null 2>&1 || { echo "ERROR: task is not installed in the runner image. Add it to .forgejo/Dockerfile."; exit 1; }
+          dpkg -s stunnel4 netcat-openbsd >/dev/null 2>&1 || { echo "ERROR: stunnel4/netcat-openbsd are not installed in the runner image. Add them to .forgejo/Dockerfile."; exit 1; }
+
+      - name: Setup Dagger Remote Engine (via stunnel)
+        env:
+          DAGGER_STUNNEL_URL: ${{ secrets.DAGGER_STUNNEL_URL }}
+          DAGGER_CA_CERT: ${{ secrets.DAGGER_CA_CERT }}
+          DAGGER_CLIENT_CERT: ${{ secrets.DAGGER_CLIENT_CERT }}
+          DAGGER_CLIENT_KEY: ${{ secrets.DAGGER_CLIENT_KEY }}
+        run: scripts/setup_dagger_remote.sh
+
      - name: Build & Deploy APK to server
-        # continue-on-error: step requires SSH_PRIVATE_KEY secret; if unset the task
-        # precondition fails, but we don't want that to fail the whole job — the Play
-        # Store publish above already succeeded.  The overall job stays green even
-        # though this step shows as failed/orange in the UI.
-        continue-on-error: true
+        if: ${{ secrets.SSH_PRIVATE_KEY != '' }}
        env:
          SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
          SSH_USER: ${{ secrets.SSH_USER }}
@@ -100,7 +124,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
-          fetch-depth: 50
+          fetch-depth: 1

      - name: Check runner tools
        run: |
@@ -117,12 +141,7 @@ jobs:
        run: scripts/setup_dagger_remote.sh

      - name: Build & Deploy Linux to server
-        # continue-on-error: step requires SSH_PRIVATE_KEY secret; if unset the task
-        # precondition fails, but the build step that precedes this (done via Dagger)
-        # already succeeded.  Deployment is best-effort; a missing secret should not
-        # turn the job red.  The step will show as failed/orange in the UI even though
-        # the overall job is green — this is intentional.
-        continue-on-error: true
+        if: ${{ secrets.SSH_PRIVATE_KEY != '' }}
        env:
          SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
          SSH_USER: ${{ secrets.SSH_USER }}
@@ -137,16 +156,16 @@ jobs:
  publish-website:
    name: Publish Website Build History
    runs-on: ubuntu-latest
-    needs: [build-linux, deploy-playstore]
+    needs: [build-linux, deploy-playstore, deploy-apk]
    if: |
      always() &&
-      (needs.build-linux.result == 'success' || needs.deploy-playstore.result == 'success')
+      (needs.build-linux.result == 'success' || needs.deploy-playstore.result == 'success' || needs.deploy-apk.result == 'success')
    timeout-minutes: 60

    steps:
      - uses: actions/checkout@v4
        with:
-          fetch-depth: 50
+          fetch-depth: 1

      - name: Check runner tools
        run: |
@@ -163,9 +182,7 @@ jobs:
        run: scripts/setup_dagger_remote.sh

      - name: Generate build history and deploy website
-        # continue-on-error: website publish is best-effort; a missing SSH_PRIVATE_KEY
-        # should not block the overall workflow status.
-        continue-on-error: true
+        if: ${{ secrets.SSH_PRIVATE_KEY != '' }}
        env:
          SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
          SSH_USER: ${{ secrets.SSH_USER }}
@@ -180,7 +197,7 @@ jobs:
  label-deploy-health:
    name: Update Deploy Health Label
    runs-on: ubuntu-latest
-    needs: [test-android-firebase, deploy-playstore, build-linux]
+    needs: [test-android-firebase, deploy-playstore, deploy-apk, build-linux]
    if: always() && vars.DEPLOY_HEALTH_ISSUE != ''
    timeout-minutes: 5

@@ -190,7 +207,7 @@ jobs:
          FORGEJO_TOKEN: ${{ github.token }}
          FORGEJO_URL: ${{ github.server_url }}
          DEPLOY_HEALTH_ISSUE: ${{ vars.DEPLOY_HEALTH_ISSUE }}
-          ALL_SUCCEEDED: ${{ needs.test-android-firebase.result == 'success' && needs.deploy-playstore.result == 'success' && needs.build-linux.result == 'success' }}
+          ALL_SUCCEEDED: ${{ needs.test-android-firebase.result == 'success' && needs.deploy-playstore.result == 'success' && needs.deploy-apk.result == 'success' && needs.build-linux.result == 'success' }}
        run: |
          python3 - << 'PYEOF'
          import os, json, urllib.request, urllib.error
@@ -11,7 +11,6 @@ jobs:
    name: Build & Deploy Windows (Nightly)
    runs-on: windows-runner
    if: false
-    continue-on-error: true

    steps:
      - uses: actions/checkout@v4
@@ -32,7 +31,6 @@ jobs:

      - name: Set up SSH key
        if: env.SKIP_BUILD != 'true'
-        continue-on-error: true
        env:
          SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        run: |
@@ -42,7 +40,6 @@ jobs:

      - name: Deploy Windows to server
        if: env.SKIP_BUILD != 'true'
-        continue-on-error: true
        env:
          SSH_USER: ${{ secrets.SSH_USER }}
          SSH_HOST: ${{ secrets.SSH_HOST }}
@@ -221,7 +221,7 @@ func (m *Ci) pubGetLayer() *dagger.Container {
 		WithExec([]string{"/bin/bash", "-c",
 			`tmp=$(mktemp); trap 'rm -f "$tmp"' EXIT; ` +
 				`flutter pub get >"$tmp" 2>&1 || { cat "$tmp"; exit 1; }; ` +
-				`grep -vE '^[+~><] ' "$tmp" || true`}).
+				`grep -vE '^(\+|Downloading packages)' "$tmp" || true`}).
 		WithExec([]string{"python3", "-c",
 			"import json, os\n" +
 				"f='.dart_tool/package_config.json'; d=json.load(open(f)); [d.pop(k,None) for k in ('generated','generatorVersion')]; json.dump(d,open(f,'w'))\n" +
@@ -245,7 +245,7 @@ func (m *Ci) codegenBase() *dagger.Container {
 		WithExec([]string{"/bin/bash", "-c",
 			`tmp=$(mktemp); trap 'rm -f "$tmp"' EXIT; ` +
 				`flutter pub run build_runner build --delete-conflicting-outputs >"$tmp" 2>&1 || { cat "$tmp"; exit 1; }; ` +
-				`grep -vE '^\[' "$tmp" || true`})
+				`grep -vE '^\[.*s\] \|' "$tmp" || true`})
 }

 // setup overlays platform-specific source files onto the shared codegen base.
@@ -312,6 +312,7 @@ func (m *Ci) Hugo() *dagger.Container {
 		From("alpine:3.21").
 		WithExec([]string{"apk", "--no-cache", "add", "curl", "tar", "libc6-compat", "libstdc++", "gcompat"}).
 		WithExec([]string{"curl", "-sL", "https://github.com/gohugoio/hugo/releases/download/v0.152.2/hugo_extended_0.152.2_linux-amd64.tar.gz", "-o", "/tmp/hugo.tar.gz"}).
+		WithExec([]string{"sh", "-c", "echo '416bcfbdf5f68469ec9644dbe507da50fc21b94b69a125b059d64ed2cb4d8c27  /tmp/hugo.tar.gz' | sha256sum -c -"}).
 		WithExec([]string{"tar", "-xzf", "/tmp/hugo.tar.gz", "-C", "/usr/local/bin", "hugo"}).
 		WithExec([]string{"rm", "/tmp/hugo.tar.gz"})
 }
@@ -410,7 +411,7 @@ func (m *Ci) CheckMocks(ctx context.Context) (string, error) {
 		WithExec([]string{"/bin/bash", "-c",
 			`tmp=$(mktemp); trap 'rm -f "$tmp"' EXIT; ` +
 				`flutter pub run build_runner build --delete-conflicting-outputs >"$tmp" 2>&1 || { cat "$tmp"; exit 1; }; ` +
-				`grep -vE '^\[' "$tmp" || true`}).
+				`grep -vE '^\[.*s\] \|' "$tmp" || true`}).
 		WithExec([]string{"/bin/bash", "-c", "CHANGED=$(find . -name '*.mocks.dart' | xargs -r git diff --exit-code); if [ $? -ne 0 ]; then echo \"ERROR: Mocks are out of date\"; exit 1; fi; echo \"Mocks are up to date.\""}).
 		Stdout(ctx)
 }
@@ -1,24 +1,17 @@
 #!/usr/bin/env python3
 """
 Cron deploy script for sharedinbox website.
-Runs every 5 minutes; skips if origin/main has not changed since last successful deploy.
-Gives up and creates a Codeberg issue after 5 consecutive failures on the same commit.
+Runs every 5 minutes; skips if origin/main has not changed since last trigger.
+Triggers the 'Deploy Website' Forgejo Actions workflow via fgj on each new commit.
+Forgejo Actions handles failure reporting.
 """
 import subprocess
 import sys
-from datetime import datetime, timezone
 from pathlib import Path

 REPO_DIR = Path(__file__).parent.resolve()
-SHA_FILE        = REPO_DIR / '.last_deployed_sha'
-FAILED_SHA_FILE = REPO_DIR / '.last_failed_sha'
-FAIL_COUNT_FILE = REPO_DIR / '.fail_count'
-ERROR_FILE      = REPO_DIR / '.last_deploy_error'
-ISSUE_SHA_FILE  = REPO_DIR / '.last_issue_sha'
-
-MAX_FAILURES = 5
+SHA_FILE = REPO_DIR / '.last_deployed_sha'
 REPO = 'guettli/sharedinbox'
-CODEBERG = 'https://codeberg.org'


 def git(*args):
@@ -32,70 +25,6 @@ def read(path: Path) -> str:
    return path.read_text().strip() if path.exists() else ''


-def read_int(path: Path) -> int:
-    try:
-        return int(read(path))
-    except ValueError:
-        return 0
-
-
-def issue_exists_for(sha: str) -> bool:
-    """Check Codeberg for an open issue referencing this commit SHA."""
-    result = subprocess.run(
-        ['tea', 'issue', 'list', '--repo', REPO, '--state', 'open',
-         '--limit', '50', '--output', 'simple'],
-        capture_output=True, text=True,
-    )
-    return sha[:8] in result.stdout
-
-
-def create_issue(failed_sha: str, fail_count: int) -> None:
-    error_output = read(ERROR_FILE)
-    tail = '\n'.join(error_output.splitlines()[-40:]) if error_output else '(no output captured)'
-    commit_url = f'{CODEBERG}/{REPO}/commit/{failed_sha}'
-    script_url = f'{CODEBERG}/{REPO}/src/branch/main/deploy_cron.py'
-    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
-
-    title = f'Deploy failed {fail_count}x on {failed_sha[:8]} — needs fix'
-    body = f"""\
-## Deploy failure — action needed
-
-The automated deploy cron failed **{fail_count} times** on commit \
-[{failed_sha[:8]}]({commit_url}) and has stopped retrying.
-
-| | |
-|---|---|
-| **Detected** | {timestamp} |
-| **Failing commit** | [{failed_sha}]({commit_url}) |
-| **Failures** | {fail_count} / {MAX_FAILURES} |
-| **Deploy script** | [deploy_cron.py]({script_url}) |
-| **Log file** | `~/si-deploy-cron/deploy.log` |
-
-### Last deploy output
-
-```
-{tail}
-```
-
-### Next steps
-
-Push a fix to `main` — the cron (every 5 min) will retry automatically on the next commit.
-"""
-
-    result = subprocess.run(
-        ['tea', 'issue', 'create',
-         '--repo', REPO,
-         '--title', title,
-         '--description', body,
-         '--labels', 'State/Ready,Prio/High'],
-        capture_output=True, text=True,
-    )
-    if result.returncode != 0:
-        print(f'Failed to create issue: {result.stderr}', file=sys.stderr)
-    else:
-        print(f'Issue created: {result.stdout.strip()}')
-
-
 def main():
    try:
        git('fetch', 'origin', 'main')
@@ -103,48 +32,23 @@ def main():
        print(f'git fetch failed (transient?): {exc} — skipping this run.', file=sys.stderr)
        return
    remote_sha = git('rev-parse', 'origin/main')
-
-    last_sha    = read(SHA_FILE)
-    last_failed = read(FAILED_SHA_FILE)
-    fail_count  = read_int(FAIL_COUNT_FILE) if remote_sha == last_failed else 0
-    last_issue  = read(ISSUE_SHA_FILE)
+    last_sha = read(SHA_FILE)

    if remote_sha == last_sha:
        print(f'No changes since {remote_sha[:8]}, skipping.')
        return

-    if fail_count >= MAX_FAILURES:
-        if remote_sha != last_issue and not issue_exists_for(remote_sha):
-            print(f'{remote_sha[:8]} failed {fail_count}x — creating issue.')
-            create_issue(remote_sha, fail_count)
-            ISSUE_SHA_FILE.write_text(remote_sha + '\n')
-        else:
-            print(f'{remote_sha[:8]} failed {fail_count}x, issue already exists, skipping.')
-        return
-
-    attempt = fail_count + 1
-    print(f'Deploying {remote_sha[:8]} (attempt {attempt}/{MAX_FAILURES}, was {last_sha[:8] or "none"})...')
-    git('pull', '--ff-only', 'origin', 'main')
-
+    print(f'New commit {remote_sha[:8]} (was {last_sha[:8] or "none"}) — triggering workflow...')
    result = subprocess.run(
-        ['task', 'publish-website'],
-        cwd=REPO_DIR,
+        ['fgj', 'actions', 'workflow', 'run', 'website.yml', '-R', REPO],
        capture_output=True, text=True,
    )
-    combined = result.stdout + result.stderr
-    print(combined, end='')
-
    if result.returncode != 0:
-        print(f'Deploy failed (exit {result.returncode}), attempt {attempt}/{MAX_FAILURES}', file=sys.stderr)
-        FAILED_SHA_FILE.write_text(remote_sha + '\n')
-        FAIL_COUNT_FILE.write_text(str(attempt) + '\n')
-        ERROR_FILE.write_text(combined)
+        print(f'fgj workflow run failed: {result.stderr}', file=sys.stderr)
        sys.exit(1)

    SHA_FILE.write_text(remote_sha + '\n')
-    for f in (FAILED_SHA_FILE, FAIL_COUNT_FILE, ERROR_FILE, ISSUE_SHA_FILE):
-        f.unlink(missing_ok=True)
-    print('Deploy complete.')
+    print('Workflow triggered.')


 if __name__ == '__main__':
@@ -616,6 +616,11 @@ Future<String> _resolveDatabasePath() async {
  );
 }

+// These two functions are only called from unit tests (database_path_test.dart).
+// They expose internals that cannot be reached via the public API.
+Future<String> resolveDatabasePathForTesting() => _resolveDatabasePath();
+void resetDatabasePathForTesting() => _dbPath = null;
+
 LazyDatabase _openConnection() {
  return LazyDatabase(() async {
    final file = File(await _resolveDatabasePath());
@@ -1,3 +1,6 @@
+import 'dart:async';
+
+import 'package:fake_async/fake_async.dart';
 import 'package:flutter/services.dart';
 import 'package:flutter_test/flutter_test.dart';
 import 'package:path_provider_platform_interface/path_provider_platform_interface.dart';
@@ -19,6 +22,30 @@ class _UnavailablePathProvider extends Fake
  }
 }

+// Fake PathProviderPlatform that fails the first [failCount] calls, then
+// returns a fixed path.  Used to exercise the retry loop in
+// _resolveDatabasePath() without waiting for real timers.
+class _SucceedAfterNPathProvider extends Fake
+    with MockPlatformInterfaceMixin
+    implements PathProviderPlatform {
+  _SucceedAfterNPathProvider({required this.failCount});
+
+  final int failCount;
+  int _callCount = 0;
+
+  @override
+  Future<String?> getApplicationSupportPath() async {
+    _callCount++;
+    if (_callCount <= failCount) {
+      throw PlatformException(
+        code: 'channel-error',
+        message: 'Simulated: path_provider channel not ready',
+      );
+    }
+    return '/tmp/test_app_support';
+  }
+}
+
 void main() {
  TestWidgetsFlutterBinding.ensureInitialized();

@@ -38,4 +65,69 @@ void main() {
      await expectLater(initDatabasePath(), completes);
    },
  );
+
+  // Tests for _resolveDatabasePath() — the lazy retry path called on first DB
+  // access when initDatabasePath() already failed.  fake_async lets us advance
+  // the back-off timers without waiting real-world milliseconds.
+
+  test(
+    '_resolveDatabasePath retries and eventually succeeds after transient failures',
+    () {
+      resetDatabasePathForTesting();
+      final prev = PathProviderPlatform.instance;
+      // Fail 3 times, succeed on the 4th call.  The delays in
+      // _resolveDatabasePath are [200, 500, 1000, 2000, 4000] ms, so three
+      // failures cost 200+500+1000 = 1700 ms before the fourth attempt.
+      PathProviderPlatform.instance = _SucceedAfterNPathProvider(failCount: 3);
+      addTearDown(() {
+        PathProviderPlatform.instance = prev;
+        resetDatabasePathForTesting();
+      });
+
+      fakeAsync((fake) {
+        String? result;
+        unawaited(resolveDatabasePathForTesting().then((r) => result = r));
+
+        // Advance fake time through the three back-off delays.
+        fake.elapse(const Duration(milliseconds: 200 + 500 + 1000 + 1));
+
+        expect(result, isNotNull);
+        expect(result, endsWith('sharedinbox.db'));
+      });
+    },
+  );
+
+  test(
+    '_resolveDatabasePath throws PlatformException after exhausting all retries',
+    () {
+      resetDatabasePathForTesting();
+      final prev = PathProviderPlatform.instance;
+      PathProviderPlatform.instance = _UnavailablePathProvider();
+      addTearDown(() {
+        PathProviderPlatform.instance = prev;
+        resetDatabasePathForTesting();
+      });
+
+      fakeAsync((fake) {
+        Object? caughtError;
+        unawaited(
+          resolveDatabasePathForTesting().catchError((Object e) {
+            caughtError = e;
+            return ''; // ignored; satisfies the Future<String> return type
+          }),
+        );
+
+        // Advance past all five back-off delays: 200+500+1000+2000+4000 ms.
+        fake.elapse(
+          const Duration(milliseconds: 200 + 500 + 1000 + 2000 + 4000 + 1),
+        );
+
+        expect(caughtError, isA<PlatformException>());
+        expect(
+          (caughtError! as PlatformException).message,
+          contains('cannot open database'),
+        );
+      });
+    },
+  );
 }
Author	SHA1	Message	Date
Thomas SharedInbox and Claude Sonnet 4.6	9f657cd624	fix(ci): replace continue-on-error with conditional step execution (#172) Remove all continue-on-error usages from CI workflows: - deploy.yml: replace continue-on-error on SSH deploy steps with if: secrets.SSH_PRIVATE_KEY != '' so steps are skipped (not failed) when the secret is absent - windows-nightly.yml: remove continue-on-error from job and steps (job is already disabled via if: false) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-05-23 19:03:04 +02:00
Bot of Thomas Güttler	6adba9b001	perf: parallelize APK deploy and reduce fetch-depth in deploy.yml (#171) (#188)	2026-05-23 18:55:08 +02:00
Bot of Thomas Güttler	11d9805fca	test: cover _resolveDatabasePath retry logic (#167) (#187)	2026-05-23 18:35:15 +02:00
Thomas SharedInbox and Claude Sonnet 4.6	3019fdf145	refactor(deploy_cron): trigger Forgejo Actions workflow via fgj instead of deploying locally Replace local `task publish-website` invocation with `fgj actions workflow run website.yml` so the deploy runs in CI rather than on the local machine. Remove failure-tracking state files and issue-creation logic — Forgejo Actions handles its own reporting. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-05-23 17:42:20 +02:00
Bot of Thomas Güttler	14342f6472	fix: use exact grep patterns for build_runner and flutter pub get (#136) (#159)	2026-05-23 17:25:08 +02:00
Bot of Thomas Güttler	b86c1a5c69	fix: verify Hugo binary SHA-256 checksum after download (#162) (#182)	2026-05-23 17:10:11 +02:00