Compare commits

...
Author SHA1 Message Date
Thomas SharedInbox and Claude Sonnet 4.6 c343ed6bd7 feat: monitor agent loop health every 2 hours (#217)
- Track a heartbeat timestamp in ~/.sharedinbox-agent-heartbeat at the
  start of each _run_loop() invocation so we can tell when it last ran.
- Add `agent_loop.py monitor` subcommand that exits 1 with a WARNING
  message if the heartbeat is missing, corrupted, or older than 2 hours.
- Add .forgejo/workflows/monitor.yml scheduled workflow that runs the
  monitor check every 2 hours on the self-hosted runner; a CI failure
  serves as the warning when the loop is stalled.
- Add 7 unit tests covering all monitor / heartbeat scenarios.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-24 18:27:03 +02:00
Thomas SharedInbox and Claude Sonnet 4.6 1d5eb187bf fix: fall back to text input when mobile_scanner plugin is unavailable (#202)
On some Android builds the mobile_scanner native plugin is not registered,
causing a MissingPluginException when the send/receive screens try to open
the QR scanner.  Add a pre-flight _initScanner() method that starts and
immediately stops a temporary MobileScannerController in a try/catch; any
exception (including MissingPluginException) sets _scannerFailed=true and
the UI falls back to the existing copy-paste text-input flow instead of
leaving the user stuck with a blank camera view.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-24 14:47:15 +02:00
5 changed files with 197 additions and 6 deletions
+18
View File
@@ -0,0 +1,18 @@
# Scheduled health check for the agent loop. It runs `agent_loop.py monitor`,
# so a failed run of this workflow is the signal that the loop has stalled.
name: Monitor Agent Loop
on:
schedule:
- cron: '0 */2 * * *' # every 2 hours
# Also allow the check to be triggered manually.
workflow_dispatch:
jobs:
monitor:
name: Check Agent Loop Health
# NOTE(review): the monitor reads a heartbeat file under the home directory
# of the user running the agent loop. Confirm that this label resolves to a
# runner (and user) that can see that file; otherwise the check always fails.
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- name: Check agent loop heartbeat
# Exits 1 when the heartbeat is missing, corrupted or stale, failing the job.
run: python3 scripts/agent_loop.py monitor
+37 -4
View File
@@ -37,6 +37,9 @@ class _AccountReceiveScreenState extends ConsumerState<AccountReceiveScreen> {
bool _scannerActive = false; bool _scannerActive = false;
MobileScannerController? _scannerController; MobileScannerController? _scannerController;
// True when the scanner plugin fails to initialise at runtime (e.g.
// MissingPluginException on some Android builds).
bool _scannerFailed = false;
@override @override
void initState() { void initState() {
@@ -76,8 +79,35 @@ class _AccountReceiveScreenState extends ConsumerState<AccountReceiveScreen> {
setState(() { setState(() {
_step = _Step.scanning; _step = _Step.scanning;
_scannerActive = true; _scannerActive = true;
_scannerController = MobileScannerController();
}); });
if (_cameraScanSupported()) {
unawaited(_initScanner());
}
}
/// Probes the scanner plugin once before the live camera view is built.
///
/// A throwaway [MobileScannerController] is started, stopped and disposed.
/// When that succeeds a fresh controller is stored in [_scannerController];
/// when anything throws (including `MissingPluginException`),
/// [_scannerFailed] is set so the text-input fallback is shown instead.
Future<void> _initScanner() async {
  MobileScannerController? probe;
  var pluginUsable = false;
  try {
    probe = MobileScannerController();
    await probe.start();
    await probe.stop();
    pluginUsable = true;
  } catch (_) {
    // Plugin not available on this device; text fallback will be shown.
  } finally {
    // Best-effort cleanup: a failing dispose must not change the probe result.
    try {
      await probe?.dispose();
    } catch (_) {}
  }

  // The state may have been unmounted while the probe was awaiting.
  if (!mounted) return;

  setState(() {
    if (pluginUsable) {
      _scannerController = MobileScannerController();
    } else {
      _scannerFailed = true;
    }
  });
}
Future<void> _onScanned(String rawValue) async { Future<void> _onScanned(String rawValue) async {
@@ -266,11 +296,14 @@ class _AccountReceiveScreenState extends ConsumerState<AccountReceiveScreen> {
} }
Widget _buildScannerView(BuildContext context) { Widget _buildScannerView(BuildContext context) {
// On platforms where the camera scanner is not available (Linux desktop), // Fall back to text input when the platform has no camera support or when
// fall back to a text-input field. // the scanner plugin fails to initialise at runtime (MissingPluginException).
if (!_cameraScanSupported()) { if (!_cameraScanSupported() || _scannerFailed) {
return _buildTextFallbackView(context); return _buildTextFallbackView(context);
} }
if (_scannerController == null) {
return const Center(child: CircularProgressIndicator());
}
return Stack( return Stack(
children: [ children: [
+33 -2
View File
@@ -45,12 +45,40 @@ class _AccountSendScreenState extends ConsumerState<AccountSendScreen> {
bool _scannerActive = true; bool _scannerActive = true;
MobileScannerController? _scannerController; MobileScannerController? _scannerController;
// True when the scanner plugin fails to initialise at runtime (e.g.
// MissingPluginException on some Android builds).
bool _scannerFailed = false;
@override @override
void initState() { void initState() {
super.initState(); super.initState();
if (_cameraScanSupported()) { if (_cameraScanSupported()) {
_scannerController = MobileScannerController(); unawaited(_initScanner());
}
}
/// Probes the scanner plugin once before the live camera view is built.
///
/// A throwaway [MobileScannerController] is started, stopped and disposed.
/// When that succeeds a fresh controller is stored in [_scannerController];
/// when anything throws (including `MissingPluginException`),
/// [_scannerFailed] is set so the text-input fallback is shown instead.
Future<void> _initScanner() async {
  MobileScannerController? probe;
  var pluginUsable = false;
  try {
    probe = MobileScannerController();
    await probe.start();
    await probe.stop();
    pluginUsable = true;
  } catch (_) {
    // Plugin not available on this device; text fallback will be shown.
  } finally {
    // Best-effort cleanup: a failing dispose must not change the probe result.
    try {
      await probe?.dispose();
    } catch (_) {}
  }

  // The state may have been unmounted while the probe was awaiting.
  if (!mounted) return;

  setState(() {
    if (pluginUsable) {
      _scannerController = MobileScannerController();
    } else {
      _scannerFailed = true;
    }
  });
}
@@ -178,9 +206,12 @@ class _AccountSendScreenState extends ConsumerState<AccountSendScreen> {
} }
Widget _buildScanStep(BuildContext context) { Widget _buildScanStep(BuildContext context) {
if (!_cameraScanSupported()) { if (!_cameraScanSupported() || _scannerFailed) {
return _buildTextFallbackView(context); return _buildTextFallbackView(context);
} }
if (_scannerController == null) {
return const Center(child: CircularProgressIndicator());
}
return Stack( return Stack(
children: [ children: [
+43
View File
@@ -53,7 +53,9 @@ os.environ["PATH"] = (
REPO = "guettli/sharedinbox" REPO = "guettli/sharedinbox"
REPO_URL = f"https://codeberg.org/{REPO}" REPO_URL = f"https://codeberg.org/{REPO}"
STATE_FILE = Path.home() / ".sharedinbox-agent-state.json" STATE_FILE = Path.home() / ".sharedinbox-agent-state.json"
HEARTBEAT_FILE = Path.home() / ".sharedinbox-agent-heartbeat"
MAX_AGENT_AGE_SECONDS = 3600 # 1 hour MAX_AGENT_AGE_SECONDS = 3600 # 1 hour
MAX_HEARTBEAT_AGE_SECONDS = 7200 # 2 hours
CLAUDE_PROJECTS_DIR = Path.home() / ".claude" / "projects" / ( CLAUDE_PROJECTS_DIR = Path.home() / ".claude" / "projects" / (
"-" + str(Path.home())[1:].replace("/", "-") "-" + str(Path.home())[1:].replace("/", "-")
) )
@@ -273,6 +275,12 @@ def _clear_state() -> None:
STATE_FILE.unlink(missing_ok=True) STATE_FILE.unlink(missing_ok=True)
def _update_heartbeat() -> None:
    """Record that the agent loop ran right now.

    Writes the current UTC time, in ISO 8601 format, to ``HEARTBEAT_FILE``.

    The timestamp is first written to a sibling ``.tmp`` file, restricted to
    the owner, and then renamed over the heartbeat. A concurrent reader
    therefore never sees an empty or half-written heartbeat, and the heartbeat
    itself is never visible with permissions looser than ``0o600``.
    """
    stamp = datetime.now(timezone.utc).isoformat()
    scratch = HEARTBEAT_FILE.with_name(HEARTBEAT_FILE.name + ".tmp")
    scratch.write_text(stamp)
    scratch.chmod(0o600)
    # Renaming within one directory swaps the file in a single step on POSIX.
    scratch.replace(HEARTBEAT_FILE)
def _find_session_uuid(session_name: str) -> str | None: def _find_session_uuid(session_name: str) -> str | None:
"""Return the Claude session UUID for *session_name*, or None if not found. """Return the Claude session UUID for *session_name*, or None if not found.
@@ -442,12 +450,44 @@ def cmd_list() -> int:
return 0 return 0
# ── monitor subcommand ────────────────────────────────────────────────────────
def cmd_monitor() -> int:
    """Check that the agent loop has run within the last 2 hours.

    Reads the ISO 8601 timestamp stored in ``HEARTBEAT_FILE`` and compares it
    with the current UTC time.

    Returns:
        0 if the heartbeat is at most ``MAX_HEARTBEAT_AGE_SECONDS`` old.
        1, after printing a ``WARNING:`` line, if the heartbeat file is
        missing, unreadable, not a timezone-aware ISO timestamp, or too old.

    Intended to be called from a scheduled CI job or cron every 2 hours.
    """
    try:
        # Read directly instead of checking exists() first, so a file that
        # disappears in between is still reported as missing, not a crash.
        raw = HEARTBEAT_FILE.read_text()
        last_run = datetime.fromisoformat(raw.strip())
        if last_run.utcoffset() is None:
            # A naive timestamp cannot be subtracted from an aware "now".
            raise ValueError("heartbeat timestamp has no UTC offset")
    except FileNotFoundError:
        print(
            f"WARNING: Agent loop heartbeat file missing — "
            f"the loop may not have run yet or the file was deleted ({HEARTBEAT_FILE})."
        )
        return 1
    except OSError as exc:
        # E.g. the path is a directory or is not readable by this user.
        print(
            f"WARNING: Agent loop heartbeat file is unreadable: {HEARTBEAT_FILE} ({exc})"
        )
        return 1
    except ValueError:
        # Covers unparsable text, undecodable bytes and naive timestamps.
        print(f"WARNING: Agent loop heartbeat file is corrupted: {HEARTBEAT_FILE}")
        return 1

    age = (datetime.now(timezone.utc) - last_run).total_seconds()
    if age > MAX_HEARTBEAT_AGE_SECONDS:
        print(
            f"WARNING: Agent loop last ran {age / 3600:.1f}h ago "
            f"(limit: {MAX_HEARTBEAT_AGE_SECONDS // 3600}h) — the loop may be stalled."
        )
        return 1

    print(f"Agent loop is healthy. Last run: {age / 60:.0f} min ago.")
    return 0
# ── main flow ───────────────────────────────────────────────────────────────── # ── main flow ─────────────────────────────────────────────────────────────────
def _run_loop() -> int: def _run_loop() -> int:
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
print(f"---------------------- Starting {now.strftime('%Y-%m-%d %H:%MZ')}") print(f"---------------------- Starting {now.strftime('%Y-%m-%d %H:%MZ')}")
_update_heartbeat()
state = _read_state() state = _read_state()
@@ -795,10 +835,13 @@ def main() -> int:
parser = argparse.ArgumentParser(prog="agent_loop") parser = argparse.ArgumentParser(prog="agent_loop")
sub = parser.add_subparsers(dest="cmd") sub = parser.add_subparsers(dest="cmd")
sub.add_parser("list", help="List recent agent sessions") sub.add_parser("list", help="List recent agent sessions")
sub.add_parser("monitor", help="Check that the loop ran within the last 2 hours")
args = parser.parse_args() args = parser.parse_args()
if args.cmd == "list": if args.cmd == "list":
return cmd_list() return cmd_list()
if args.cmd == "monitor":
return cmd_monitor()
return _run_loop() return _run_loop()
+66
View File
@@ -6,6 +6,7 @@ import json
import os import os
import tempfile import tempfile
import unittest import unittest
from datetime import datetime, timedelta, timezone
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
@@ -732,5 +733,70 @@ class TestRunLoopResumeCommand(unittest.TestCase):
self.assertNotIn("Resume:", output) self.assertNotIn("Resume:", output)
class TestHeartbeat(unittest.TestCase):
    """Covers heartbeat writing and the ``monitor`` health check."""

    def setUp(self):
        # Reserve a unique temp path, then remove the file so every test
        # starts without a heartbeat.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".heartbeat") as handle:
            self.heartbeat = Path(handle.name)
        self.heartbeat.unlink()
        self.addCleanup(self.heartbeat.unlink, missing_ok=True)

        # Point the module at the temp path; cleanups run in reverse order,
        # so the original value is restored before the file is removed.
        redirect = patch.object(agent_loop, "HEARTBEAT_FILE", self.heartbeat)
        redirect.start()
        self.addCleanup(redirect.stop)

    def _assert_monitor_warns(self):
        """Run ``cmd_monitor`` and require exit code 1 plus a WARNING line."""
        captured = io.StringIO()
        with contextlib.redirect_stdout(captured):
            exit_code = agent_loop.cmd_monitor()
        self.assertEqual(exit_code, 1)
        self.assertIn("WARNING", captured.getvalue())

    def test_update_heartbeat_writes_timestamp(self):
        agent_loop._update_heartbeat()
        written = datetime.fromisoformat(self.heartbeat.read_text().strip())
        elapsed = datetime.now(timezone.utc) - written
        self.assertLess(elapsed.total_seconds(), 5)

    def test_update_heartbeat_creates_file(self):
        self.assertFalse(self.heartbeat.exists())
        agent_loop._update_heartbeat()
        self.assertTrue(self.heartbeat.exists())

    def test_monitor_healthy_when_recent(self):
        agent_loop._update_heartbeat()
        self.assertEqual(agent_loop.cmd_monitor(), 0)

    def test_monitor_warns_when_heartbeat_missing(self):
        self._assert_monitor_warns()

    def test_monitor_warns_when_stale(self):
        three_hours_ago = datetime.now(timezone.utc) - timedelta(hours=3)
        self.heartbeat.write_text(three_hours_ago.isoformat())
        self._assert_monitor_warns()

    def test_monitor_warns_when_corrupted(self):
        self.heartbeat.write_text("not-a-timestamp")
        self._assert_monitor_warns()

    def test_run_loop_updates_heartbeat(self):
        self.assertFalse(self.heartbeat.exists())
        # Stub every lookup the loop performs so it finds nothing to do.
        stubbed_returns = {
            "_read_state": None,
            "_open_issue_prs": [],
            "_latest_main_ci_run": None,
            "_ready_issues": [],
        }
        with contextlib.ExitStack() as stack:
            for helper, value in stubbed_returns.items():
                stack.enter_context(patch(f"agent_loop.{helper}", return_value=value))
            agent_loop._run_loop()
        self.assertTrue(self.heartbeat.exists())
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()