Compare commits

...
Author SHA1 Message Date
Thomas SharedInboxandClaude Sonnet 4.6 c343ed6bd7 feat: monitor agent loop health every 2 hours (#217)
- Track a heartbeat timestamp in ~/.sharedinbox-agent-heartbeat at the
  start of each _run_loop() invocation so we can tell when it last ran.
- Add `agent_loop.py monitor` subcommand that exits 1 with a WARNING
  message if the heartbeat is missing, corrupted, or older than 2 hours.
- Add .forgejo/workflows/monitor.yml scheduled workflow that runs the
  monitor check every 2 hours on the self-hosted runner; a CI failure
  serves as the warning when the loop is stalled.
- Add 7 unit tests covering all monitor / heartbeat scenarios.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-24 18:27:03 +02:00
Thomas SharedInboxandClaude Sonnet 4.6 1d5eb187bf fix: fall back to text input when mobile_scanner plugin is unavailable (#202)
On some Android builds the mobile_scanner native plugin is not registered,
causing a MissingPluginException when the send/receive screens try to open
the QR scanner.  Add a pre-flight _initScanner() method that starts and
immediately stops a temporary MobileScannerController in a try/catch; any
exception (including MissingPluginException) sets _scannerFailed=true and
the UI falls back to the existing copy-paste text-input flow instead of
leaving the user stuck with a blank camera view.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-24 14:47:15 +02:00
5 changed files with 197 additions and 6 deletions
+18
View File
@@ -0,0 +1,18 @@
name: Monitor Agent Loop
on:
schedule:
- cron: '0 */2 * * *' # every 2 hours
workflow_dispatch:
jobs:
monitor:
name: Check Agent Loop Health
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- name: Check agent loop heartbeat
run: python3 scripts/agent_loop.py monitor
+37 -4
View File
@@ -37,6 +37,9 @@ class _AccountReceiveScreenState extends ConsumerState<AccountReceiveScreen> {
bool _scannerActive = false;
MobileScannerController? _scannerController;
// True when the scanner plugin fails to initialise at runtime (e.g.
// MissingPluginException on some Android builds).
bool _scannerFailed = false;
@override
void initState() {
@@ -76,8 +79,35 @@ class _AccountReceiveScreenState extends ConsumerState<AccountReceiveScreen> {
setState(() {
_step = _Step.scanning;
_scannerActive = true;
_scannerController = MobileScannerController();
});
if (_cameraScanSupported()) {
unawaited(_initScanner());
}
}
// Pre-flight: start + stop the scanner to verify the plugin is available.
// Falls back to text entry on any exception (including MissingPluginException).
Future<void> _initScanner() async {
MobileScannerController? ctrl;
bool available = false;
try {
ctrl = MobileScannerController();
await ctrl.start();
await ctrl.stop();
available = true;
} catch (_) {
// Plugin not available on this device; text fallback will be shown.
} finally {
try {
await ctrl?.dispose();
} catch (_) {}
}
if (!mounted) return;
if (available) {
setState(() => _scannerController = MobileScannerController());
} else {
setState(() => _scannerFailed = true);
}
}
Future<void> _onScanned(String rawValue) async {
@@ -266,11 +296,14 @@ class _AccountReceiveScreenState extends ConsumerState<AccountReceiveScreen> {
}
Widget _buildScannerView(BuildContext context) {
// On platforms where the camera scanner is not available (Linux desktop),
// fall back to a text-input field.
if (!_cameraScanSupported()) {
// Fall back to text input when the platform has no camera support or when
// the scanner plugin fails to initialise at runtime (MissingPluginException).
if (!_cameraScanSupported() || _scannerFailed) {
return _buildTextFallbackView(context);
}
if (_scannerController == null) {
return const Center(child: CircularProgressIndicator());
}
return Stack(
children: [
+33 -2
View File
@@ -45,12 +45,40 @@ class _AccountSendScreenState extends ConsumerState<AccountSendScreen> {
bool _scannerActive = true;
MobileScannerController? _scannerController;
// True when the scanner plugin fails to initialise at runtime (e.g.
// MissingPluginException on some Android builds).
bool _scannerFailed = false;
@override
void initState() {
super.initState();
if (_cameraScanSupported()) {
_scannerController = MobileScannerController();
unawaited(_initScanner());
}
}
// Pre-flight: start + stop the scanner to verify the plugin is available.
// Falls back to text entry on any exception (including MissingPluginException).
Future<void> _initScanner() async {
MobileScannerController? ctrl;
bool available = false;
try {
ctrl = MobileScannerController();
await ctrl.start();
await ctrl.stop();
available = true;
} catch (_) {
// Plugin not available on this device; text fallback will be shown.
} finally {
try {
await ctrl?.dispose();
} catch (_) {}
}
if (!mounted) return;
if (available) {
setState(() => _scannerController = MobileScannerController());
} else {
setState(() => _scannerFailed = true);
}
}
@@ -178,9 +206,12 @@ class _AccountSendScreenState extends ConsumerState<AccountSendScreen> {
}
Widget _buildScanStep(BuildContext context) {
if (!_cameraScanSupported()) {
if (!_cameraScanSupported() || _scannerFailed) {
return _buildTextFallbackView(context);
}
if (_scannerController == null) {
return const Center(child: CircularProgressIndicator());
}
return Stack(
children: [
+43
View File
@@ -53,7 +53,9 @@ os.environ["PATH"] = (
REPO = "guettli/sharedinbox"
REPO_URL = f"https://codeberg.org/{REPO}"
STATE_FILE = Path.home() / ".sharedinbox-agent-state.json"
HEARTBEAT_FILE = Path.home() / ".sharedinbox-agent-heartbeat"
MAX_AGENT_AGE_SECONDS = 3600 # 1 hour
MAX_HEARTBEAT_AGE_SECONDS = 7200 # 2 hours
CLAUDE_PROJECTS_DIR = Path.home() / ".claude" / "projects" / (
"-" + str(Path.home())[1:].replace("/", "-")
)
@@ -273,6 +275,12 @@ def _clear_state() -> None:
STATE_FILE.unlink(missing_ok=True)
def _update_heartbeat() -> None:
"""Record that the agent loop ran right now."""
HEARTBEAT_FILE.write_text(datetime.now(timezone.utc).isoformat())
HEARTBEAT_FILE.chmod(0o600)
def _find_session_uuid(session_name: str) -> str | None:
"""Return the Claude session UUID for *session_name*, or None if not found.
@@ -442,12 +450,44 @@ def cmd_list() -> int:
return 0
# ── monitor subcommand ────────────────────────────────────────────────────────
def cmd_monitor() -> int:
"""Check that the agent loop has run within the last 2 hours.
Exits 0 if healthy, 1 if the heartbeat is missing or stale.
Intended to be called from a scheduled CI job or cron every 2 hours.
"""
if not HEARTBEAT_FILE.exists():
print(
f"WARNING: Agent loop heartbeat file missing — "
f"the loop may not have run yet or the file was deleted ({HEARTBEAT_FILE})."
)
return 1
try:
last_run = datetime.fromisoformat(HEARTBEAT_FILE.read_text().strip())
except ValueError:
print(f"WARNING: Agent loop heartbeat file is corrupted: {HEARTBEAT_FILE}")
return 1
age = (datetime.now(timezone.utc) - last_run).total_seconds()
if age > MAX_HEARTBEAT_AGE_SECONDS:
print(
f"WARNING: Agent loop last ran {age / 3600:.1f}h ago "
f"(limit: {MAX_HEARTBEAT_AGE_SECONDS // 3600}h) — the loop may be stalled."
)
return 1
print(f"Agent loop is healthy. Last run: {age / 60:.0f} min ago.")
return 0
# ── main flow ─────────────────────────────────────────────────────────────────
def _run_loop() -> int:
now = datetime.now(timezone.utc)
print(f"---------------------- Starting {now.strftime('%Y-%m-%d %H:%MZ')}")
_update_heartbeat()
state = _read_state()
@@ -795,10 +835,13 @@ def main() -> int:
parser = argparse.ArgumentParser(prog="agent_loop")
sub = parser.add_subparsers(dest="cmd")
sub.add_parser("list", help="List recent agent sessions")
sub.add_parser("monitor", help="Check that the loop ran within the last 2 hours")
args = parser.parse_args()
if args.cmd == "list":
return cmd_list()
if args.cmd == "monitor":
return cmd_monitor()
return _run_loop()
+66
View File
@@ -6,6 +6,7 @@ import json
import os
import tempfile
import unittest
from datetime import datetime, timedelta, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
@@ -732,5 +733,70 @@ class TestRunLoopResumeCommand(unittest.TestCase):
self.assertNotIn("Resume:", output)
class TestHeartbeat(unittest.TestCase):
"""Tests for _update_heartbeat() and cmd_monitor()."""
def setUp(self):
self._tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".heartbeat")
self._tmp.close()
self._orig = agent_loop.HEARTBEAT_FILE
agent_loop.HEARTBEAT_FILE = Path(self._tmp.name)
Path(self._tmp.name).unlink() # Start with no heartbeat file.
def tearDown(self):
agent_loop.HEARTBEAT_FILE = self._orig
Path(self._tmp.name).unlink(missing_ok=True)
def test_update_heartbeat_writes_timestamp(self):
agent_loop._update_heartbeat()
content = Path(self._tmp.name).read_text().strip()
dt = datetime.fromisoformat(content)
age = (datetime.now(timezone.utc) - dt).total_seconds()
self.assertLess(age, 5)
def test_update_heartbeat_creates_file(self):
self.assertFalse(Path(self._tmp.name).exists())
agent_loop._update_heartbeat()
self.assertTrue(Path(self._tmp.name).exists())
def test_monitor_healthy_when_recent(self):
agent_loop._update_heartbeat()
result = agent_loop.cmd_monitor()
self.assertEqual(result, 0)
def test_monitor_warns_when_heartbeat_missing(self):
buf = io.StringIO()
with contextlib.redirect_stdout(buf):
result = agent_loop.cmd_monitor()
self.assertEqual(result, 1)
self.assertIn("WARNING", buf.getvalue())
def test_monitor_warns_when_stale(self):
stale = (datetime.now(timezone.utc) - timedelta(hours=3)).isoformat()
Path(self._tmp.name).write_text(stale)
buf = io.StringIO()
with contextlib.redirect_stdout(buf):
result = agent_loop.cmd_monitor()
self.assertEqual(result, 1)
self.assertIn("WARNING", buf.getvalue())
def test_monitor_warns_when_corrupted(self):
Path(self._tmp.name).write_text("not-a-timestamp")
buf = io.StringIO()
with contextlib.redirect_stdout(buf):
result = agent_loop.cmd_monitor()
self.assertEqual(result, 1)
self.assertIn("WARNING", buf.getvalue())
def test_run_loop_updates_heartbeat(self):
self.assertFalse(Path(self._tmp.name).exists())
with patch("agent_loop._read_state", return_value=None), \
patch("agent_loop._open_issue_prs", return_value=[]), \
patch("agent_loop._latest_main_ci_run", return_value=None), \
patch("agent_loop._ready_issues", return_value=[]):
agent_loop._run_loop()
self.assertTrue(Path(self._tmp.name).exists())
if __name__ == "__main__":
unittest.main()