refactor(agent-loop): remove tmux, run claude directly via Popen (#120)

Replace the tmux-based agent launcher with a direct subprocess.Popen
call. Claude sessions can't be attached to anyway, so the tmux layer
added complexity with no benefit. State now tracks a PID instead of a
tmux session name; liveness is checked with os.kill(pid, 0).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Thomas SharedInbox
2026-05-17 08:00:39 +02:00
co-authored by Claude Sonnet 4.6
parent 01409a164b
commit 666c42ce1c
2 changed files with 176 additions and 61 deletions
+44 -61
View File
@@ -15,14 +15,13 @@ Flow
d. No Ready issues → print "nothing to do", exit 0
State file: ~/.sharedinbox-agent-state.json
{ "tmux_session": "issue-91", "issue": 91,
{ "pid": 12345, "issue": 91,
"started_at": "2026-05-15T12:00:00+00:00", "type": "issue" }
The agent runs inside a detached tmux session so you can watch it live or
resume the Claude conversation afterward:
Output is written to ~/.sharedinbox-agent-logs/<session>-<timestamp>.log.
Resume the Claude conversation afterward with:
tmux attach -t issue-91 # watch while running
claude --resume issue-91 # continue the conversation later
claude --resume issue-91
"""
import json
@@ -138,11 +137,11 @@ def _read_state() -> dict | None:
return None
def _write_state(tmux_session: str, issue: int | None, kind: str) -> None:
def _write_state(pid: int, issue: int | None, kind: str) -> None:
STATE_FILE.write_text(
json.dumps(
{
"tmux_session": tmux_session,
"pid": pid,
"issue": issue,
"started_at": datetime.now(timezone.utc).isoformat(),
"type": kind,
@@ -159,60 +158,48 @@ def _clear_state() -> None:
# ── agent launcher ────────────────────────────────────────────────────────────
def _start_agent(prompt: str, session_name: str) -> str:
"""
Start Claude Code inside a detached tmux session and return the session name.
The session inherits the tmux server's environment (including ANTHROPIC_API_KEY
and any keychain access), which is more reliable than cron's minimal env.
Output is written to both the tmux scrollback buffer and a log file via tee.
"""
def _start_agent(prompt: str, session_name: str) -> int:
"""Start Claude Code as a detached background process and return its PID."""
log_dir = Path.home() / ".sharedinbox-agent-logs"
log_dir.mkdir(exist_ok=True)
ts = datetime.now().strftime("%Y%m%dT%H%M%S")
log_file = log_dir / f"{session_name}-{ts}.log"
# Kill any stale session with this name before creating a new one.
subprocess.run(["tmux", "kill-session", "-t", session_name], capture_output=True)
log_fh = open(log_file, "w")
proc = subprocess.Popen(
[
"claude",
"--dangerously-skip-permissions",
"--name", session_name,
"-p", prompt,
],
stdin=subprocess.PIPE,
stdout=log_fh,
stderr=log_fh,
start_new_session=True,
)
log_fh.close() # Parent closes its copy; the child retains the fd.
# Answer the workspace-trust dialog; after this the pipe hits EOF.
proc.stdin.write(b"\n")
proc.stdin.close()
# printf '\n' answers the workspace-trust dialog (press Enter to confirm the
# default "Yes, I trust this folder") when claude shows it despite -p mode.
# After that newline, stdin hits EOF, which -p mode ignores.
shell_cmd = (
f"printf '\\n' | claude --dangerously-skip-permissions"
f" --name {shlex.quote(session_name)}"
f" -p {shlex.quote(prompt)}"
f" 2>&1 | tee {shlex.quote(str(log_file))}"
)
subprocess.run(
["tmux", "new-session", "-d", "-s", session_name, "bash", "-c", shell_cmd],
check=True,
)
print(f"[agent_loop] Started tmux session={session_name!r}, log={log_file}")
print(f"[agent_loop] Watch: tmux attach -t {shlex.quote(session_name)}")
print(f"[agent_loop] Started agent pid={proc.pid}, log={log_file}")
print(f"[agent_loop] Resume: claude --resume {shlex.quote(session_name)}")
return session_name
return proc.pid
def _agent_alive(state: dict) -> bool:
"""Return True if the agent's tmux session is still running."""
session = state.get("tmux_session")
if session:
r = subprocess.run(
["tmux", "has-session", "-t", session], capture_output=True
)
return r.returncode == 0
# Backward compat: old state files stored a pid instead of a tmux session.
"""Return True if the agent process is still running."""
pid = state.get("pid")
if pid is not None:
try:
os.kill(pid, 0)
return True
except ProcessLookupError:
return False
except PermissionError:
return True
return False
if pid is None:
return False
try:
os.kill(pid, 0)
return True
except ProcessLookupError:
return False
except PermissionError:
return True
def _agent_age_seconds(state: dict) -> float:
@@ -226,10 +213,6 @@ def _agent_age_seconds(state: dict) -> float:
def _kill_agent(state: dict) -> None:
"""Forcefully stop the running agent."""
session = state.get("tmux_session")
if session:
subprocess.run(["tmux", "kill-session", "-t", session], capture_output=True)
return
pid = state.get("pid")
if pid:
try:
@@ -249,11 +232,11 @@ def main() -> int:
age = _agent_age_seconds(state)
issue = state.get("issue")
kind = state.get("type", "issue")
session = state.get("tmux_session", state.get("pid", "?"))
pid = state.get("pid", "?")
if age > MAX_AGENT_AGE_SECONDS:
print(
f"[agent_loop] Agent session={session!r} (issue #{issue}) "
f"[agent_loop] Agent pid={pid!r} (issue #{issue}) "
f"has been running for {age/60:.0f} min — aborting."
)
_kill_agent(state)
@@ -264,7 +247,7 @@ def main() -> int:
return 1
print(
f"[agent_loop] Agent session={session!r} ({kind}, issue #{issue}) "
f"[agent_loop] Agent pid={pid!r} ({kind}, issue #{issue}) "
f"still running ({age/60:.0f} min). Waiting."
)
return 0
@@ -290,8 +273,8 @@ def main() -> int:
"Verify locally with 'task check' before pushing. "
"When done, stop."
)
session_name = _start_agent(prompt, "ci-fix")
_write_state(session_name, None, "ci-fix")
pid = _start_agent(prompt, "ci-fix")
_write_state(pid, None, "ci-fix")
return 0
# CI is ok (or no run) — find a Ready issue.
@@ -334,8 +317,8 @@ Instructions:
- When the work is done and pushed, close the issue and stop.
"""
session_name = _start_agent(prompt, f"issue-{issue_number}")
_write_state(session_name, issue_number, "issue")
pid = _start_agent(prompt, f"issue-{issue_number}")
_write_state(pid, issue_number, "issue")
return 0
+132
View File
@@ -0,0 +1,132 @@
#!/usr/bin/env python3
"""Tests for agent_loop.py."""
import io
import json
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import MagicMock, patch
import sys
sys.path.insert(0, str(Path(__file__).parent))
import agent_loop
class TestStateFile(unittest.TestCase):
def setUp(self):
self._tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
self._tmp.close()
self._orig = agent_loop.STATE_FILE
agent_loop.STATE_FILE = Path(self._tmp.name)
Path(self._tmp.name).unlink() # Start with no state file.
def tearDown(self):
agent_loop.STATE_FILE = self._orig
Path(self._tmp.name).unlink(missing_ok=True)
def test_write_state_stores_pid(self):
agent_loop._write_state(12345, 91, "issue")
data = json.loads(Path(self._tmp.name).read_text())
self.assertEqual(data["pid"], 12345)
self.assertNotIn("tmux_session", data)
def test_write_state_stores_issue_and_kind(self):
agent_loop._write_state(99, 7, "ci-fix")
data = json.loads(Path(self._tmp.name).read_text())
self.assertEqual(data["issue"], 7)
self.assertEqual(data["type"], "ci-fix")
self.assertIn("started_at", data)
def test_read_state_returns_none_when_missing(self):
self.assertIsNone(agent_loop._read_state())
def test_read_and_write_roundtrip(self):
agent_loop._write_state(42, 10, "issue")
state = agent_loop._read_state()
self.assertIsNotNone(state)
self.assertEqual(state["pid"], 42)
self.assertEqual(state["issue"], 10)
def test_clear_state_removes_file(self):
agent_loop._write_state(1, None, "ci-fix")
agent_loop._clear_state()
self.assertIsNone(agent_loop._read_state())
class TestAgentAlive(unittest.TestCase):
def test_own_pid_is_alive(self):
self.assertTrue(agent_loop._agent_alive({"pid": os.getpid()}))
def test_nonexistent_pid_is_dead(self):
self.assertFalse(agent_loop._agent_alive({"pid": 999999999}))
def test_missing_pid_returns_false(self):
self.assertFalse(agent_loop._agent_alive({}))
self.assertFalse(agent_loop._agent_alive({"pid": None}))
class TestKillAgent(unittest.TestCase):
def test_kill_sends_sigkill(self):
with patch("agent_loop.os.kill") as mock_kill:
agent_loop._kill_agent({"pid": 1234})
mock_kill.assert_called_once_with(1234, 9)
def test_kill_ignores_missing_process(self):
with patch("agent_loop.os.kill", side_effect=ProcessLookupError):
agent_loop._kill_agent({"pid": 1234}) # Should not raise.
def test_kill_noop_when_no_pid(self):
with patch("agent_loop.os.kill") as mock_kill:
agent_loop._kill_agent({})
mock_kill.assert_not_called()
class TestStartAgent(unittest.TestCase):
def _make_mock_proc(self, pid=42):
proc = MagicMock()
proc.pid = pid
proc.stdin = io.BytesIO()
return proc
def test_start_agent_returns_pid(self):
mock_proc = self._make_mock_proc(pid=42)
with tempfile.TemporaryDirectory() as tmpdir:
with patch("agent_loop.subprocess.Popen", return_value=mock_proc):
with patch.object(agent_loop.Path, "home", return_value=Path(tmpdir)):
result = agent_loop._start_agent("do something", "issue-99")
self.assertEqual(result, 42)
def test_start_agent_uses_popen_not_tmux(self):
mock_proc = self._make_mock_proc(pid=7)
with tempfile.TemporaryDirectory() as tmpdir:
with patch("agent_loop.subprocess.Popen", return_value=mock_proc) as mock_popen:
with patch("agent_loop.subprocess.run") as mock_run:
with patch.object(agent_loop.Path, "home", return_value=Path(tmpdir)):
agent_loop._start_agent("prompt", "ci-fix")
mock_popen.assert_called_once()
mock_run.assert_not_called()
def test_start_agent_passes_session_name_to_claude(self):
mock_proc = self._make_mock_proc(pid=7)
with tempfile.TemporaryDirectory() as tmpdir:
with patch("agent_loop.subprocess.Popen", return_value=mock_proc) as mock_popen:
with patch.object(agent_loop.Path, "home", return_value=Path(tmpdir)):
agent_loop._start_agent("prompt", "issue-55")
cmd = mock_popen.call_args[0][0]
self.assertIn("issue-55", cmd)
self.assertIn("claude", cmd[0])
def test_start_agent_uses_start_new_session(self):
mock_proc = self._make_mock_proc(pid=7)
with tempfile.TemporaryDirectory() as tmpdir:
with patch("agent_loop.subprocess.Popen", return_value=mock_proc) as mock_popen:
with patch.object(agent_loop.Path, "home", return_value=Path(tmpdir)):
agent_loop._start_agent("prompt", "issue-55")
kwargs = mock_popen.call_args[1]
self.assertTrue(kwargs.get("start_new_session"))
if __name__ == "__main__":
unittest.main()