Files
billo-release-agent/tests/tools/test_claude_review.py
Yaojia Wang f5c2733cfb feat: initial commit — Billo Release Agent (LangGraph)
LangGraph-based release automation agent with:
- PR discovery (webhook + polling)
- AI code review via Claude Code CLI (subscription-based)
- Auto-create Jira tickets for PRs without ticket ID
- Jira ticket lifecycle management (code review -> staging -> done)
- CI/CD pipeline trigger, polling, and approval gates
- Slack interactive messages with approval buttons
- Per-repo semantic versioning
- PostgreSQL persistence (threads, staging, releases)
- FastAPI API (webhooks, approvals, status, manual triggers)
- Docker Compose deployment

1069 tests, 95%+ coverage.
2026-03-24 17:38:23 +01:00

455 lines
17 KiB
Python

"""Tests for ClaudeReviewer using Claude Code CLI subprocess."""
import json
import pytest
from release_agent.models.review import ReviewResult
from release_agent.tools.claude_review import (
ClaudeReviewer,
_build_prompt,
_parse_cli_output,
_truncate_diff,
)
# Presumably mirrors the diff-size cap enforced inside
# release_agent.tools.claude_review._truncate_diff — TODO confirm it stays in sync.
MAX_DIFF_CHARS = 100_000
# ---------------------------------------------------------------------------
# Helpers — fake subprocess runner
# ---------------------------------------------------------------------------
def _make_cli_output(
verdict: str = "approve",
summary: str = "LGTM",
issues: list | None = None,
) -> str:
"""Build a JSON string mimicking Claude Code CLI --output-format json."""
structured = {
"verdict": verdict,
"summary": summary,
"issues": issues or [],
}
return json.dumps({"result": "", "structured_output": structured})
def _make_subprocess_runner(
stdout: str = "",
stderr: str = "",
returncode: int = 0,
):
"""Return a fake run_subprocess callable that records calls."""
calls: list[dict] = []
async def fake_run(*, cmd, cwd, timeout):
calls.append({"cmd": cmd, "cwd": cwd, "timeout": timeout})
return (stdout, stderr, returncode)
return fake_run, calls
# ---------------------------------------------------------------------------
# _truncate_diff tests
# ---------------------------------------------------------------------------
class TestTruncateDiff:
    """Behaviour of _truncate_diff around the MAX_DIFF_CHARS boundary."""

    def test_short_diff_not_truncated(self) -> None:
        original = "short diff"
        assert _truncate_diff(original) == original

    def test_exact_limit_not_truncated(self) -> None:
        at_limit = "x" * MAX_DIFF_CHARS
        assert _truncate_diff(at_limit) == at_limit

    def test_over_limit_truncated(self) -> None:
        oversized = "x" * (MAX_DIFF_CHARS + 1000)
        shortened = _truncate_diff(oversized)
        assert len(shortened) < len(oversized)
        assert "TRUNCATED" in shortened
# ---------------------------------------------------------------------------
# _build_prompt tests
# ---------------------------------------------------------------------------
class TestBuildPrompt:
    """The review prompt must embed the PR title, repo name, and diff verbatim."""

    def test_contains_pr_title(self) -> None:
        rendered = _build_prompt(diff="d", pr_title="My Title", repo_name="repo")
        assert "My Title" in rendered

    def test_contains_repo_name(self) -> None:
        rendered = _build_prompt(diff="d", pr_title="t", repo_name="my-repo")
        assert "my-repo" in rendered

    def test_contains_diff(self) -> None:
        rendered = _build_prompt(diff="UNIQUE_DIFF", pr_title="t", repo_name="r")
        assert "UNIQUE_DIFF" in rendered
# ---------------------------------------------------------------------------
# _parse_cli_output tests
# ---------------------------------------------------------------------------
class TestParseCliOutput:
    """Decoding Claude CLI JSON output into ReviewResult instances."""

    def test_parses_structured_output(self) -> None:
        raw = _make_cli_output(verdict="approve", summary="Good")
        parsed = _parse_cli_output(raw)
        assert isinstance(parsed, ReviewResult)
        assert parsed.verdict == "approve"
        assert parsed.summary == "Good"

    def test_parses_request_changes(self) -> None:
        raw = _make_cli_output(
            verdict="request_changes",
            summary="Has issues",
            issues=[{"severity": "blocker", "description": "SQL injection"}],
        )
        parsed = _parse_cli_output(raw)
        assert parsed.verdict == "request_changes"
        assert len(parsed.issues) == 1
        assert parsed.has_blockers is True

    def test_parses_issues_with_optional_fields(self) -> None:
        issue = {
            "severity": "warning",
            "description": "Style issue",
            "file_path": "src/foo.py",
            "suggestion": "Fix it",
        }
        parsed = _parse_cli_output(
            _make_cli_output(
                verdict="request_changes", summary="Issues found", issues=[issue]
            )
        )
        assert parsed.issues[0].file_path == "src/foo.py"
        assert parsed.issues[0].suggestion == "Fix it"

    def test_empty_issues_no_blockers(self) -> None:
        parsed = _parse_cli_output(
            _make_cli_output(verdict="approve", summary="Clean", issues=[])
        )
        assert parsed.has_blockers is False
        assert len(parsed.issues) == 0

    def test_result_field_as_json_string(self) -> None:
        """Falls back to parsing the result field as JSON when structured_output is absent."""
        payload = {"verdict": "approve", "summary": "OK", "issues": []}
        raw = json.dumps({"result": json.dumps(payload)})
        assert _parse_cli_output(raw).verdict == "approve"

    def test_invalid_json_raises(self) -> None:
        with pytest.raises(ValueError, match="Failed to parse"):
            _parse_cli_output("not json at all")

    def test_missing_structured_output_and_result_raises(self) -> None:
        raw = json.dumps({"other": "data"})
        with pytest.raises(ValueError, match="No structured_output"):
            _parse_cli_output(raw)

    def test_non_dict_structured_output_raises(self) -> None:
        raw = json.dumps({"structured_output": ["not", "a", "dict"]})
        with pytest.raises(ValueError, match="Expected dict"):
            _parse_cli_output(raw)

    def test_result_is_non_json_string_raises(self) -> None:
        raw = json.dumps({"result": "just plain text, not json"})
        with pytest.raises(ValueError, match="not valid JSON"):
            _parse_cli_output(raw)
# ---------------------------------------------------------------------------
# ClaudeReviewer construction tests
# ---------------------------------------------------------------------------
class TestClaudeReviewerConstruction:
    """Constructor defaults and overrides for ClaudeReviewer."""

    def test_can_be_instantiated(self) -> None:
        assert ClaudeReviewer() is not None

    def test_custom_claude_cmd(self) -> None:
        custom = ClaudeReviewer(claude_cmd="/usr/local/bin/claude")
        assert custom._claude_cmd == "/usr/local/bin/claude"

    def test_custom_timeout(self) -> None:
        custom = ClaudeReviewer(timeout=60)
        assert custom._timeout == 60
# ---------------------------------------------------------------------------
# review_pr tests
# ---------------------------------------------------------------------------
class TestReviewPr:
    """Behaviour of ClaudeReviewer.review_pr driven by a fake subprocess."""

    async def test_returns_review_result(self) -> None:
        cli_json = _make_cli_output(verdict="approve", summary="Looks good")
        run, _ = _make_subprocess_runner(stdout=cli_json)
        reviewer = ClaudeReviewer(run_subprocess=run)
        outcome = await reviewer.review_pr(
            diff="diff --git a/foo.py ...",
            pr_title="Fix bug",
            repo_name="my-repo",
        )
        assert isinstance(outcome, ReviewResult)
        assert outcome.verdict == "approve"

    async def test_passes_cwd_to_subprocess(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="diff",
            pr_title="PR",
            repo_name="repo",
            cwd="/path/to/worktree",
        )
        assert log[0]["cwd"] == "/path/to/worktree"

    async def test_cmd_includes_claude_p(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="t", repo_name="r"
        )
        argv = log[0]["cmd"]
        assert argv[0] == "claude"
        assert "-p" in argv

    async def test_cmd_includes_output_format_json(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="t", repo_name="r"
        )
        argv = log[0]["cmd"]
        flag_pos = argv.index("--output-format")
        assert argv[flag_pos + 1] == "json"

    async def test_cmd_includes_json_schema(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="t", repo_name="r"
        )
        assert "--json-schema" in log[0]["cmd"]

    async def test_cmd_includes_allowed_tools(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="t", repo_name="r"
        )
        argv = log[0]["cmd"]
        assert "Read" in argv[argv.index("--allowedTools") + 1]

    async def test_cmd_includes_system_prompt(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="t", repo_name="r"
        )
        assert "--system-prompt" in log[0]["cmd"]

    async def test_nonzero_exit_raises(self) -> None:
        run, _ = _make_subprocess_runner(
            stdout="", stderr="error occurred", returncode=1
        )
        reviewer = ClaudeReviewer(run_subprocess=run)
        with pytest.raises(RuntimeError, match="exited with code 1"):
            await reviewer.review_pr(diff="d", pr_title="t", repo_name="r")

    async def test_timeout_passed_to_subprocess(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run, timeout=120).review_pr(
            diff="d", pr_title="t", repo_name="r"
        )
        assert log[0]["timeout"] == 120

    async def test_pr_title_in_prompt(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="Specific Title", repo_name="r"
        )
        argv = log[0]["cmd"]
        assert "Specific Title" in argv[argv.index("-p") + 1]

    async def test_repo_name_in_prompt(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="t", repo_name="special-repo"
        )
        argv = log[0]["cmd"]
        assert "special-repo" in argv[argv.index("-p") + 1]

    async def test_cwd_none_when_not_provided(self) -> None:
        run, log = _make_subprocess_runner(stdout=_make_cli_output())
        await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="t", repo_name="r"
        )
        assert log[0]["cwd"] is None

    async def test_request_changes_with_issues(self) -> None:
        cli_json = _make_cli_output(
            verdict="request_changes",
            summary="Problems found",
            issues=[
                {"severity": "blocker", "description": "Security flaw"},
                {"severity": "warning", "description": "Missing docs"},
            ],
        )
        run, _ = _make_subprocess_runner(stdout=cli_json)
        outcome = await ClaudeReviewer(run_subprocess=run).review_pr(
            diff="d", pr_title="t", repo_name="r"
        )
        assert outcome.verdict == "request_changes"
        assert len(outcome.issues) == 2
        assert outcome.has_blockers is True
# ---------------------------------------------------------------------------
# ClaudeReviewer.generate_ticket_content tests
# ---------------------------------------------------------------------------
def _make_ticket_cli_output(summary: str = "My summary", description: str = "My desc") -> str:
"""Build a JSON string mimicking Claude Code CLI output for ticket generation."""
structured = {"summary": summary, "description": description}
return json.dumps({"result": "", "structured_output": structured})
class TestGenerateTicketContent:
    """Tests for ClaudeReviewer.generate_ticket_content.

    Each test wires a fake subprocess runner so no real Claude CLI is
    invoked; assertions inspect either the returned (summary, description)
    tuple or the recorded CLI invocation.
    """

    async def test_returns_tuple_of_summary_and_description(self) -> None:
        stdout = _make_ticket_cli_output(summary="Fix login bug", description="Detailed desc")
        runner, _ = _make_subprocess_runner(stdout=stdout)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        result = await reviewer.generate_ticket_content(
            diff="edit: main.py", pr_title="Fix login", repo_name="backend"
        )
        assert isinstance(result, tuple)
        assert len(result) == 2

    async def test_returns_correct_summary(self) -> None:
        stdout = _make_ticket_cli_output(summary="Implement OAuth2 login")
        runner, _ = _make_subprocess_runner(stdout=stdout)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        summary, _ = await reviewer.generate_ticket_content(
            diff="d", pr_title="Add OAuth", repo_name="auth-service"
        )
        assert summary == "Implement OAuth2 login"

    async def test_returns_correct_description(self) -> None:
        stdout = _make_ticket_cli_output(description="This adds OAuth2 support for the login flow")
        runner, _ = _make_subprocess_runner(stdout=stdout)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        _, description = await reviewer.generate_ticket_content(
            diff="d", pr_title="Add OAuth", repo_name="auth-service"
        )
        assert description == "This adds OAuth2 support for the login flow"

    async def test_uses_json_schema_with_summary_and_description_fields(self) -> None:
        stdout = _make_ticket_cli_output()
        runner, calls = _make_subprocess_runner(stdout=stdout)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        await reviewer.generate_ticket_content(diff="d", pr_title="t", repo_name="r")
        cmd = calls[0]["cmd"]
        # Verify --json-schema flag was used and that its schema argument
        # declares both ticket fields.
        assert "--json-schema" in cmd
        schema_idx = cmd.index("--json-schema")
        schema_json = cmd[schema_idx + 1]
        schema = json.loads(schema_json)
        assert "summary" in schema["properties"]
        assert "description" in schema["properties"]

    async def test_passes_pr_title_in_prompt(self) -> None:
        stdout = _make_ticket_cli_output()
        runner, calls = _make_subprocess_runner(stdout=stdout)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        await reviewer.generate_ticket_content(
            diff="d", pr_title="My Unique PR Title", repo_name="r"
        )
        cmd_str = " ".join(calls[0]["cmd"])
        assert "My Unique PR Title" in cmd_str

    async def test_passes_repo_name_in_prompt(self) -> None:
        stdout = _make_ticket_cli_output()
        runner, calls = _make_subprocess_runner(stdout=stdout)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        await reviewer.generate_ticket_content(
            diff="d", pr_title="t", repo_name="my-special-repo"
        )
        cmd_str = " ".join(calls[0]["cmd"])
        assert "my-special-repo" in cmd_str

    async def test_passes_cwd_to_subprocess(self) -> None:
        stdout = _make_ticket_cli_output()
        runner, calls = _make_subprocess_runner(stdout=stdout)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        await reviewer.generate_ticket_content(
            diff="d", pr_title="t", repo_name="r", cwd="/some/path"
        )
        assert calls[0]["cwd"] == "/some/path"

    async def test_cwd_none_by_default(self) -> None:
        stdout = _make_ticket_cli_output()
        runner, calls = _make_subprocess_runner(stdout=stdout)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        await reviewer.generate_ticket_content(diff="d", pr_title="t", repo_name="r")
        assert calls[0]["cwd"] is None

    async def test_raises_on_nonzero_exit_code(self) -> None:
        runner, _ = _make_subprocess_runner(stdout="", stderr="Error", returncode=1)
        reviewer = ClaudeReviewer(run_subprocess=runner)
        with pytest.raises(RuntimeError, match="Claude CLI"):
            await reviewer.generate_ticket_content(diff="d", pr_title="t", repo_name="r")

    async def test_raises_on_invalid_json_output(self) -> None:
        runner, _ = _make_subprocess_runner(stdout="not json at all")
        reviewer = ClaudeReviewer(run_subprocess=runner)
        # ValueError is a subclass of Exception, so the previous tuple
        # (ValueError, Exception) was equivalent to Exception alone; keep the
        # deliberately broad catch but drop the redundant tuple member.
        with pytest.raises(Exception):
            await reviewer.generate_ticket_content(diff="d", pr_title="t", repo_name="r")