"""PR deduplication service. Queries the agent_threads table to find which PRs from a given list have not yet been processed. This prevents the PR poller from re-triggering graph runs for PRs that already have an existing thread. """ from release_agent.models.pr import PRInfo _QUERY = """ SELECT pr_id, repo_name FROM agent_threads WHERE (pr_id, repo_name) IN ( SELECT unnest(%s::text[]), unnest(%s::text[]) ) """ async def find_unprocessed_prs(pool, prs: list[PRInfo]) -> list[PRInfo]: """Return the subset of prs that have no existing agent_threads record. A PR is considered already-processed if there exists a row in agent_threads with matching (pr_id, repo_name) pair. The SQL uses unnest to enforce pair-wise matching (not independent ANY on each column). Args: pool: Async psycopg connection pool. prs: List of PRInfo objects to check. Returns: A new list containing only PRs with no existing thread. Original list is not mutated. """ if not prs: return [] pr_ids = [p.pr_id for p in prs] repo_names = [p.repo_name for p in prs] async with pool.connection() as conn: async with conn.cursor() as cur: await cur.execute(_QUERY, (pr_ids, repo_names)) rows = await cur.fetchall() processed = {(str(row[0]), str(row[1])) for row in rows} return [p for p in prs if (p.pr_id, p.repo_name) not in processed]