Files
dotfiles/dotfiles/agents/skills/nixpkgs-review/scripts/mine_pr_feedback.py

209 lines
6.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Mine external feedback from recent GitHub PRs.

Examples:
    python scripts/mine_pr_feedback.py --repo NixOS/nixpkgs --author colonelpanic8
    python scripts/mine_pr_feedback.py --repo NixOS/nixpkgs --author colonelpanic8 --limit 30 --format json
"""
from __future__ import annotations
import argparse
import json
import subprocess
import sys
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
def run(cmd: list[str]) -> str:
    """Run *cmd* and return its captured standard output.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list
            (no shell is involved).

    Returns:
        The command's standard output, decoded as text.

    Raises:
        RuntimeError: If the program cannot be started (for example, it is
            not installed) or if it exits with a non-zero status. For a
            failing exit status the message is the stripped stderr when
            there is any, otherwise a "command failed" line naming the
            command.
    """
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as err:
        # A missing or non-executable binary surfaces as OSError. Report it
        # through the same RuntimeError channel as a failing exit status so
        # callers need a single handler.
        raise RuntimeError(f"command failed: {' '.join(cmd)}: {err}") from err
    if proc.returncode != 0:
        raise RuntimeError(proc.stderr.strip() or f"command failed: {' '.join(cmd)}")
    return proc.stdout
def gh_json(args: list[str]) -> object:
    """Invoke the ``gh`` CLI with *args* and decode its stdout as JSON."""
    raw_output = run(["gh"] + list(args))
    return json.loads(raw_output)
def fetch_prs(repo: str, author: str, limit: int) -> list[dict]:
    """Return up to *limit* of *author*'s most recently updated PRs in *repo*.

    Open and closed PRs are queried separately through ``gh search prs`` and
    merged by PR number, then ordered newest-first by ``(updatedAt, number)``.

    Args:
        repo: Repository in ``owner/name`` form.
        author: GitHub login whose PRs are searched.
        limit: Maximum number of PRs to return.

    Returns:
        A list of dicts carrying the ``number``, ``title``, ``state``,
        ``closedAt``, ``updatedAt`` and ``url`` fields requested from ``gh``.
    """
    prs: dict[int, dict] = {}
    # Each state is queried for at least 30 results, even for a small *limit*.
    per_state_limit = str(max(limit, 30))
    for state in ("open", "closed"):
        data = gh_json(
            [
                "search",
                "prs",
                "--repo",
                repo,
                "--author",
                author,
                "--limit",
                per_state_limit,
                "--state",
                state,
                # Without an explicit sort, gh returns "best-match" order, so
                # the per-state cap could cut off the most recently updated
                # PRs before the local sort below ever sees them.
                "--sort",
                "updated",
                "--order",
                "desc",
                "--json",
                "number,title,state,closedAt,updatedAt,url",
            ]
        )
        for pr in data:
            # Keyed by number so a PR returned by both queries is kept once.
            prs[pr["number"]] = pr
    return sorted(
        prs.values(),
        key=lambda pr: (pr["updatedAt"], pr["number"]),
        reverse=True,
    )[:limit]
def fetch_feedback(repo: str, author: str, pr: dict) -> dict:
    """Collect non-author comments and reviews for a single PR.

    Three GitHub REST endpoints are read through ``gh api --paginate``:
    issue comments, inline review comments, and reviews. Entries written by
    *author* and entries with an empty body are dropped.

    Args:
        repo: Repository in ``owner/name`` form.
        author: Login of the PR author, whose own comments are excluded.
            Compared case-insensitively.
        pr: PR record; must contain ``number``.

    Returns:
        A copy of *pr* with an added ``comments`` list. Every entry has
        ``kind`` (``issue``, ``review_comment`` or ``review``), ``user`` and
        ``body``; review comments also carry ``path`` and ``line``, and
        reviews carry ``state``.

    Raises:
        ValueError: If *repo* does not contain a ``/``.
    """
    owner, name = repo.split("/", 1)
    number = pr["number"]
    # GitHub logins are case-insensitive, so compare in folded form; otherwise
    # `--author Foo` would fail to filter out comments by `foo`.
    author_key = author.casefold()

    def api(path: str) -> list[dict]:
        return gh_json(["api", f"repos/{owner}/{name}/{path}", "--paginate"])

    def external(items: list[dict], kind: str, extra_keys: tuple = ()) -> list[dict]:
        """Keep the non-author, non-empty entries of *items*, tagged *kind*."""
        kept = []
        for item in items:
            # The API may return a null "user" (e.g. for a deleted account);
            # fall back to "ghost", the placeholder GitHub shows for those,
            # instead of crashing on None["login"].
            login = (item.get("user") or {}).get("login") or "ghost"
            body = (item.get("body") or "").strip()
            if not body or login.casefold() == author_key:
                continue
            entry = {"kind": kind, "user": login, "body": body}
            for key in extra_keys:
                entry[key] = item.get(key)
            kept.append(entry)
        return kept

    issue_comments = api(f"issues/{number}/comments")
    review_comments = api(f"pulls/{number}/comments")
    reviews = api(f"pulls/{number}/reviews")

    comments = [
        *external(issue_comments, "issue"),
        *external(review_comments, "review_comment", ("path", "line")),
        *external(reviews, "review", ("state",)),
    ]
    return {**pr, "comments": comments}
def is_bot(login: str) -> bool:
    """Return True when *login* looks like an automation account.

    A login counts as a bot if it is one of the two known automation names
    or carries the ``[bot]`` suffix.
    """
    if login in ("github-actions", "app/dependabot"):
        return True
    return login.endswith("[bot]")
def render_markdown(results: list[dict], include_bots: bool) -> str:
    """Render collected PR feedback as a Markdown report.

    The report has a summary header, a "Top commenters" list (at most ten
    entries), and one section per PR that still has comments after optional
    bot filtering. Each comment is flattened to one line and cut to 280
    characters, with ``...`` appended when it was longer.

    Args:
        results: PR records, each with ``number``, ``title``, ``url``,
            ``state`` and a ``comments`` list.
        include_bots: When false, comments whose user satisfies ``is_bot``
            are left out.

    Returns:
        The Markdown text, terminated by a newline.
    """
    tally = Counter()
    with_feedback = []
    for entry in results:
        if include_bots:
            visible = list(entry["comments"])
        else:
            visible = [item for item in entry["comments"] if not is_bot(item["user"])]
        if not visible:
            continue
        with_feedback.append({**entry, "comments": visible})
        tally.update(item["user"] for item in visible)

    out = [
        "# PR Feedback Summary",
        "",
        f"- PRs scanned: {len(results)}",
        f"- PRs with external feedback: {len(with_feedback)}",
        "",
        "## Top commenters",
        "",
    ]
    out += [f"- `{user}`: {count}" for user, count in tally.most_common(10)]

    # Map CR and LF each to a single space so a comment stays on one line.
    flatten = str.maketrans("\r\n", "  ")
    for entry in with_feedback:
        out.append("")
        out.append(f"## PR #{entry['number']}: {entry['title']}")
        out.append("")
        out.append(f"- URL: {entry['url']}")
        out.append(f"- State: {entry['state']}")
        out.append("")
        for item in entry["comments"]:
            text = item["body"].translate(flatten).strip()
            if len(text) > 280:
                snippet = text[:280] + "..."
            else:
                snippet = text
            out.append(f"- `{item['user']}` `{item['kind']}`: {snippet}")

    return "".join(f"{line}\n" for line in out)
def main() -> int:
    """Parse CLI arguments, collect PR feedback via ``gh``, and print it.

    Output goes to stdout as a Markdown report or as JSON, depending on
    ``--format``. Bot comments are dropped unless ``--include-bots`` is set.

    Returns:
        Process exit status: 0 on success, 1 when ``gh`` cannot be run, is
        not authenticated, or a GitHub query fails (the error is printed to
        stderr). Invalid ``--limit``/``--workers`` values exit through
        ``argparse`` with its usual usage error.
    """
    parser = argparse.ArgumentParser(description="Collect review feedback from recent GitHub PRs.")
    parser.add_argument("--repo", required=True, help="GitHub repo in owner/name form")
    parser.add_argument("--author", required=True, help="PR author to inspect")
    parser.add_argument("--limit", type=int, default=20, help="How many recent PRs to inspect")
    parser.add_argument(
        "--format",
        choices=("markdown", "json"),
        default="markdown",
        help="Output format",
    )
    parser.add_argument(
        "--include-bots",
        action="store_true",
        help="Keep bot comments in the output",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=6,
        help="Maximum concurrent GitHub API workers",
    )
    args = parser.parse_args()

    # A negative limit would silently slice PRs off the wrong end of the
    # list, and ThreadPoolExecutor rejects max_workers < 1 with a traceback.
    if args.limit < 0:
        parser.error("--limit must not be negative")
    if args.workers < 1:
        parser.error("--workers must be at least 1")

    try:
        # Fail fast with gh's own message when not logged in.
        run(["gh", "auth", "status"])
        prs = fetch_prs(args.repo, args.author, args.limit)
        results = []
        with ThreadPoolExecutor(max_workers=args.workers) as pool:
            futures = [pool.submit(fetch_feedback, args.repo, args.author, pr) for pr in prs]
            for future in as_completed(futures):
                results.append(future.result())
    except (RuntimeError, OSError) as err:
        # Any gh failure, not just the auth check, is reported as a one-line
        # error with exit status 1 rather than an uncaught traceback.
        print(err, file=sys.stderr)
        return 1

    # as_completed yields in completion order; restore newest-first ordering.
    results.sort(key=lambda pr: (pr["updatedAt"], pr["number"]), reverse=True)

    if args.format == "json":
        if not args.include_bots:
            for pr in results:
                pr["comments"] = [
                    comment for comment in pr["comments"] if not is_bot(comment["user"])
                ]
        json.dump(results, sys.stdout, indent=2)
        sys.stdout.write("\n")
    else:
        sys.stdout.write(render_markdown(results, args.include_bots))
    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())