Skip to content

Commit 35fd68b

Browse files
committed
[feat]: implement GitHub issue processor to scrape the issue body, summarize it, and embed it
1 parent c8bacc7 commit 35fd68b

File tree

2 files changed

+103
-0
lines changed

2 files changed

+103
-0
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Prompt template that asks an LLM to condense a GitHub issue into a technical
# summary suited to contributor search. It has exactly one placeholder,
# ``{issue_content}``, which callers fill with ``str.format``; the template
# must therefore contain no other literal braces.
ISSUE_SUMMARIZATION_PROMPT = """You are a technical analyst optimizing GitHub issues for contributor search.

Analyze the provided GitHub issue and create a technical summary optimized for finding relevant expert contributors.

Focus on:
- Core technical problem or feature request
- Specific technologies, frameworks, libraries, APIs mentioned
- Technical skills and expertise required to solve this
- Programming languages and tools involved
- System components affected (frontend, backend, database, etc.)

Create a summary that reads like a job requirement for finding the right technical expert.

**GitHub Issue Content:**
---
{issue_content}
---

**Optimized Technical Summary for Contributor Search:**
"""
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import logging
2+
from typing import List
3+
from langchain_google_genai import ChatGoogleGenerativeAI
4+
from langchain_core.messages import HumanMessage
5+
6+
from app.core.config import settings
7+
from app.services.embedding_service.service import EmbeddingService
8+
from app.services.github.user.profiling import GitHubUserProfiler
9+
from app.agents.devrel.github.prompts.contributor_recommendation.issue_summarization import ISSUE_SUMMARIZATION_PROMPT
10+
11+
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
12+
13+
14+
class GitHubIssueProcessor:
    """Fetch a GitHub issue, summarize it with an LLM, and embed the summary.

    An instance is bound to a single issue identified by ``owner``, ``repo``
    and ``issue_number``. ``get_embedding_for_issue`` is the public entry
    point; the underscore-prefixed methods are its individual steps.
    """

    # Separator placed between the title, the body and each comment.
    _PART_SEPARATOR = "\n\n---\n\n"
    # Author label used when a comment payload carries no user object.
    _UNKNOWN_AUTHOR = "unknown"

    def __init__(self, owner: str, repo: str, issue_number: int):
        """Bind the processor to one issue and build its LLM/embedding clients.

        Args:
            owner: Repository owner (user or organization login).
            repo: Repository name.
            issue_number: Number of the issue within the repository.
        """
        self.owner = owner
        self.repo = repo
        self.issue_number = issue_number
        self.summarizer_llm = ChatGoogleGenerativeAI(
            model=settings.github_agent_model,
            temperature=0.1,
            google_api_key=settings.gemini_api_key,
        )
        self.embedding_service = EmbeddingService()

    @classmethod
    def _build_issue_text(cls, issue_data, comments_data) -> str:
        """Join an issue's title, body and non-empty comments into one string.

        Args:
            issue_data: Mapping for the issue. ``title`` is required (a
                missing key raises ``KeyError``); ``body`` may be absent
                or ``None``.
            comments_data: List of comment mappings. Any other value
                (``None``, an error payload, ...) is treated as "no comments".

        Returns:
            The parts joined by ``_PART_SEPARATOR``.
        """
        parts = [
            f"Title: {issue_data['title']}",
            # A null body must not leak into the prompt as the text "None".
            f"Body: {issue_data.get('body') or ''}",
        ]
        if isinstance(comments_data, list):
            for comment in comments_data:
                if not isinstance(comment, dict) or not comment.get("body"):
                    continue
                # The user object may be null, so fall back to a placeholder
                # author instead of raising TypeError.
                author = (comment.get("user") or {}).get("login") or cls._UNKNOWN_AUTHOR
                parts.append(f"Comment by {author}: {comment['body']}")
        return cls._PART_SEPARATOR.join(parts)

    @staticmethod
    def _message_text(content) -> str:
        """Flatten LLM message content into plain text.

        Message content is normally a ``str`` but may also be a list whose
        items are strings or dicts with a ``"text"`` entry; items of any other
        shape are ignored.
        """
        if isinstance(content, str):
            return content
        pieces = []
        for block in content or []:
            if isinstance(block, str):
                pieces.append(block)
            elif isinstance(block, dict):
                pieces.append(str(block.get("text", "")))
        return "".join(pieces)

    async def _fetch_issue_content(self) -> str:
        """Fetch the issue and its comments and merge them into one text blob.

        Returns:
            Title, body and comments joined by ``_PART_SEPARATOR``.

        Raises:
            ValueError: If the issue request returns a falsy result.
        """
        logger.info(
            "Fetching content for %s/%s#%s", self.owner, self.repo, self.issue_number
        )
        async with GitHubUserProfiler() as profiler:
            issue_url = (
                f"{profiler.base_url}/repos/{self.owner}/{self.repo}"
                f"/issues/{self.issue_number}"
            )
            comments_url = f"{issue_url}/comments"

            # NOTE(review): this relies on the profiler's private
            # ``_make_request``; consider exposing a public request method.
            issue_data = await profiler._make_request(issue_url)
            if not issue_data:
                raise ValueError("Failed to fetch issue data.")

            # NOTE(review): a single request is made for comments, so long
            # threads are presumably truncated to the first page - confirm
            # against ``_make_request`` and add pagination if needed.
            comments_data = await profiler._make_request(comments_url)

        return self._build_issue_text(issue_data, comments_data)

    async def _summarize_context(self, content: str) -> str:
        """Generate a technical summary of the issue content using the LLM.

        Args:
            content: Consolidated issue text to substitute into
                ``ISSUE_SUMMARIZATION_PROMPT``.

        Returns:
            The model's reply as stripped plain text.
        """
        logger.info(
            "Summarizing issue content for %s/%s#%s",
            self.owner, self.repo, self.issue_number,
        )
        prompt = ISSUE_SUMMARIZATION_PROMPT.format(issue_content=content)
        response = await self.summarizer_llm.ainvoke([HumanMessage(content=prompt)])
        summary = self._message_text(response.content).strip()
        # Only a short prefix is logged to keep log lines bounded.
        logger.info("Generated summary: %s", summary[:100])
        return summary

    async def get_embedding_for_issue(self) -> List[float]:
        """Run the full pipeline: fetch, summarize, then embed.

        Returns:
            The value produced by ``embedding_service.get_embedding`` for the
            issue summary.

        Raises:
            ValueError: If no issue content could be fetched.
            Exception: Any error from a pipeline step is logged with its
                traceback and re-raised unchanged.
        """
        try:
            content = await self._fetch_issue_content()
            if not content:
                raise ValueError("Failed to fetch issue content.")

            summary = await self._summarize_context(content)

            logger.info("Embedding issue summary")
            return await self.embedding_service.get_embedding(summary)
        except Exception as e:
            logger.exception(
                "Error processing issue %s/%s#%s: %s",
                self.owner, self.repo, self.issue_number, e,
            )
            # Bare raise re-raises the active exception as-is.
            raise

0 commit comments

Comments
 (0)