|
| 1 | +import logging |
| 2 | +from typing import List |
| 3 | +from langchain_google_genai import ChatGoogleGenerativeAI |
| 4 | +from langchain_core.messages import HumanMessage |
| 5 | + |
| 6 | +from app.core.config import settings |
| 7 | +from app.services.embedding_service.service import EmbeddingService |
| 8 | +from app.services.github.user.profiling import GitHubUserProfiler |
| 9 | +from app.agents.devrel.github.prompts.contributor_recommendation.issue_summarization import ISSUE_SUMMARIZATION_PROMPT |
| 10 | + |
# Module-level logger named after the module path (stdlib convention).
logger = logging.getLogger(__name__)
| 12 | + |
| 13 | + |
class GitHubIssueProcessor:
    """Fetch, summarize, and embed a single GitHub issue.

    Pipeline: pull the issue's title, body, and comments from the GitHub
    API, condense them into a technical summary with an LLM, then turn
    the summary into a vector embedding.
    """

    def __init__(self, owner: str, repo: str, issue_number: int):
        """Store issue coordinates and build the LLM/embedding clients.

        Args:
            owner: Repository owner login.
            repo: Repository name.
            issue_number: Number of the issue to process.
        """
        self.owner = owner
        self.repo = repo
        self.issue_number = issue_number
        # Low temperature: summaries should be near-deterministic and factual.
        self.summarizer_llm = ChatGoogleGenerativeAI(
            model=settings.github_agent_model,
            temperature=0.1,
            google_api_key=settings.gemini_api_key
        )
        self.embedding_service = EmbeddingService()

    async def _fetch_issue_content(self) -> str:
        """Fetch and consolidate all text content from the GitHub issue.

        Returns:
            Title, body, and every non-empty comment joined by a
            ``---`` separator.

        Raises:
            ValueError: If the issue itself cannot be fetched.
        """
        # Lazy %-style args: formatting is skipped if INFO is disabled.
        logger.info("Fetching content for %s/%s#%s", self.owner, self.repo, self.issue_number)
        async with GitHubUserProfiler() as profiler:
            # NOTE(review): relies on GitHubUserProfiler's private
            # _make_request helper; a public fetch method would be cleaner.
            issue_url = f"{profiler.base_url}/repos/{self.owner}/{self.repo}/issues/{self.issue_number}"
            comments_url = f"{issue_url}/comments"

            issue_data = await profiler._make_request(issue_url)
            if not issue_data:
                raise ValueError("Failed to fetch issue data.")

            # The GitHub API returns null for an empty issue body; without
            # the guard the literal string "None" would leak into the
            # LLM prompt.
            content_parts = [
                f"Title: {issue_data['title']}",
                f"Body: {issue_data.get('body') or ''}",
            ]

            # Comments are best-effort: a failed fetch degrades gracefully
            # to title + body only instead of raising.
            comments_data = await profiler._make_request(comments_url)
            if comments_data:
                content_parts.extend(
                    f"Comment by {c['user']['login']}: {c['body']}"
                    for c in comments_data if c.get('body')
                )

            return "\n\n---\n\n".join(content_parts)

    async def _summarize_context(self, content: str) -> str:
        """Generate a technical summary of the issue content using an LLM.

        Args:
            content: Consolidated issue text from ``_fetch_issue_content``.

        Returns:
            The stripped summary text produced by the model.
        """
        logger.info("Summarizing issue content for %s/%s#%s", self.owner, self.repo, self.issue_number)
        prompt = ISSUE_SUMMARIZATION_PROMPT.format(issue_content=content)
        response = await self.summarizer_llm.ainvoke([HumanMessage(content=prompt)])
        # Strip once and reuse; log only a preview since summaries can be long.
        summary = response.content.strip()
        logger.info("Generated summary: %s", summary[:100])
        return summary

    async def get_embedding_for_issue(self) -> List[float]:
        """Orchestrate the entire process: fetch, summarize, and embed.

        Returns:
            A vector embedding representing the issue.

        Raises:
            ValueError: If the issue content cannot be fetched.
            Exception: Any downstream API/LLM error is logged with its
                traceback and re-raised unchanged.
        """
        try:
            content = await self._fetch_issue_content()
            if not content:
                raise ValueError("Failed to fetch issue content.")

            summary = await self._summarize_context(content)

            logger.info("Embedding issue summary")
            return await self.embedding_service.get_embedding(summary)
        except Exception:
            # logger.exception records the full traceback; bare `raise`
            # preserves the original exception and its chain for callers.
            logger.exception(
                "Error processing issue %s/%s#%s", self.owner, self.repo, self.issue_number
            )
            raise
0 commit comments