From 400b5eb4ad597fae6015f844e33c8612bc4ed1e7 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Wed, 23 Jul 2025 03:00:38 +0530 Subject: [PATCH 1/6] [feat]: update intent analysis and response prompt to better support contributor recommendation tool --- .../agents/devrel/github/prompts/intent_analysis.py | 10 ++++++++-- backend/app/agents/devrel/prompts/response_prompt.py | 11 ++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/backend/app/agents/devrel/github/prompts/intent_analysis.py b/backend/app/agents/devrel/github/prompts/intent_analysis.py index eb3f9a6..4c1e281 100644 --- a/backend/app/agents/devrel/github/prompts/intent_analysis.py +++ b/backend/app/agents/devrel/github/prompts/intent_analysis.py @@ -2,7 +2,7 @@ AVAILABLE FUNCTIONS: - web_search: Search the web for information -- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate +- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate (supports both issue URLs and general queries) - repo_support: Questions about codebase structure, dependencies, impact analysis, architecture - issue_creation: Creating bug reports, feature requests, or tracking items - documentation_generation: Generating docs, READMEs, API docs, guides, or explanations @@ -12,7 +12,13 @@ USER QUERY: {user_query} Classification guidelines: -- contributor_recommendation: Finding reviewers, assignees, collaborators +- contributor_recommendation: + * "who should review this PR/issue?" + * "find experts in React/Python/ML" + * "recommend assignees for stripe integration" + * "best people for database optimization" + * URLs like github.com/owner/repo/issues/123 + * "I need help with RabbitMQ, can you suggest some people?" 
- repo_support: Code structure, dependencies, impact analysis, architecture - issue_creation: Creating bugs, features, tracking items - documentation_generation: Docs, READMEs, guides, explanations diff --git a/backend/app/agents/devrel/prompts/response_prompt.py b/backend/app/agents/devrel/prompts/response_prompt.py index 99291f3..4baa6b5 100644 --- a/backend/app/agents/devrel/prompts/response_prompt.py +++ b/backend/app/agents/devrel/prompts/response_prompt.py @@ -26,10 +26,18 @@ - Avoid complex markdown formatting like **bold** or *italic* - Use plain text with clear line breaks - Format links as plain URLs: https://example.com -- Use simple emojis for visual appeal: 🔗 📚 ⚡ +- Use simple emojis for visual appeal - Keep paragraphs short and scannable - Use "→" for arrows instead of markdown arrows +SPECIAL FORMATTING FOR CONTRIBUTOR RECOMMENDATIONS: +If the task result contains contributor recommendations: +- Start with "Found X Contributors" +- Show search query used and keywords +- For each contributor: "1. username (Score: X.XXX)" +- Include their expertise/reason for recommendation +- End with metadata about search and actionable guidance + Instructions: 1. Synthesize all information - Use reasoning process, tool results, and task results together 2. Address the user's needs - Focus on what they're trying to accomplish @@ -37,5 +45,6 @@ 4. Stay DevRel-focused - Be encouraging, helpful, and community-oriented 5. Reference sources - Mention what you researched or considered when relevant 6. Format for readability - Clean, simple text that displays well +7. 
For contributor recommendations - Use the special formatting above to show scores and details Create a helpful, comprehensive response:""" From c8bacc74f3783b0a4030ff9493d4539f29ac7cc2 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Wed, 23 Jul 2025 03:03:07 +0530 Subject: [PATCH 2/6] [refactor]: migrate user profiling logic to github/user --- backend/app/services/{ => github}/user/__init__.py | 0 backend/app/services/{ => github}/user/profiling.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename backend/app/services/{ => github}/user/__init__.py (100%) rename backend/app/services/{ => github}/user/profiling.py (100%) diff --git a/backend/app/services/user/__init__.py b/backend/app/services/github/user/__init__.py similarity index 100% rename from backend/app/services/user/__init__.py rename to backend/app/services/github/user/__init__.py diff --git a/backend/app/services/user/profiling.py b/backend/app/services/github/user/profiling.py similarity index 100% rename from backend/app/services/user/profiling.py rename to backend/app/services/github/user/profiling.py From 35fd68bf03b5dd5dd305493f1c18d3a49f2f42bc Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Wed, 23 Jul 2025 03:07:50 +0530 Subject: [PATCH 3/6] [feat]: implement github issue processor to scrape issue body, summarize and embed it --- .../issue_summarization.py | 20 +++++ .../app/services/github/issue_processor.py | 83 +++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 backend/app/agents/devrel/github/prompts/contributor_recommendation/issue_summarization.py create mode 100644 backend/app/services/github/issue_processor.py diff --git a/backend/app/agents/devrel/github/prompts/contributor_recommendation/issue_summarization.py b/backend/app/agents/devrel/github/prompts/contributor_recommendation/issue_summarization.py new file mode 100644 index 0000000..56424c8 --- /dev/null +++ b/backend/app/agents/devrel/github/prompts/contributor_recommendation/issue_summarization.py 
@@ -0,0 +1,20 @@ +ISSUE_SUMMARIZATION_PROMPT = """You are a technical analyst optimizing GitHub issues for contributor search. + +Analyze the provided GitHub issue and create a technical summary optimized for finding relevant expert contributors. + +Focus on: +- Core technical problem or feature request +- Specific technologies, frameworks, libraries, APIs mentioned +- Technical skills and expertise required to solve this +- Programming languages and tools involved +- System components affected (frontend, backend, database, etc.) + +Create a summary that reads like a job requirement for finding the right technical expert. + +**GitHub Issue Content:** +--- +{issue_content} +--- + +**Optimized Technical Summary for Contributor Search:** +""" diff --git a/backend/app/services/github/issue_processor.py b/backend/app/services/github/issue_processor.py new file mode 100644 index 0000000..f4319fb --- /dev/null +++ b/backend/app/services/github/issue_processor.py @@ -0,0 +1,83 @@ +import logging +from typing import List +from langchain_google_genai import ChatGoogleGenerativeAI +from langchain_core.messages import HumanMessage + +from app.core.config import settings +from app.services.embedding_service.service import EmbeddingService +from app.services.github.user.profiling import GitHubUserProfiler +from app.agents.devrel.github.prompts.contributor_recommendation.issue_summarization import ISSUE_SUMMARIZATION_PROMPT + +logger = logging.getLogger(__name__) + + +class GitHubIssueProcessor: + """ + A service to fetch, summarize, and embed a GitHub issue. 
+ """ + + def __init__(self, owner: str, repo: str, issue_number: int): + self.owner = owner + self.repo = repo + self.issue_number = issue_number + self.summarizer_llm = ChatGoogleGenerativeAI( + model=settings.github_agent_model, + temperature=0.1, + google_api_key=settings.gemini_api_key + ) + self.embedding_service = EmbeddingService() + + async def _fetch_issue_content(self) -> str: + """ + Fetches and consolidates all text content from a GitHub issue. + """ + logger.info(f"Fetching content for {self.owner}/{self.repo}#{self.issue_number}") + async with GitHubUserProfiler() as profiler: + issue_url = f"{profiler.base_url}/repos/{self.owner}/{self.repo}/issues/{self.issue_number}" + comments_url = f"{issue_url}/comments" + + issue_data = await profiler._make_request(issue_url) + if not issue_data: + raise ValueError("Failed to fetch issue data.") + + content_parts = [ + f"Title: {issue_data['title']}", + f"Body: {issue_data['body']}", + ] + + comments_data = await profiler._make_request(comments_url) + if comments_data: + comment_texts = [ + f"Comment by {c['user']['login']}: {c['body']}" + for c in comments_data if c.get('body') + ] + content_parts.extend(comment_texts) + + return "\n\n---\n\n".join(content_parts) + + async def _summarize_context(self, content: str) -> str: + """Generates a technical summary of the issue content using an LLM.""" + logger.info(f"Summarizing issue content for {self.owner}/{self.repo}#{self.issue_number}") + prompt = ISSUE_SUMMARIZATION_PROMPT.format(issue_content=content) + response = await self.summarizer_llm.ainvoke([HumanMessage(content=prompt)]) + logger.info(f"Generated summary: {response.content.strip()[:100]}") + return response.content.strip() + + async def get_embedding_for_issue(self) -> List[float]: + """ + Orchestrates the entire process: fetch, summarize, and embed. + Returns a vector embedding representing the issue. 
+ """ + try: + content = await self._fetch_issue_content() + if not content: + raise ValueError("Failed to fetch issue content.") + + summary = await self._summarize_context(content) + + logger.info("Embedding issue summary") + embedding = await self.embedding_service.get_embedding(summary) + return embedding + except Exception as e: + logger.error(f"Error processing issue {self.owner}/{self.repo}#{self.issue_number}: {str(e)}") + raise e From 52d4e5237c0b68afc5fc7848fc1541278e0bfe2b Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Wed, 23 Jul 2025 03:08:04 +0530 Subject: [PATCH 4/6] [feat]: changes to implement user background async profiling after github authorization --- backend/app/api/v1/auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/api/v1/auth.py b/backend/app/api/v1/auth.py index e70c5dc..738894b 100644 --- a/backend/app/api/v1/auth.py +++ b/backend/app/api/v1/auth.py @@ -2,7 +2,7 @@ from fastapi.responses import HTMLResponse from app.database.supabase.client import get_supabase_client from app.services.auth.verification import find_user_by_session_and_verify, get_verification_session_info -from app.services.user.profiling import profile_user_from_github +from app.services.github.user.profiling import profile_user_from_github from typing import Optional import logging import asyncio From 5cfa38f4e4ccdcd6236490ce1d53644a853fd4e4 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sun, 27 Jul 2025 05:08:45 +0530 Subject: [PATCH 5/6] [feat]: add contributor recommendation tool --- .../agents/devrel/github/github_toolkit.py | 5 +- .../query_alignment.py | 37 ++++ .../tools/contributor_recommendation.py | 169 ++++++++++++++++++ .../app/services/github/issue_processor.py | 4 +- 4 files changed, 210 insertions(+), 5 deletions(-) create mode 100644 backend/app/agents/devrel/github/prompts/contributor_recommendation/query_alignment.py diff --git a/backend/app/agents/devrel/github/github_toolkit.py 
b/backend/app/agents/devrel/github/github_toolkit.py index 1a874e9..b419812 100644 --- a/backend/app/agents/devrel/github/github_toolkit.py +++ b/backend/app/agents/devrel/github/github_toolkit.py @@ -6,7 +6,7 @@ from .prompts.intent_analysis import GITHUB_INTENT_ANALYSIS_PROMPT from .tools.search import handle_web_search # TODO: Implement all tools -# from .tools.contributor_recommendation import handle_contributor_recommendation +from .tools.contributor_recommendation import handle_contributor_recommendation # from .tools.repository_query import handle_repo_query # from .tools.issue_creation import handle_issue_creation # from .tools.documentation_generation import handle_documentation_generation @@ -102,8 +102,7 @@ async def execute(self, query: str) -> Dict[str, Any]: logger.info(f"Executing {classification} for query") if classification == "contributor_recommendation": - result = "Not implemented" - # result = await handle_contributor_recommendation(query) + result = await handle_contributor_recommendation(query) elif classification == "repo_support": result = "Not implemented" # result = await handle_repo_query(query) diff --git a/backend/app/agents/devrel/github/prompts/contributor_recommendation/query_alignment.py b/backend/app/agents/devrel/github/prompts/contributor_recommendation/query_alignment.py new file mode 100644 index 0000000..d525d12 --- /dev/null +++ b/backend/app/agents/devrel/github/prompts/contributor_recommendation/query_alignment.py @@ -0,0 +1,37 @@ +QUERY_ALIGNMENT_PROMPT = """Analyze this contributor recommendation request and process it for technical search: + +USER REQUEST: {query} + +Your task: +1. Extract the core technical requirements +2. Generate a clean, technical search query optimized for finding contributor profiles +3. 
Extract specific keywords that would appear in developer profiles (languages, frameworks, tools, domains) + +Guidelines: +- aligned_query: Convert user request into clear technical language that matches how developers describe their skills +- keywords: Extract 3-5 specific technical terms (React, Python, API, database, etc.) +- Focus on technologies, not business requirements +- Make it searchable against developer profiles and contribution history + +Example transformations: + +Input: "I need help with our Stripe payment integration issue" +Output: {{"query_type": "general", "aligned_query": "developer with payment processing and Stripe API integration experience", "keywords": ["Stripe", "payment", "API", "integration"], "technical_domain": "backend"}} + +Input: "Find experts for database optimization" +Output: {{"query_type": "general", "aligned_query": "backend developer with database performance optimization experience", "keywords": ["database", "optimization", "performance", "SQL"], "technical_domain": "backend"}} + +Input: "https://github.com/owner/repo/issues/123 - authentication bug" +Output: {{"query_type": "github_issue", "aligned_query": "developer with authentication and security implementation experience", "keywords": ["authentication", "security", "OAuth", "JWT"], "technical_domain": "security"}} + +IMPORTANT FORMATTING RULES: +- DO NOT use markdown formatting +- DO NOT wrap in code blocks (```) +- DO NOT add any text before or after the JSON +- DO NOT add explanations +- Return EXACTLY this format: {{"query_type": "...", "aligned_query": "...", "keywords": [...], "technical_domain": "..."}} + +Expected JSON schema: +{{"query_type": "github_issue" | "general", "aligned_query": "clean technical search text", "keywords": ["keyword1", "keyword2"], "technical_domain": "frontend|backend|fullstack|ml|devops|mobile|security|other"}} + +Return the JSON object only:""" diff --git a/backend/app/agents/devrel/github/tools/contributor_recommendation.py 
b/backend/app/agents/devrel/github/tools/contributor_recommendation.py index 8b13789..a80d9c7 100644 --- a/backend/app/agents/devrel/github/tools/contributor_recommendation.py +++ b/backend/app/agents/devrel/github/tools/contributor_recommendation.py @@ -1 +1,170 @@ +import logging +import re +from typing import Any, Dict +from urllib.parse import urlparse +from langchain_core.messages import HumanMessage +from langchain_google_genai import ChatGoogleGenerativeAI +from app.core.config import settings +from app.database.weaviate.operations import search_contributors +from app.services.github.issue_processor import GitHubIssueProcessor +from app.services.embedding_service.service import EmbeddingService +from ..prompts.contributor_recommendation.query_alignment import QUERY_ALIGNMENT_PROMPT + +logger = logging.getLogger(__name__) + +class ContributorRecommendationWorkflow: + """ + Contributor recommendation with proper query alignment for hybrid search. + """ + + def __init__(self): + self.query_alignment_llm = ChatGoogleGenerativeAI( + model=settings.github_agent_model, + temperature=0.1, + google_api_key=settings.gemini_api_key + ) + self.embedding_service = EmbeddingService() + + async def _align_user_request(self, query: str) -> Dict[str, Any]: + """ + Align user request into optimized format for hybrid search. + Extract clean technical query + keywords that match contributor profiles. 
+ """ + logger.info("Aligning user request for hybrid search optimization") + + url_match = re.search(r'https?://github\.com/[\w-]+/[\w.-]+/issues/\d+', query) + + if url_match: + issue_content = await self._fetch_github_issue_content(url_match.group(0)) + full_query = f"{query}\n\nIssue content: {issue_content}" + else: + full_query = query + + prompt = QUERY_ALIGNMENT_PROMPT.format(query=full_query) + response = await self.query_alignment_llm.ainvoke([HumanMessage(content=prompt)]) + + try: + import json + print(response) + result = json.loads(response.content.strip()) + logger.info(f"Query aligned: '{result.get('aligned_query')}' with keywords: {result.get('keywords')}") + return result + except json.JSONDecodeError: + logger.warning("Failed to parse alignment result, using fallback") + return { + "query_type": "general", + "aligned_query": query, + "keywords": [], + "technical_domain": "other" + } + + async def _fetch_github_issue_content(self, github_url: str) -> str: + """Fetch GitHub issue content.""" + try: + parsed_url = urlparse(github_url) + path_parts = parsed_url.path.strip('/').split('/') + + if len(path_parts) >= 4 and path_parts[2] == "issues": + owner, repo, issue_number = path_parts[0], path_parts[1], int(path_parts[3]) + processor = GitHubIssueProcessor(owner, repo, issue_number) + + content = await processor.fetch_issue_content() + return content + else: + raise ValueError("Invalid GitHub issue URL") + + except Exception as e: + logger.error(f"GitHub issue fetching failed: {e}") + raise + +async def handle_contributor_recommendation(query: str) -> Dict[str, Any]: + """ + Main entry point with unified query processing. 
+ """ + logger.info(f"Processing contributor recommendation: {query[:100]}...") + + try: + workflow = ContributorRecommendationWorkflow() + + alignment_result = await workflow._align_user_request(query) + search_text = alignment_result.get("aligned_query", query) + + logger.info("Generating embedding for semantic search") + enhanced_search_text = f"Looking for contributor with expertise in: {search_text}" + query_embedding = await workflow.embedding_service.get_embedding(enhanced_search_text) + logger.info(f"Generated embedding with dimension: {len(query_embedding)}") + + logger.info("Performing hybrid search (semantic + keyword matching)") + + results = await search_contributors( + query_embedding=query_embedding, + keywords=alignment_result.get("keywords", []), + limit=5, + vector_weight=0.7, # Semantic similarity + bm25_weight=0.3 # Keyword matching + ) + + logger.info(f"Search complete: Found {len(results)} potential contributors") + + if not results: + logger.info("No contributors found matching the search criteria") + return { + "status": "success", + "recommendations": [], + "message": "No suitable contributors found", + "search_query": search_text, + "keywords_used": alignment_result.get("keywords", []), + "technical_domain": alignment_result.get("technical_domain", "other") + } + + logger.info("Formatting recommendations with scores") + recommendations = [] + for contributor in results: + languages = contributor.get('languages', []) + topics = contributor.get('topics', []) + hybrid_score = contributor.get('hybrid_score', 0) + vector_score = contributor.get('vector_score', 0) + bm25_score = contributor.get('bm25_score', 0) + + reason_parts = [] + if languages: + reason_parts.append(f"Expert in {', '.join(languages)}") + if topics: + reason_parts.append(f"Active in {', '.join(topics)}") + + username = contributor.get("github_username") + recommendation = { + "user": username, + "reason": " • ".join(reason_parts) if reason_parts else "Strong technical match", 
+ "search_score": round(hybrid_score, 4), + "vector_score": round(vector_score, 4), + "keyword_score": round(bm25_score, 4), + "languages": languages, + "topics": topics + } + + recommendations.append(recommendation) + logger.info( + f"@{username} (score: {hybrid_score:.4f}) - {reason_parts[0] if reason_parts else 'Technical match'}") + + logger.info(f"Successfully generated {len(recommendations)} contributor recommendations") + + return { + "status": "success", + "recommendations": recommendations, + "message": f"Found {len(recommendations)} suitable contributors", + "search_query": search_text, + "keywords_used": alignment_result.get("keywords", []), + "technical_domain": alignment_result.get("technical_domain", "other"), + "search_metadata": { + "total_candidates": len(results), + "vector_weight": 0.7, + "keyword_weight": 0.3, + "embedding_dimension": len(query_embedding) + } + } + + except Exception as e: + logger.error(f"Error in contributor recommendation: {str(e)}", exc_info=True) + return {"status": "error", "message": str(e)} diff --git a/backend/app/services/github/issue_processor.py b/backend/app/services/github/issue_processor.py index f4319fb..e4572ea 100644 --- a/backend/app/services/github/issue_processor.py +++ b/backend/app/services/github/issue_processor.py @@ -27,7 +27,7 @@ def __init__(self, owner: str, repo: str, issue_number: int): ) self.embedding_service = EmbeddingService() - async def _fetch_issue_content(self) -> str: + async def fetch_issue_content(self) -> str: """ Fetches and consolidates all text content from a GitHub issue. """ @@ -69,7 +69,7 @@ async def get_embedding_for_issue(self) -> List[float]: Returns a vector embedding representing the issue. 
""" try: - content = await self._fetch_issue_content() + content = await self.fetch_issue_content() if not content: raise ValueError("Failed to fetch issue content.") From 48b8b279dc42b546daca69c37fa07dcbc1715646 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sun, 27 Jul 2025 05:12:47 +0530 Subject: [PATCH 6/6] [refactor]: add public wrapper for private _make_request --- backend/app/services/github/issue_processor.py | 4 ++-- backend/app/services/github/user/profiling.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/app/services/github/issue_processor.py b/backend/app/services/github/issue_processor.py index e4572ea..ca326ca 100644 --- a/backend/app/services/github/issue_processor.py +++ b/backend/app/services/github/issue_processor.py @@ -36,7 +36,7 @@ async def fetch_issue_content(self) -> str: issue_url = f"{profiler.base_url}/repos/{self.owner}/{self.repo}/issues/{self.issue_number}" comments_url = f"{issue_url}/comments" - issue_data = await profiler._make_request(issue_url) + issue_data = await profiler.request(issue_url) if not issue_data: raise ValueError("Failed to fetch issue data.") @@ -45,7 +45,7 @@ async def fetch_issue_content(self) -> str: f"Body: {issue_data['body']}", ] - comments_data = await profiler._make_request(comments_url) + comments_data = await profiler.request(comments_url) if comments_data: comment_texts = [ f"Comment by {c['user']['login']}: {c['body']}" diff --git a/backend/app/services/github/user/profiling.py b/backend/app/services/github/user/profiling.py index 0b66b5a..915c4cc 100644 --- a/backend/app/services/github/user/profiling.py +++ b/backend/app/services/github/user/profiling.py @@ -74,6 +74,10 @@ async def _make_request(self, url: str, params: Dict = None) -> Optional[Dict]: logger.error(f"Error making request to {url}: {str(e)}") return None + async def request(self, url: str, params: Dict | None = None) -> Optional[Dict]: + """Public, stable wrapper around the internal HTTP helper.""" + 
return await self._make_request(url, params) + async def get_user_data(self, github_username: str) -> Optional[Dict]: """Fetch user data""" url = f"{self.base_url}/users/{github_username}"