Skip to content

Commit 1bf9e8b

Browse files
committed
[feat]: add contributor recommendation tool
1 parent 52d4e52 commit 1bf9e8b

File tree

3 files changed

+208
-3
lines changed

3 files changed

+208
-3
lines changed

backend/app/agents/devrel/github/github_toolkit.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from .prompts.intent_analysis import GITHUB_INTENT_ANALYSIS_PROMPT
77
from .tools.search import handle_web_search
88
# TODO: Implement all tools
9-
# from .tools.contributor_recommendation import handle_contributor_recommendation
9+
from .tools.contributor_recommendation import handle_contributor_recommendation
1010
# from .tools.repository_query import handle_repo_query
1111
# from .tools.issue_creation import handle_issue_creation
1212
# from .tools.documentation_generation import handle_documentation_generation
@@ -102,8 +102,7 @@ async def execute(self, query: str) -> Dict[str, Any]:
102102
logger.info(f"Executing {classification} for query")
103103

104104
if classification == "contributor_recommendation":
105-
result = "Not implemented"
106-
# result = await handle_contributor_recommendation(query)
105+
result = await handle_contributor_recommendation(query)
107106
elif classification == "repo_support":
108107
result = "Not implemented"
109108
# result = await handle_repo_query(query)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Prompt template asking the LLM to rewrite a contributor request as a
# technical search query.
# - `{query}` is the only placeholder; it is filled with `str.format(query=...)`.
# - Literal JSON braces are doubled (`{{` / `}}`) so that `format()` emits them
#   as single braces instead of treating them as placeholders.
# - The model is instructed to answer with a bare JSON object containing the
#   keys "query_type", "aligned_query", "keywords" and "technical_domain".
QUERY_ALIGNMENT_PROMPT = """Analyze this contributor recommendation request and process it for technical search:
2+
3+
USER REQUEST: {query}
4+
5+
Your task:
6+
1. Extract the core technical requirements
7+
2. Generate a clean, technical search query optimized for finding contributor profiles
8+
3. Extract specific keywords that would appear in developer profiles (languages, frameworks, tools, domains)
9+
10+
Guidelines:
11+
- aligned_query: Convert user request into clear technical language that matches how developers describe their skills
12+
- keywords: Extract 3-5 specific technical terms (React, Python, API, database, etc.)
13+
- Focus on technologies, not business requirements
14+
- Make it searchable against developer profiles and contribution history
15+
16+
Example transformations:
17+
18+
Input: "I need help with our Stripe payment integration issue"
19+
Output: {{"query_type": "general", "aligned_query": "developer with payment processing and Stripe API integration experience", "keywords": ["Stripe", "payment", "API", "integration"], "technical_domain": "backend"}}
20+
21+
Input: "Find experts for database optimization"
22+
Output: {{"query_type": "general", "aligned_query": "backend developer with database performance optimization experience", "keywords": ["database", "optimization", "performance", "SQL"], "technical_domain": "backend"}}
23+
24+
Input: "https://github.com/owner/repo/issues/123 - authentication bug"
25+
Output: {{"query_type": "github_issue", "aligned_query": "developer with authentication and security implementation experience", "keywords": ["authentication", "security", "OAuth", "JWT"], "technical_domain": "security"}}
26+
27+
IMPORTANT FORMATTING RULES:
28+
- DO NOT use markdown formatting
29+
- DO NOT wrap in code blocks (```)
30+
- DO NOT add any text before or after the JSON
31+
- DO NOT add explanations
32+
- Return EXACTLY this format: {{"query_type": "...", "aligned_query": "...", "keywords": [...], "technical_domain": "..."}}
33+
34+
Expected JSON schema:
35+
{{"query_type": "github_issue" | "general", "aligned_query": "clean technical search text", "keywords": ["keyword1", "keyword2"], "technical_domain": "frontend|backend|fullstack|ml|devops|mobile|security|other"}}
36+
37+
Return the JSON object only:"""
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,170 @@
1+
import logging
2+
import re
3+
from typing import Any, Dict
4+
from urllib.parse import urlparse
5+
from langchain_core.messages import HumanMessage
6+
from langchain_google_genai import ChatGoogleGenerativeAI
17

8+
from app.core.config import settings
9+
from app.database.weaviate.operations import search_contributors
10+
from app.services.github.issue_processor import GitHubIssueProcessor
11+
from app.services.embedding_service.service import EmbeddingService
12+
from ..prompts.contributor_recommendation.query_alignment import QUERY_ALIGNMENT_PROMPT
13+
14+
logger = logging.getLogger(__name__)
15+
16+
class ContributorRecommendationWorkflow:
    """
    Contributor recommendation with proper query alignment for hybrid search.

    The workflow turns a free-form user request (optionally containing a
    GitHub issue URL) into a structured alignment result produced by an LLM:
    a cleaned-up search sentence plus keywords, ready for embedding and
    keyword search.
    """

    # First GitHub issue URL embedded anywhere in the user's request.
    _ISSUE_URL_PATTERN = re.compile(r'https?://github\.com/[\w-]+/[\w.-]+/issues/\d+')

    # A reply wrapped in a Markdown code fence, e.g. ```json ... ```.
    _CODE_FENCE_PATTERN = re.compile(r'^```[\w-]*\s*(.*?)\s*```$', re.DOTALL)

    def __init__(self):
        """Create the alignment LLM client and the embedding service."""
        self.query_alignment_llm = ChatGoogleGenerativeAI(
            model=settings.github_agent_model,
            temperature=0.1,
            google_api_key=settings.gemini_api_key
        )
        self.embedding_service = EmbeddingService()

    @staticmethod
    def _parse_alignment_json(raw_content: Any) -> Dict[str, Any]:
        """
        Parse the LLM reply into the alignment dictionary.

        Args:
            raw_content: The ``content`` of the LLM response. Expected to be
                a JSON object serialized as text, possibly wrapped in a
                Markdown code fence despite the prompt's instructions.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If the content is not text, is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``), or decodes to
                something other than a JSON object.
        """
        # Local import: ``json`` is not part of this module's top-level imports.
        import json

        if not isinstance(raw_content, str):
            raise ValueError(
                f"Expected text content, got {type(raw_content).__name__}"
            )

        text = raw_content.strip()
        fenced = ContributorRecommendationWorkflow._CODE_FENCE_PATTERN.match(text)
        if fenced:
            # Models frequently ignore "no code blocks"; unwrap the payload.
            text = fenced.group(1)

        parsed = json.loads(text)
        if not isinstance(parsed, dict):
            # Callers use ``.get`` on the result, so only objects are usable.
            raise ValueError(
                f"Expected a JSON object, got {type(parsed).__name__}"
            )
        return parsed

    async def _align_user_request(self, query: str) -> Dict[str, Any]:
        """
        Align user request into optimized format for hybrid search.
        Extract clean technical query + keywords that match contributor profiles.

        If the request contains a GitHub issue URL, the issue content is
        fetched and appended to the text sent to the LLM.

        Args:
            query: The raw user request.

        Returns:
            The alignment dictionary produced by the LLM, or a generic
            fallback (original query, no keywords) when the reply cannot be
            parsed.

        Raises:
            Exception: Whatever ``_fetch_github_issue_content`` or the LLM
                call raises; those failures are not swallowed here.
        """
        logger.info("Aligning user request for hybrid search optimization")

        url_match = self._ISSUE_URL_PATTERN.search(query)

        if url_match:
            issue_content = await self._fetch_github_issue_content(url_match.group(0))
            full_query = f"{query}\n\nIssue content: {issue_content}"
        else:
            full_query = query

        prompt = QUERY_ALIGNMENT_PROMPT.format(query=full_query)
        response = await self.query_alignment_llm.ainvoke([HumanMessage(content=prompt)])

        try:
            result = self._parse_alignment_json(response.content)
        except ValueError:
            logger.warning("Failed to parse alignment result, using fallback")
            return {
                "query_type": "general",
                "aligned_query": query,
                "keywords": [],
                "technical_domain": "other"
            }

        logger.info(f"Query aligned: '{result.get('aligned_query')}' with keywords: {result.get('keywords')}")
        return result

    async def _fetch_github_issue_content(self, github_url: str) -> str:
        """
        Fetch GitHub issue content.

        Args:
            github_url: URL of the form
                ``https://github.com/<owner>/<repo>/issues/<number>``.

        Returns:
            Whatever ``GitHubIssueProcessor._fetch_issue_content`` returns
            for the referenced issue.

        Raises:
            ValueError: If the URL path is not ``owner/repo/issues/number``.
            Exception: Any error raised while fetching; it is logged and
                re-raised unchanged.
        """
        try:
            parsed_url = urlparse(github_url)
            path_parts = parsed_url.path.strip('/').split('/')

            if len(path_parts) < 4 or path_parts[2] != "issues":
                raise ValueError("Invalid GitHub issue URL")

            owner, repo, issue_number = path_parts[0], path_parts[1], int(path_parts[3])
            processor = GitHubIssueProcessor(owner, repo, issue_number)
            return await processor._fetch_issue_content()

        except Exception as e:
            logger.error(f"GitHub issue fetching failed: {e}")
            raise
80+
81+
# Hybrid-search tuning. Kept in one place so the values passed to the search
# and the values reported back in ``search_metadata`` cannot drift apart.
_RECOMMENDATION_LIMIT = 5
_VECTOR_WEIGHT = 0.7  # Semantic similarity
_BM25_WEIGHT = 0.3  # Keyword matching


async def handle_contributor_recommendation(query: str) -> Dict[str, Any]:
    """
    Main entry point with unified query processing.

    Steps: align the request through the LLM, embed the aligned text, run a
    hybrid (vector + keyword) contributor search and format the hits.

    Args:
        query: The raw user request, optionally containing a GitHub issue URL.

    Returns:
        On success, a dict with ``status == "success"``, a ``recommendations``
        list (possibly empty), a human-readable ``message`` and the search
        parameters used; ``search_metadata`` is included only when at least
        one contributor was found. On any failure, a dict with
        ``status == "error"`` and the exception text in ``message``; errors
        are logged, not raised.
    """
    logger.info(f"Processing contributor recommendation: {query[:100]}...")

    try:
        workflow = ContributorRecommendationWorkflow()

        alignment_result = await workflow._align_user_request(query)

        # The LLM may emit null/empty values for these keys; ``.get`` with a
        # default only covers *missing* keys, so normalise explicitly.
        search_text = alignment_result.get("aligned_query") or query
        keywords = alignment_result.get("keywords") or []
        technical_domain = alignment_result.get("technical_domain") or "other"

        logger.info("Generating embedding for semantic search")
        enhanced_search_text = f"Looking for contributor with expertise in: {search_text}"
        query_embedding = await workflow.embedding_service.get_embedding(enhanced_search_text)
        logger.info(f"Generated embedding with dimension: {len(query_embedding)}")

        logger.info("Performing hybrid search (semantic + keyword matching)")

        results = await search_contributors(
            query_embedding=query_embedding,
            keywords=keywords,
            limit=_RECOMMENDATION_LIMIT,
            vector_weight=_VECTOR_WEIGHT,
            bm25_weight=_BM25_WEIGHT
        )

        logger.info(f"Search complete: Found {len(results)} potential contributors")

        if not results:
            logger.info("No contributors found matching the search criteria")
            return {
                "status": "success",
                "recommendations": [],
                "message": "No suitable contributors found",
                "search_query": search_text,
                "keywords_used": keywords,
                "technical_domain": technical_domain
            }

        logger.info("Formatting recommendations with scores")
        recommendations = []
        for contributor in results:
            # ``or`` guards against fields stored as explicit nulls, which
            # would otherwise break ``round`` and the ``:.4f`` format below.
            languages = contributor.get('languages') or []
            topics = contributor.get('topics') or []
            hybrid_score = contributor.get('hybrid_score') or 0
            vector_score = contributor.get('vector_score') or 0
            bm25_score = contributor.get('bm25_score') or 0

            reason_parts = []
            if languages:
                reason_parts.append(f"Expert in {', '.join(languages)}")
            if topics:
                reason_parts.append(f"Active in {', '.join(topics)}")

            username = contributor.get("github_username")
            recommendations.append({
                "user": username,
                "reason": " • ".join(reason_parts) if reason_parts else "Strong technical match",
                "search_score": round(hybrid_score, 4),
                "vector_score": round(vector_score, 4),
                "keyword_score": round(bm25_score, 4),
                "languages": languages,
                "topics": topics
            })
            logger.info(
                f"@{username} (score: {hybrid_score:.4f}) - {reason_parts[0] if reason_parts else 'Technical match'}")

        logger.info(f"Successfully generated {len(recommendations)} contributor recommendations")

        return {
            "status": "success",
            "recommendations": recommendations,
            "message": f"Found {len(recommendations)} suitable contributors",
            "search_query": search_text,
            "keywords_used": keywords,
            "technical_domain": technical_domain,
            "search_metadata": {
                "total_candidates": len(results),
                "vector_weight": _VECTOR_WEIGHT,
                "keyword_weight": _BM25_WEIGHT,
                "embedding_dimension": len(query_embedding)
            }
        }

    except Exception as e:
        # Top-level boundary for the tool: report the failure to the caller
        # as data instead of propagating it.
        logger.error(f"Error in contributor recommendation: {str(e)}", exc_info=True)
        return {"status": "error", "message": str(e)}

0 commit comments

Comments
 (0)