Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions backend/app/agents/devrel/github/github_toolkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .prompts.intent_analysis import GITHUB_INTENT_ANALYSIS_PROMPT
from .tools.search import handle_web_search
# TODO: Implement all tools
# from .tools.contributor_recommendation import handle_contributor_recommendation
from .tools.contributor_recommendation import handle_contributor_recommendation
# from .tools.repository_query import handle_repo_query
# from .tools.issue_creation import handle_issue_creation
# from .tools.documentation_generation import handle_documentation_generation
Expand Down Expand Up @@ -102,8 +102,7 @@ async def execute(self, query: str) -> Dict[str, Any]:
logger.info(f"Executing {classification} for query")

if classification == "contributor_recommendation":
result = "Not implemented"
# result = await handle_contributor_recommendation(query)
result = await handle_contributor_recommendation(query)
elif classification == "repo_support":
result = "Not implemented"
# result = await handle_repo_query(query)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Prompt template that asks the LLM to condense a GitHub issue into a technical
# summary geared towards finding expert contributors.
# It has a single placeholder, {issue_content}, which is filled with
# str.format(issue_content=...) by the issue summarizer before the prompt is sent.
ISSUE_SUMMARIZATION_PROMPT = """You are a technical analyst optimizing GitHub issues for contributor search.

Analyze the provided GitHub issue and create a technical summary optimized for finding relevant expert contributors.

Focus on:
- Core technical problem or feature request
- Specific technologies, frameworks, libraries, APIs mentioned
- Technical skills and expertise required to solve this
- Programming languages and tools involved
- System components affected (frontend, backend, database, etc.)

Create a summary that reads like a job requirement for finding the right technical expert.

**GitHub Issue Content:**
---
{issue_content}
---

**Optimized Technical Summary for Contributor Search:**
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Prompt template that asks the LLM to rewrite a contributor-recommendation
# request as a structured search spec.
# - The only placeholder is {query}; it is filled with str.format(query=...).
# - Every literal JSON brace is doubled ({{ and }}) so that str.format emits a
#   single brace instead of treating it as a replacement field.
# - The model is instructed to answer with a bare JSON object carrying the keys
#   query_type, aligned_query, keywords and technical_domain.
QUERY_ALIGNMENT_PROMPT = """Analyze this contributor recommendation request and process it for technical search:

USER REQUEST: {query}

Your task:
1. Extract the core technical requirements
2. Generate a clean, technical search query optimized for finding contributor profiles
3. Extract specific keywords that would appear in developer profiles (languages, frameworks, tools, domains)

Guidelines:
- aligned_query: Convert user request into clear technical language that matches how developers describe their skills
- keywords: Extract 3-5 specific technical terms (React, Python, API, database, etc.)
- Focus on technologies, not business requirements
- Make it searchable against developer profiles and contribution history

Example transformations:

Input: "I need help with our Stripe payment integration issue"
Output: {{"query_type": "general", "aligned_query": "developer with payment processing and Stripe API integration experience", "keywords": ["Stripe", "payment", "API", "integration"], "technical_domain": "backend"}}

Input: "Find experts for database optimization"
Output: {{"query_type": "general", "aligned_query": "backend developer with database performance optimization experience", "keywords": ["database", "optimization", "performance", "SQL"], "technical_domain": "backend"}}

Input: "https://github.com/owner/repo/issues/123 - authentication bug"
Output: {{"query_type": "github_issue", "aligned_query": "developer with authentication and security implementation experience", "keywords": ["authentication", "security", "OAuth", "JWT"], "technical_domain": "security"}}

IMPORTANT FORMATTING RULES:
- DO NOT use markdown formatting
- DO NOT wrap in code blocks (```)
- DO NOT add any text before or after the JSON
- DO NOT add explanations
- Return EXACTLY this format: {{"query_type": "...", "aligned_query": "...", "keywords": [...], "technical_domain": "..."}}

Expected JSON schema:
{{"query_type": "github_issue" | "general", "aligned_query": "clean technical search text", "keywords": ["keyword1", "keyword2"], "technical_domain": "frontend|backend|fullstack|ml|devops|mobile|security|other"}}

Return the JSON object only:"""
10 changes: 8 additions & 2 deletions backend/app/agents/devrel/github/prompts/intent_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

AVAILABLE FUNCTIONS:
- web_search: Search the web for information
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate (supports both issue URLs and general queries)
- repo_support: Questions about codebase structure, dependencies, impact analysis, architecture
- issue_creation: Creating bug reports, feature requests, or tracking items
- documentation_generation: Generating docs, READMEs, API docs, guides, or explanations
Expand All @@ -12,7 +12,13 @@
USER QUERY: {user_query}

Classification guidelines:
- contributor_recommendation: Finding reviewers, assignees, collaborators
- contributor_recommendation:
* "who should review this PR/issue?"
* "find experts in React/Python/ML"
* "recommend assignees for stripe integration"
* "best people for database optimization"
* URLs like github.com/owner/repo/issues/123
* "I need help with RabbitMQ, can you suggest some people?"
- repo_support: Code structure, dependencies, impact analysis, architecture
- issue_creation: Creating bugs, features, tracking items
- documentation_generation: Docs, READMEs, guides, explanations
Expand Down
169 changes: 169 additions & 0 deletions backend/app/agents/devrel/github/tools/contributor_recommendation.py
Original file line number Diff line number Diff line change
@@ -1 +1,170 @@
import logging
import re
from typing import Any, Dict
from urllib.parse import urlparse
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI

from app.core.config import settings
from app.database.weaviate.operations import search_contributors
from app.services.github.issue_processor import GitHubIssueProcessor
from app.services.embedding_service.service import EmbeddingService
from ..prompts.contributor_recommendation.query_alignment import QUERY_ALIGNMENT_PROMPT

logger = logging.getLogger(__name__)

class ContributorRecommendationWorkflow:
    """
    Contributor recommendation with proper query alignment for hybrid search.

    Turns a free-form user request (optionally containing a GitHub issue URL)
    into a structured search spec -- an aligned technical query plus keywords --
    by prompting an LLM with QUERY_ALIGNMENT_PROMPT.
    """

    def __init__(self):
        self.query_alignment_llm = ChatGoogleGenerativeAI(
            model=settings.github_agent_model,
            temperature=0.1,
            google_api_key=settings.gemini_api_key
        )
        self.embedding_service = EmbeddingService()

    @staticmethod
    def _parse_alignment_response(raw: Any) -> Dict[str, Any]:
        """
        Parse the raw LLM reply into the alignment dict.

        The prompt forbids Markdown, but models still frequently wrap JSON in a
        ``` or ```json fence; such a fence is stripped before decoding.

        Args:
            raw: The ``content`` of the LLM response.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If ``raw`` is not a string, is not valid JSON
                (json.JSONDecodeError is a ValueError subclass), or decodes to
                something other than a JSON object.
        """
        import json

        if not isinstance(raw, str):
            raise ValueError("LLM response content is not a string")

        text = raw.strip()
        fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL | re.IGNORECASE)
        if fenced:
            text = fenced.group(1)

        parsed = json.loads(text)
        if not isinstance(parsed, dict):
            # Callers use .get() on the result, so lists/scalars are unusable.
            raise ValueError("LLM response is not a JSON object")
        return parsed

    async def _align_user_request(self, query: str) -> Dict[str, Any]:
        """
        Align user request into optimized format for hybrid search.
        Extract clean technical query + keywords that match contributor profiles.

        If the query contains a GitHub issue URL, the issue content is fetched
        and appended to the text sent to the LLM.

        Args:
            query: The raw user request.

        Returns:
            The alignment dict produced by the LLM, or a fallback dict
            (query_type "general", the original query, no keywords, domain
            "other") when the LLM reply cannot be parsed.

        Raises:
            Exception: Whatever ``_fetch_github_issue_content`` raises when the
                referenced issue cannot be fetched.
        """
        logger.info("Aligning user request for hybrid search optimization")

        url_match = re.search(r'https?://github\.com/[\w-]+/[\w.-]+/issues/\d+', query)

        if url_match:
            issue_content = await self._fetch_github_issue_content(url_match.group(0))
            full_query = f"{query}\n\nIssue content: {issue_content}"
        else:
            full_query = query

        prompt = QUERY_ALIGNMENT_PROMPT.format(query=full_query)
        response = await self.query_alignment_llm.ainvoke([HumanMessage(content=prompt)])

        try:
            result = self._parse_alignment_response(response.content)
        except ValueError:
            # Best effort: an unparseable reply degrades to a plain search on
            # the user's own wording instead of failing the request.
            logger.warning("Failed to parse alignment result, using fallback")
            return {
                "query_type": "general",
                "aligned_query": query,
                "keywords": [],
                "technical_domain": "other"
            }

        logger.info(f"Query aligned: '{result.get('aligned_query')}' with keywords: {result.get('keywords')}")
        return result

    async def _fetch_github_issue_content(self, github_url: str) -> str:
        """
        Fetch GitHub issue content.

        Args:
            github_url: URL of the form https://github.com/<owner>/<repo>/issues/<number>.

        Returns:
            The text returned by ``GitHubIssueProcessor.fetch_issue_content``.

        Raises:
            ValueError: If the URL path is not an issue path.
            Exception: Any error from the issue processor; it is logged and
                re-raised unchanged.
        """
        try:
            parsed_url = urlparse(github_url)
            path_parts = parsed_url.path.strip('/').split('/')

            # Expected path: <owner>/<repo>/issues/<number>
            if len(path_parts) >= 4 and path_parts[2] == "issues":
                owner, repo, issue_number = path_parts[0], path_parts[1], int(path_parts[3])
                processor = GitHubIssueProcessor(owner, repo, issue_number)

                content = await processor.fetch_issue_content()
                return content
            else:
                raise ValueError("Invalid GitHub issue URL")

        except Exception as e:
            logger.error(f"GitHub issue fetching failed: {e}")
            raise

async def handle_contributor_recommendation(query: str) -> Dict[str, Any]:
    """
    Main entry point with unified query processing.

    Aligns the user request via the LLM, embeds the aligned query, runs a
    hybrid (vector + keyword) contributor search and formats the hits.

    Args:
        query: The raw user request (free text and/or a GitHub issue URL).

    Returns:
        On success, a dict with ``status`` "success", ``recommendations``,
        ``message``, ``search_query``, ``keywords_used`` and
        ``technical_domain`` (plus ``search_metadata`` when there are hits).
        On any failure, ``{"status": "error", "message": <error text>}``.
    """
    logger.info(f"Processing contributor recommendation: {query[:100]}...")

    # Hybrid-search tuning, named once so the values sent to the search and the
    # values reported in search_metadata cannot drift apart.
    result_limit = 5
    vector_weight = 0.7  # Semantic similarity
    bm25_weight = 0.3  # Keyword matching

    try:
        workflow = ContributorRecommendationWorkflow()

        alignment_result = await workflow._align_user_request(query)
        # The alignment is LLM-produced JSON: a key may be present but null or
        # empty, which .get(key, default) would pass through unchanged.
        search_text = alignment_result.get("aligned_query") or query
        keywords = alignment_result.get("keywords") or []
        technical_domain = alignment_result.get("technical_domain") or "other"

        logger.info("Generating embedding for semantic search")
        enhanced_search_text = f"Looking for contributor with expertise in: {search_text}"
        query_embedding = await workflow.embedding_service.get_embedding(enhanced_search_text)
        logger.info(f"Generated embedding with dimension: {len(query_embedding)}")

        logger.info("Performing hybrid search (semantic + keyword matching)")

        results = await search_contributors(
            query_embedding=query_embedding,
            keywords=keywords,
            limit=result_limit,
            vector_weight=vector_weight,
            bm25_weight=bm25_weight
        )

        logger.info(f"Search complete: Found {len(results)} potential contributors")

        if not results:
            logger.info("No contributors found matching the search criteria")
            return {
                "status": "success",
                "recommendations": [],
                "message": "No suitable contributors found",
                "search_query": search_text,
                "keywords_used": keywords,
                "technical_domain": technical_domain
            }

        logger.info("Formatting recommendations with scores")
        recommendations = []
        for contributor in results:
            languages = contributor.get('languages', [])
            topics = contributor.get('topics', [])
            hybrid_score = contributor.get('hybrid_score', 0)
            vector_score = contributor.get('vector_score', 0)
            bm25_score = contributor.get('bm25_score', 0)

            # Human-readable justification built from the profile fields.
            reason_parts = []
            if languages:
                reason_parts.append(f"Expert in {', '.join(languages)}")
            if topics:
                reason_parts.append(f"Active in {', '.join(topics)}")

            username = contributor.get("github_username")
            recommendation = {
                "user": username,
                "reason": " • ".join(reason_parts) if reason_parts else "Strong technical match",
                "search_score": round(hybrid_score, 4),
                "vector_score": round(vector_score, 4),
                "keyword_score": round(bm25_score, 4),
                "languages": languages,
                "topics": topics
            }

            recommendations.append(recommendation)
            logger.info(
                f"@{username} (score: {hybrid_score:.4f}) - {reason_parts[0] if reason_parts else 'Technical match'}")

        logger.info(f"Successfully generated {len(recommendations)} contributor recommendations")

        return {
            "status": "success",
            "recommendations": recommendations,
            "message": f"Found {len(recommendations)} suitable contributors",
            "search_query": search_text,
            "keywords_used": keywords,
            "technical_domain": technical_domain,
            "search_metadata": {
                "total_candidates": len(results),
                "vector_weight": vector_weight,
                "keyword_weight": bm25_weight,
                "embedding_dimension": len(query_embedding)
            }
        }

    except Exception as e:
        # Top-level boundary for the tool: report the failure to the caller as
        # a structured error instead of propagating.
        logger.error(f"Error in contributor recommendation: {str(e)}", exc_info=True)
        return {"status": "error", "message": str(e)}
11 changes: 10 additions & 1 deletion backend/app/agents/devrel/prompts/response_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,25 @@
- Avoid complex markdown formatting like **bold** or *italic*
- Use plain text with clear line breaks
- Format links as plain URLs: https://example.com
- Use simple emojis for visual appeal: 🔗 📚 ⚡
- Use simple emojis for visual appeal
- Keep paragraphs short and scannable
- Use "→" for arrows instead of markdown arrows

SPECIAL FORMATTING FOR CONTRIBUTOR RECOMMENDATIONS:
If the task result contains contributor recommendations:
- Start with "Found X Contributors"
- Show search query used and keywords
- For each contributor: "1. username (Score: X.XXX)"
- Include their expertise/reason for recommendation
- End with metadata about search and actionable guidance

Instructions:
1. Synthesize all information - Use reasoning process, tool results, and task results together
2. Address the user's needs - Focus on what they're trying to accomplish
3. Be actionable - Provide specific steps, resources, or guidance
4. Stay DevRel-focused - Be encouraging, helpful, and community-oriented
5. Reference sources - Mention what you researched or considered when relevant
6. Format for readability - Clean, simple text that displays well
7. For contributor recommendations - Use the special formatting above to show scores and details

Create a helpful, comprehensive response:"""
2 changes: 1 addition & 1 deletion backend/app/api/v1/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from fastapi.responses import HTMLResponse
from app.database.supabase.client import get_supabase_client
from app.services.auth.verification import find_user_by_session_and_verify, get_verification_session_info
from app.services.user.profiling import profile_user_from_github
from app.services.github.user.profiling import profile_user_from_github
from typing import Optional
import logging
import asyncio
Expand Down
83 changes: 83 additions & 0 deletions backend/app/services/github/issue_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import logging
from typing import List
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage

from app.core.config import settings
from app.services.embedding_service.service import EmbeddingService
from app.services.github.user.profiling import GitHubUserProfiler
from app.agents.devrel.github.prompts.contributor_recommendation.issue_summarization import ISSUE_SUMMARIZATION_PROMPT

logger = logging.getLogger(__name__)


class GitHubIssueProcessor:
    """
    A service to fetch, summarize, and embed a GitHub issue.
    """

    def __init__(self, owner: str, repo: str, issue_number: int):
        self.owner = owner
        self.repo = repo
        self.issue_number = issue_number
        self.summarizer_llm = ChatGoogleGenerativeAI(
            model=settings.github_agent_model,
            temperature=0.1,
            google_api_key=settings.gemini_api_key
        )
        self.embedding_service = EmbeddingService()

    @staticmethod
    def _format_issue_content(issue_data: dict, comments_data) -> str:
        """
        Join an issue's title, body and comments into one text blob.

        Args:
            issue_data: Issue payload; must contain ``title``. A missing or
                null ``body`` (GitHub returns null for issues without a
                description) is rendered as an empty body.
            comments_data: Iterable of comment payloads, or a falsy value when
                there are none. Comments without a body are skipped; a comment
                whose ``user`` is missing or null is attributed to "unknown".

        Returns:
            The parts separated by a "---" divider line.
        """
        content_parts = [
            f"Title: {issue_data['title']}",
            f"Body: {issue_data.get('body') or ''}",
        ]

        for comment in comments_data or []:
            body = comment.get('body')
            if not body:
                continue
            author = (comment.get('user') or {}).get('login') or 'unknown'
            content_parts.append(f"Comment by {author}: {body}")

        return "\n\n---\n\n".join(content_parts)

    async def fetch_issue_content(self) -> str:
        """
        Fetches and consolidates all text content from a GitHub issue.

        Returns:
            Title, body and comments joined into a single string.

        Raises:
            ValueError: If the issue request yields no data.
        """
        logger.info(f"Fetching content for {self.owner}/{self.repo}#{self.issue_number}")
        async with GitHubUserProfiler() as profiler:
            issue_url = f"{profiler.base_url}/repos/{self.owner}/{self.repo}/issues/{self.issue_number}"
            comments_url = f"{issue_url}/comments"

            issue_data = await profiler.request(issue_url)
            if not issue_data:
                raise ValueError("Failed to fetch issue data.")

            # NOTE(review): a single request is made for comments; if
            # profiler.request does not follow pagination, long threads are
            # truncated to the first page -- confirm.
            comments_data = await profiler.request(comments_url)

            return self._format_issue_content(issue_data, comments_data)

    async def _summarize_context(self, content: str) -> str:
        """Generates a technical summary of the issue content using an LLM."""
        logger.info(f"Summarizing issue content for {self.owner}/{self.repo}#{self.issue_number}")
        prompt = ISSUE_SUMMARIZATION_PROMPT.format(issue_content=content)
        response = await self.summarizer_llm.ainvoke([HumanMessage(content=prompt)])
        logger.info(f"Generated summary: {response.content.strip()[:100]}")
        return response.content.strip()

    async def get_embedding_for_issue(self) -> List[float]:
        """
        Orchestrates the entire process: fetch, summarize, and embed.
        Returns a vector embedding representing the issue.

        Raises:
            Exception: Any error from fetching, summarizing or embedding; it
                is logged and re-raised unchanged.
        """
        try:
            content = await self.fetch_issue_content()
            if not content:
                raise ValueError("Failed to fetch issue content.")

            summary = await self._summarize_context(content)

            logger.info("Embedding issue summary")
            embedding = await self.embedding_service.get_embedding(summary)
            return embedding
        except Exception as e:
            logger.error(f"Error processing issue {self.owner}/{self.repo}#{self.issue_number}: {str(e)}")
            # Bare raise keeps the original traceback intact.
            raise
Loading