Skip to content

Commit 452a278

Browse files
Merge pull request #131 from DhruvK278/feat/github-MCP-server
Add GitHub MCP microservice for repository queries
2 parents 59e3656 + a247ba9 commit 452a278

File tree

12 files changed

+2950
-1784
lines changed

12 files changed

+2950
-1784
lines changed

backend/.env.example

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ DISCORD_BOT_TOKEN=
99
# ENABLE_DISCORD_BOT=true
1010

1111
GITHUB_TOKEN=
12+
# Default GitHub organization, used as the repository owner when a request does not specify one
13+
GITHUB_ORG=
1214

1315
# EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
1416
# EMBEDDING_MAX_BATCH_SIZE=32

backend/app/agents/devrel/github/github_toolkit.py

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
import logging
2+
import os
3+
import json
4+
import re
25
from typing import Any, Dict, Optional
36
from langchain_google_genai import ChatGoogleGenerativeAI
47
from langchain_core.messages import HumanMessage
58
from app.core.config import settings
69
from .prompts.intent_analysis import GITHUB_INTENT_ANALYSIS_PROMPT
710
from .tools.search import handle_web_search
8-
# TODO: Implement all tools
11+
from .tools.github_support import handle_github_supp
912
from .tools.contributor_recommendation import handle_contributor_recommendation
10-
# from .tools.repository_query import handle_repo_query
11-
# from .tools.issue_creation import handle_issue_creation
12-
# from .tools.documentation_generation import handle_documentation_generation
1313
from .tools.general_github_help import handle_general_github_help
14+
1415
logger = logging.getLogger(__name__)
1516

17+
DEFAULT_ORG = os.getenv("GITHUB_ORG")


def normalize_org(org_from_user: Optional[str] = None) -> Optional[str]:
    """Return the GitHub organization to use for a request.

    The organization supplied by the user wins; otherwise the value of the
    ``GITHUB_ORG`` environment variable (captured in ``DEFAULT_ORG`` when this
    module is imported) is used. Surrounding whitespace is stripped from
    whichever value is chosen.

    Args:
        org_from_user: Organization name taken from the user's request, if any.

    Returns:
        The stripped organization name, or ``None`` when neither the user nor
        the environment provides a non-blank value.
    """
    for candidate in (org_from_user, DEFAULT_ORG):
        # A blank value (e.g. ``GITHUB_ORG=`` left empty in .env) counts as
        # "not set" rather than being forwarded as an empty owner.
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()
    return None
25+
1626

1727
class GitHubToolkit:
1828
"""
@@ -32,30 +42,37 @@ def __init__(self):
3242
"web_search",
3343
"contributor_recommendation",
3444
"repo_support",
45+
"github_support",
3546
"issue_creation",
3647
"documentation_generation",
3748
"find_good_first_issues",
3849
"general_github_help"
3950
]
4051

4152
async def classify_intent(self, user_query: str) -> Dict[str, Any]:
42-
"""
43-
Classify intent and return classification with reasoning.
44-
45-
Args:
46-
user_query: The user's request or question
47-
48-
Returns:
49-
Dictionary containing classification, reasoning, and confidence
50-
"""
53+
"""Classify intent and return classification with reasoning."""
5154
logger.info(f"Classifying intent for query: {user_query[:100]}")
5255

5356
try:
5457
prompt = GITHUB_INTENT_ANALYSIS_PROMPT.format(user_query=user_query)
5558
response = await self.llm.ainvoke([HumanMessage(content=prompt)])
5659

57-
import json
58-
result = json.loads(response.content.strip())
60+
content = response.content.strip()
61+
62+
try:
63+
result = json.loads(content)
64+
except json.JSONDecodeError:
65+
match = re.search(r"\{.*\}", content, re.DOTALL)
66+
if match:
67+
result = json.loads(match.group())
68+
else:
69+
logger.error(f"Invalid JSON in LLM response: {content}")
70+
return {
71+
"classification": "general_github_help",
72+
"reasoning": "Failed to parse LLM response as JSON",
73+
"confidence": "low",
74+
"query": user_query
75+
}
5976

6077
classification = result.get("classification")
6178
if classification not in self.tools:
@@ -65,21 +82,12 @@ async def classify_intent(self, user_query: str) -> Dict[str, Any]:
6582

6683
result["query"] = user_query
6784

68-
logger.info(f"Classified intent as for query: {user_query} is: {classification}")
85+
logger.info(f"Classified intent for query: {user_query} -> {classification}")
6986
logger.info(f"Reasoning: {result.get('reasoning', 'No reasoning provided')}")
7087
logger.info(f"Confidence: {result.get('confidence', 'unknown')}")
7188

7289
return result
7390

74-
except json.JSONDecodeError as e:
75-
logger.error(f"Error parsing JSON response from LLM: {str(e)}")
76-
logger.error(f"Raw response: {response.content}")
77-
return {
78-
"classification": "general_github_help",
79-
"reasoning": f"Failed to parse LLM response: {str(e)}",
80-
"confidence": "low",
81-
"query": user_query
82-
}
8391
except Exception as e:
8492
logger.error(f"Error in intent classification: {str(e)}")
8593
return {
@@ -90,9 +98,7 @@ async def classify_intent(self, user_query: str) -> Dict[str, Any]:
9098
}
9199

92100
async def execute(self, query: str) -> Dict[str, Any]:
93-
"""
94-
Main execution method - classifies intent and delegates to appropriate tools
95-
"""
101+
"""Main execution method - classifies intent and delegates to appropriate tools"""
96102
logger.info(f"Executing GitHub toolkit for query: {query[:100]}")
97103

98104
try:
@@ -103,15 +109,16 @@ async def execute(self, query: str) -> Dict[str, Any]:
103109

104110
if classification == "contributor_recommendation":
105111
result = await handle_contributor_recommendation(query)
112+
elif classification == "github_support":
113+
org = normalize_org()
114+
result = await handle_github_supp(query, org=org)
115+
result["org_used"] = org
106116
elif classification == "repo_support":
107117
result = "Not implemented"
108-
# result = await handle_repo_query(query)
109118
elif classification == "issue_creation":
110119
result = "Not implemented"
111-
# result = await handle_issue_creation(query)
112120
elif classification == "documentation_generation":
113121
result = "Not implemented"
114-
# result = await handle_documentation_generation(query)
115122
elif classification == "web_search":
116123
result = await handle_web_search(query)
117124
else:
Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
GITHUB_INTENT_ANALYSIS_PROMPT = """You are an expert GitHub DevRel AI assistant. Analyze the user query and classify the intent.
22
33
AVAILABLE FUNCTIONS:
4-
- web_search: Search the web for information
5-
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate (supports both issue URLs and general queries)
4+
- github_support: Questions about repository information, structure, stats, issues, stars, forks, description, or any repository metadata
5+
- web_search: Search the web for general information
6+
- contributor_recommendation: Finding the right people to review PRs, assign issues, or collaborate
67
- repo_support: Questions about codebase structure, dependencies, impact analysis, architecture
78
- issue_creation: Creating bug reports, feature requests, or tracking items
89
- documentation_generation: Generating docs, READMEs, API docs, guides, or explanations
@@ -12,24 +13,37 @@
1213
USER QUERY: {user_query}
1314
1415
Classification guidelines:
16+
- github_support:
17+
- ALWAYS classify as `github_support` if the query asks about:
18+
- repository information
19+
- stats (stars, forks, watchers, issues)
20+
- open issues, closed issues, or "what issues"
21+
- description, license, URL, metadata
22+
- any question containing "<repo> repo", "repository", "repo", "issues in", "stars in", "forks in"
23+
- Example queries:
24+
- "What all issues are in Dev.ai repo?" → github_support
25+
- "How many stars does Devr.AI repo have?" → github_support
26+
- "Show me forks of Aossie-org/Dev.ai" → github_support
1527
- contributor_recommendation:
1628
* "who should review this PR/issue?"
1729
* "find experts in React/Python/ML"
1830
* "recommend assignees for stripe integration"
1931
* "best people for database optimization"
2032
* URLs like github.com/owner/repo/issues/123
2133
* "I need help with RabbitMQ, can you suggest some people?"
22-
- repo_support: Code structure, dependencies, impact analysis, architecture
34+
- repo_support: Code structure, dependencies, impact analysis, architecture
2335
- issue_creation: Creating bugs, features, tracking items
2436
- documentation_generation: Docs, READMEs, guides, explanations
2537
- find_good_first_issues: Beginners, newcomers, "good first issue"
26-
- web_search: General information needing external search
38+
- web_search: Only for information that cannot be found through GitHub API (like news, articles, external documentation)
2739
- general_github_help: General GitHub questions not covered above
2840
41+
IMPORTANT: Repository information queries (issues count, stars, forks, description) should ALWAYS use github_support, not web_search.
42+
2943
CRITICAL: Return ONLY raw JSON. No markdown, no code blocks, no explanation text.
3044
3145
{{
3246
"classification": "function_name_from_list_above",
3347
"reasoning": "Brief explanation of why you chose this function",
3448
"confidence": "high|medium|low"
35-
}}"""
49+
}}"""
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import logging
2+
import os
3+
from typing import Dict, Any, Optional, List, Union
4+
import aiohttp
5+
import asyncio
6+
7+
logger = logging.getLogger(__name__)
8+
9+
class GitHubMCPClient:
    """Async HTTP client for the GitHub MCP server.

    Use it as an async context manager: the aiohttp session is created on
    entry and closed on exit. Calling an endpoint method without an open
    session raises ``RuntimeError``.

    Transport and server failures are not raised to the caller; the endpoint
    methods return ``{"error": "<message>"}`` instead.
    """

    def __init__(self, mcp_server_url: str = "http://localhost:8001"):
        """Store the server base URL and resolve the default organization.

        Args:
            mcp_server_url: Base URL of the MCP server, without a trailing slash.
        """
        self.mcp_server_url = mcp_server_url
        self.session: Optional[aiohttp.ClientSession] = None
        # Default org pulled from environment
        self.org = os.getenv("GITHUB_ORG", "Aossie-org")

    async def __aenter__(self):
        """Open the HTTP session (15 second total timeout per request)."""
        self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=15))
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session and forget it."""
        if self.session:
            await self.session.close()
            # Drop the closed session so later calls hit the explicit
            # "not initialized" guard instead of using a dead session.
            self.session = None

    async def _post(self, endpoint: str, payload: Dict[str, Any], empty: Any) -> Any:
        """POST ``payload`` to ``endpoint`` and unwrap the response envelope.

        Args:
            endpoint: Path segment appended to the server base URL.
            payload: JSON body to send.
            empty: Value returned when a successful response has no ``data`` key.

        Returns:
            The ``data`` member of a successful response, or
            ``{"error": "<message>"}`` for any failure.

        Raises:
            RuntimeError: If the client is used outside its context manager.
        """
        if not self.session:
            raise RuntimeError("Client not initialized. Use async context manager.")

        try:
            async with self.session.post(
                f"{self.mcp_server_url}/{endpoint}",
                json=payload,
                headers={"Content-Type": "application/json"},
            ) as response:
                if response.status != 200:
                    logger.error(f"MCP server error: {response.status}")
                    return {"error": f"MCP server error: {response.status}"}

                result = await response.json()
                if result.get("status") == "success":
                    return result.get("data", empty)
                # ``or`` also covers an explicit ``"error": null`` in the body.
                return {"error": result.get("error") or "Unknown error"}

        except asyncio.TimeoutError:
            # str() of a timeout error is empty, so give it a real message.
            logger.exception("Timed out waiting for MCP server")
            return {"error": "Communication error: request to MCP server timed out"}
        except aiohttp.ClientError as e:
            logger.exception("Error communicating with MCP server: %s", e)
            return {"error": f"Communication error: {str(e)}"}
        except Exception as e:
            logger.exception("Unexpected error: %s", e)
            return {"error": f"Unexpected error: {str(e)}"}

    async def get_github_supp(self, repo: str, owner: Optional[str] = None) -> Dict[str, Any]:
        """Fetch metadata for a single repository.

        Args:
            repo: Repository name.
            owner: Repository owner; defaults to the org from the environment.

        Returns:
            The repository data dict, or ``{"error": "<message>"}`` on failure.

        Raises:
            RuntimeError: If the client is used outside its context manager.
        """
        payload = {"owner": owner or self.org, "repo": repo}
        return await self._post("github_support", payload, {})

    async def list_org_repos(self, org: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
        """List the repositories of an organization.

        Args:
            org: Organization name.

        Returns:
            The list returned by the server, or ``{"error": "<message>"}`` on
            failure.

        Raises:
            RuntimeError: If the client is used outside its context manager.
        """
        return await self._post("list_org_repos", {"org": org}, [])

    async def is_server_available(self) -> bool:
        """Health check for MCP server.

        Returns:
            ``True`` when ``/health`` answers with HTTP 200; ``False`` when the
            session is not open, the request fails, or it times out.
        """
        if not self.session:
            return False

        try:
            async with self.session.get(
                f"{self.mcp_server_url}/health",
                # aiohttp expects a ClientTimeout here, not a bare number.
                timeout=aiohttp.ClientTimeout(total=5),
            ) as response:
                return response.status == 200
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            logger.debug(f"Health check failed: {e}")
            return False
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import os
2+
import logging
3+
import asyncio
4+
from dotenv import load_dotenv, find_dotenv
5+
from fastapi import FastAPI, HTTPException
6+
from pydantic import BaseModel
7+
from .github_mcp_service import GitHubMCPService
8+
from typing import Optional
9+
10+
# Look for a .env file starting from the current working directory; when none
# is found, let load_dotenv() perform its own default lookup.
dotenv_path = find_dotenv(usecwd=True)
if not dotenv_path:
    load_dotenv()
else:
    load_dotenv(dotenv_path=dotenv_path)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="GitHub MCP Server", version="1.0.0")

# Owner used for requests that do not name one; it is allowed to be unset.
GITHUB_ORG = os.getenv("GITHUB_ORG")
if not GITHUB_ORG:
    logger.warning("GITHUB_ORG not set in .env — defaulting to manual owner input")

# The service is built once at import time. It stays None when construction
# fails, and the endpoints answer 503 in that case.
github_service: Optional[GitHubMCPService] = None
try:
    # GITHUB_TOKEN takes precedence; GH_TOKEN is the fallback.
    token = os.getenv("GITHUB_TOKEN") or os.getenv("GH_TOKEN")
    if not token:
        logger.warning("GITHUB_TOKEN/GH_TOKEN not set; GitHub API calls may be rate-limited or fail.")
    github_service = GitHubMCPService(token=token)
    logger.info("GitHub service initialized successfully")
except Exception:
    logger.exception("Failed to initialize GitHub service")
    github_service = None
36+
37+
class RepoInfoRequest(BaseModel):
    """Request body for the ``/github_support`` endpoint."""

    # Repository name (required).
    repo: str
    # Repository owner; the endpoint falls back to GITHUB_ORG when omitted.
    owner: Optional[str] = None
40+
41+
class RepoInfoResponse(BaseModel):
    """Response envelope returned by the ``/github_support`` endpoint."""

    # "success" or "error".
    status: str
    # Repository data on success; an empty dict on error.
    data: dict
    # Error message; None on success. Declared Optional so the None default
    # matches the annotated type.
    error: Optional[str] = None
45+
46+
@app.get("/health")
async def health_check():
    """Liveness probe: report that the GitHub MCP service is up."""
    return dict(status="healthy", service="github-mcp")
50+
51+
class OrgInfoRequest(BaseModel):
    """Request body for the ``/list_org_repos`` endpoint."""

    # Name of the GitHub organization whose repositories are listed.
    org: str
53+
54+
@app.post("/list_org_repos")
async def list_org_repos(request: OrgInfoRequest):
    """List the repositories of a GitHub organization.

    Args:
        request: Body carrying the organization name.

    Returns:
        ``{"status": "success", "data": <result>}`` on success, or
        ``{"status": "error", "data": {}, "error": <message>}`` when the
        service reports an error.

    Raises:
        HTTPException: 503 when the GitHub service is not available, 500 when
            the lookup raises unexpectedly.
    """
    # Keep this guard outside the try block: inside it, the generic handler
    # below would catch the HTTPException and turn the 503 into a 500.
    if not github_service:
        raise HTTPException(status_code=503, detail="GitHub service not available")

    try:
        # The service call is synchronous, so run it in a worker thread.
        result = await asyncio.to_thread(github_service.list_org_repos, request.org)
    except Exception as e:
        logger.exception("Error listing org repos")
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Only a dict can carry an "error" key; a list result is repository data.
    if isinstance(result, dict) and "error" in result:
        return {"status": "error", "data": {}, "error": result["error"]}

    return {"status": "success", "data": result}
70+
71+
@app.post("/github_support")
async def get_github_supp(request: RepoInfoRequest):
    """Return details for one repository.

    The owner is taken from the request and falls back to GITHUB_ORG when the
    request does not provide one.

    Raises:
        HTTPException: 503 when the GitHub service is not available, 400 when
            no owner can be determined, 500 when the lookup fails unexpectedly.
    """
    if not github_service:
        raise HTTPException(status_code=503, detail="GitHub service not available")

    owner = request.owner if request.owner else GITHUB_ORG
    if not owner:
        raise HTTPException(status_code=400, detail="Missing owner; provide 'owner' or set GITHUB_ORG")

    try:
        # The service call is synchronous, so run it in a worker thread.
        outcome = await asyncio.to_thread(github_service.repo_query, owner, request.repo)
        if "error" not in outcome:
            return RepoInfoResponse(status="success", data=outcome)
        return RepoInfoResponse(status="error", data={}, error=outcome["error"])
    except Exception as e:
        logger.exception("Error getting repo info")
        raise HTTPException(status_code=500, detail=str(e))
88+
89+
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 8001.
    # uvicorn is imported here so importing this module does not require it.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)

0 commit comments

Comments
 (0)