Skip to content

Commit 43d479c

Browse files
committed
[feat]: implement custom hybrid search
1 parent 1356976 commit 43d479c

File tree

2 files changed

+77
-1
lines changed

2 files changed

+77
-1
lines changed

backend/app/database/weaviate/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
search_similar_contributors,
44
search_contributors_by_keywords,
55
get_contributor_profile,
6+
search_contributors,
67
WeaviateUserOperations
78
)
89

@@ -13,6 +14,7 @@
1314
"search_similar_contributors",
1415
"search_contributors_by_keywords",
1516
"get_contributor_profile",
17+
"search_contributors",
1618
"WeaviateUserOperations",
1719
"get_weaviate_client"
1820
]

backend/app/database/weaviate/operations.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,66 @@ async def search_contributors_by_keywords(self, keywords: List[str], limit: int
213213
logger.error(f"Unexpected error in keyword search: {str(e)}")
214214
return []
215215

216-
# TODO: Add hybrid search for contributors. The default built-in hybrid search doesn't support custom vectors.
216+
async def hybrid_search_contributors(
    self,
    query_embedding: List[float],
    keywords: List[str],
    limit: int = 10,
    vector_weight: float = 0.7,
    bm25_weight: float = 0.3
) -> List[Dict[str, Any]]:
    """
    Rank contributors by a weighted blend of vector and BM25 keyword scores.

    The vector search (``search_similar_contributors``) and the keyword
    search (``search_contributors_by_keywords``) are each run with ``limit``
    and skipped when their input is empty. Hits are merged on ``"user_id"``.

    Every returned dict is a copy of the underlying hit with these keys added:
      - ``"vector_score"``: the hit's ``"similarity_score"`` (0.0 if absent
        or if the contributor only appeared in the keyword search).
      - ``"bm25_score"``: the hit's ``"search_score"`` divided by the largest
        ``"search_score"`` in the keyword results (0.0 if the contributor
        only appeared in the vector search, or if that maximum is not positive).
      - ``"search_method"``: ``"vector"``, ``"bm25"`` or ``"hybrid"``.
      - ``"hybrid_score"``: ``vector_weight * vector_score +
        bm25_weight * bm25_score``.

    Returns the merged hits sorted by ``"hybrid_score"`` descending and cut
    to ``limit`` entries. Any exception is logged and an empty list is
    returned instead.
    """
    try:
        if query_embedding:
            vector_hits = await self.search_similar_contributors(query_embedding, limit)
        else:
            vector_hits = []

        if keywords:
            keyword_hits = await self.search_contributors_by_keywords(keywords, limit)
        else:
            keyword_hits = []

        # Keyed by user_id; insertion order (vector hits first) decides ties
        # because the final sort is stable.
        merged = {}

        for hit in vector_hits:
            uid = hit["user_id"]
            entry = hit.copy()
            entry["vector_score"] = hit.get("similarity_score", 0.0)
            entry["bm25_score"] = 0.0
            entry["search_method"] = "vector"
            merged[uid] = entry

        # Largest raw keyword score, used to scale keyword scores relative to it.
        if keyword_hits:
            top_bm25 = max([hit.get("search_score", 0) for hit in keyword_hits])
        else:
            top_bm25 = 1.0

        for hit in keyword_hits:
            uid = hit["user_id"]
            if top_bm25 > 0:
                scaled = hit.get("search_score", 0) / top_bm25
            else:
                scaled = 0.0

            if uid not in merged:
                entry = hit.copy()
                entry["vector_score"] = 0.0
                entry["bm25_score"] = scaled
                entry["search_method"] = "bm25"
                merged[uid] = entry
            else:
                # Found by both searches: keep the vector hit's fields and
                # attach the keyword score.
                merged[uid]["bm25_score"] = scaled
                merged[uid]["search_method"] = "hybrid"

        ranked = list(merged.values())
        for entry in ranked:
            vector_part = vector_weight * entry["vector_score"]
            keyword_part = bm25_weight * entry["bm25_score"]
            entry["hybrid_score"] = vector_part + keyword_part

        ranked.sort(key=lambda item: item["hybrid_score"], reverse=True)
        final_results = ranked[:limit]

        logger.info(f"Hybrid search returned {len(final_results)} results")
        return final_results

    except Exception as e:
        logger.error(f"Error in hybrid search: {str(e)}")
        return []
217276

218277
async def get_contributor_profile(self, github_username: str) -> Optional[WeaviateUserProfile]:
219278
"""Get a specific contributor's profile by GitHub username."""
@@ -303,3 +362,18 @@ async def get_contributor_profile(github_username: str) -> Optional[WeaviateUser
303362
"""Convenience function to get a contributor's profile by GitHub username."""
304363
operations = WeaviateUserOperations()
305364
return await operations.get_contributor_profile(github_username)
365+
366+
async def search_contributors(
    query_embedding: List[float],
    keywords: List[str],
    limit: int = 10,
    vector_weight: float = 0.7,
    bm25_weight: float = 0.3
) -> List[Dict[str, Any]]:
    """
    Module-level shortcut for a hybrid (vector + BM25 keyword) contributor search.

    Creates a new ``WeaviateUserOperations`` instance, forwards all arguments
    unchanged to its ``hybrid_search_contributors`` method, and returns
    whatever that method returns.
    """
    return await WeaviateUserOperations().hybrid_search_contributors(
        query_embedding=query_embedding,
        keywords=keywords,
        limit=limit,
        vector_weight=vector_weight,
        bm25_weight=bm25_weight,
    )

0 commit comments

Comments
 (0)