@@ -213,7 +213,66 @@ async def search_contributors_by_keywords(self, keywords: List[str], limit: int
213213 logger .error (f"Unexpected error in keyword search: { str (e )} " )
214214 return []
215215
216- # TODO: Add hybrid search for contributors. Default in built hybrid search doesn't support custom vectors.
async def hybrid_search_contributors(
    self,
    query_embedding: List[float],
    keywords: List[str],
    limit: int = 10,
    vector_weight: float = 0.7,
    bm25_weight: float = 0.3
) -> List[Dict[str, Any]]:
    """
    Hybrid search combining vector similarity and BM25 keyword search.

    Calls ``search_similar_contributors`` and ``search_contributors_by_keywords``
    (both with ``limit``), merges their hits by ``user_id`` and ranks the merged
    hits by ``vector_weight * vector_score + bm25_weight * bm25_score``.

    Args:
        query_embedding: Query vector; the vector search is skipped when empty.
        keywords: Keywords for BM25; the keyword search is skipped when empty.
        limit: Maximum number of results to return. A non-positive value
            yields an empty list without running any search.
        vector_weight: Weight applied to each hit's ``similarity_score``
            (used as-is, not normalized here).
        bm25_weight: Weight applied to each hit's ``search_score`` after it
            has been divided by the highest ``search_score`` among the BM25 hits.

    Returns:
        Copies of the hit dicts sorted by descending ``hybrid_score``, each
        extended with ``vector_score``, ``bm25_score``, ``hybrid_score`` and
        ``search_method`` ("vector", "bm25" or "hybrid"). Any exception is
        logged and results in an empty list.
    """
    if limit <= 0:
        # A negative limit would otherwise slice from the end of the ranking.
        return []

    try:
        vector_results = (
            await self.search_similar_contributors(query_embedding, limit)
            if query_embedding else []
        )
        bm25_results = (
            await self.search_contributors_by_keywords(keywords, limit)
            if keywords else []
        )

        combined: Dict[Any, Dict[str, Any]] = {}

        for result in vector_results:
            user_id = result.get("user_id")
            if user_id is None:
                # One malformed hit must not discard the whole result set.
                continue
            entry = result.copy()
            # `or 0.0` also covers a score that is present but None.
            entry["vector_score"] = result.get("similarity_score") or 0.0
            entry["bm25_score"] = 0.0
            entry["search_method"] = "vector"
            combined[user_id] = entry

        # Highest raw BM25 score, used to scale BM25 scores into [0, 1].
        max_bm25_score = max(
            (r.get("search_score") or 0.0 for r in bm25_results),
            default=0.0,
        )

        for result in bm25_results:
            user_id = result.get("user_id")
            if user_id is None:
                continue
            raw_score = result.get("search_score") or 0.0
            normalized_bm25 = raw_score / max_bm25_score if max_bm25_score > 0 else 0.0

            entry = combined.get(user_id)
            if entry is None:
                entry = result.copy()
                entry["vector_score"] = 0.0
                entry["bm25_score"] = normalized_bm25
                entry["search_method"] = "bm25"
                combined[user_id] = entry
            else:
                # Keep the best keyword score if the same user shows up twice.
                entry["bm25_score"] = max(entry["bm25_score"], normalized_bm25)
                # Only a hit that also came from the vector search is "hybrid";
                # a repeated BM25-only hit stays "bm25".
                if entry["search_method"] == "vector":
                    entry["search_method"] = "hybrid"

        for entry in combined.values():
            entry["hybrid_score"] = (
                vector_weight * entry["vector_score"] + bm25_weight * entry["bm25_score"]
            )

        final_results = sorted(
            combined.values(),
            key=lambda x: x["hybrid_score"],
            reverse=True
        )[:limit]

        logger.info(f"Hybrid search returned {len(final_results)} results")
        return final_results

    except Exception as e:
        logger.error(f"Error in hybrid search: {str(e)}")
        return []
217276
218277 async def get_contributor_profile (self , github_username : str ) -> Optional [WeaviateUserProfile ]:
219278 """Get a specific contributor's profile by GitHub username."""
@@ -303,3 +362,18 @@ async def get_contributor_profile(github_username: str) -> Optional[WeaviateUser
303362 """Convenience function to get a contributor's profile by GitHub username."""
304363 operations = WeaviateUserOperations ()
305364 return await operations .get_contributor_profile (github_username )
365+
async def search_contributors(
    query_embedding: List[float],
    keywords: List[str],
    limit: int = 10,
    vector_weight: float = 0.7,
    bm25_weight: float = 0.3
) -> List[Dict[str, Any]]:
    """
    Convenience wrapper that runs a hybrid (vector + BM25) contributor search.

    Creates a fresh ``WeaviateUserOperations`` and forwards every argument
    unchanged, in order, to its ``hybrid_search_contributors`` method, returning
    whatever that method returns.
    """
    pending_search = WeaviateUserOperations().hybrid_search_contributors(
        query_embedding,
        keywords,
        limit,
        vector_weight,
        bm25_weight,
    )
    return await pending_search
0 commit comments