11import os
22import logging
33import time
4- from typing import Dict , Optional
54from src .llm import get_llm
65from datasets import Dataset
76from dotenv import load_dotenv
87from ragas import evaluate
98from ragas .metrics import answer_relevancy , faithfulness
109from src .shared .common_fn import load_embedding_model
11- import math
# Load variables from a .env file (if present) before reading configuration.
load_dotenv()

# Name of the embedding model, read from the environment; None when unset.
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
# load_embedding_model's result is unpacked as a pair; only the first element
# is kept. NOTE(review): behaviour when EMBEDDING_MODEL is None is not visible
# here -- confirm load_embedding_model handles a missing value.
EMBEDDING_FUNCTION, _ = load_embedding_model(EMBEDDING_MODEL)
1614
def sanitize_data(data):
    """Replace non-finite float values in ``data`` with ``None``.

    Args:
        data: Mapping of keys to values. It is modified in place.

    Returns:
        The same ``data`` object, with every ``float`` value that is NaN or
        +/-infinity replaced by ``None``. All other values are left untouched.
    """
    # Imported locally so the function does not rely on a module-level import.
    import math

    for key, value in data.items():
        # Only float instances are checked; ints, strings, None, etc. pass
        # through unchanged.
        if isinstance(value, float) and not math.isfinite(value):
            # Rebinding an existing key does not resize the dict, so this is
            # safe while iterating over it.
            data[key] = None
    return data
22-
def preprocess_dataset(example):
    """Ensure the ``"contexts"`` field of one dataset row is a list.

    Intended as a per-row callback (it is passed to ``dataset.map`` in this
    file). A non-list ``contexts`` value is wrapped in a single-element list;
    a value that is already a list is left as is, so applying the function to
    rows whose contexts were wrapped upstream does not nest them a second
    time.

    Args:
        example: Mapping for a single row; must contain the key ``"contexts"``.
            It is modified in place.

    Returns:
        The same ``example`` object.

    Raises:
        KeyError: If ``example`` has no ``"contexts"`` key.
    """
    contexts = example["contexts"]
    # NOTE(review): presumably the evaluator expects a flat list of context
    # strings per row -- confirm against the ragas dataset schema.
    if not isinstance(contexts, list):
        example["contexts"] = [contexts]
    return example
26-
2715def get_ragas_metrics (question : str , context : list , answer : list , model : str ):
2816 """Calculates RAGAS metrics."""
2917 try :
3018 start_time = time .time ()
31- question = [question ] * len (answer )
3219 dataset = Dataset .from_dict (
33- {"question" : question , "answer" : answer , "contexts" : context }
20+ {"question" : [ question ] * len ( answer ) , "answer" : answer , "contexts" : [[ ctx ] for ctx in context ] }
3421 )
3522 dataset = dataset .map (preprocess_dataset )
3623 logging .info ("Evaluation dataset created successfully." )
@@ -53,8 +40,6 @@ def get_ragas_metrics(question: str, context: list, answer: list, model: str):
5340 .round (4 )
5441 .to_dict (orient = "list" )
5542 )
56- #score_dict = sanitize_data(score_dict)
57- print ("Score dict : " ,score_dict )
5843 end_time = time .time ()
5944 logging .info (f"Evaluation completed in: { end_time - start_time :.2f} seconds" )
6045 return score_dict
0 commit comments