1- import boto3
21import os
3- import pytest
42from unittest .mock import MagicMock
53
6- from haystack import Pipeline , Document
4+ import boto3
5+ import pytest
6+ from haystack import Document , Pipeline
77from haystack .components .builders import PromptBuilder
88from haystack .components .writers import DocumentWriter
9+ from haystack .dataclasses import ChatMessage
910from haystack .document_stores .in_memory import InMemoryDocumentStore
10- from haystack_experimental . components . extractors import LLMMetadataExtractor
11- from haystack_experimental .components .extractors import LLMProvider
11+
12+ from haystack_experimental .components .extractors import LLMMetadataExtractor , LLMProvider
1213
1314
1415class TestLLMMetadataExtractor :
@@ -95,7 +96,10 @@ def test_to_dict_openai(self, monkeypatch):
9596 "model" : "gpt-4o-mini" ,
9697 "organization" : None ,
9798 "streaming_callback" : None ,
98- "system_prompt" : None ,
99+ "max_retries" : None ,
100+ "timeout" : None ,
101+ "tools" : None ,
102+ "tools_strict" : False ,
99103 },
100104 "max_workers" : 3 ,
101105 },
@@ -106,11 +110,7 @@ def test_to_dict_aws_bedrock(self, boto3_session_mock):
106110 prompt = "some prompt that was used with the LLM {{document.content}}" ,
107111 expected_keys = ["key1" , "key2" ],
108112 generator_api = LLMProvider .AWS_BEDROCK ,
109- generator_api_params = {
110- "model" : "meta.llama.test" ,
111- "max_length" : 100 ,
112- "truncate" : False ,
113- },
113+ generator_api_params = {"model" : "meta.llama.test" },
114114 raise_on_failure = True ,
115115 )
116116 extractor_dict = extractor .to_dict ()
@@ -146,11 +146,11 @@ def test_to_dict_aws_bedrock(self, boto3_session_mock):
146146 "strict" : False ,
147147 },
148148 "model" : "meta.llama.test" ,
149- "model_family" : None ,
150- "max_length" : 100 ,
151- "truncate" : False ,
149+ "stop_words" : [],
150+ "generation_kwargs" : {},
152151 "streaming_callback" : None ,
153152 "boto3_config" : None ,
153+ "tools" : None ,
154154 },
155155 "expected_keys" : ["key1" , "key2" ],
156156 "page_range" : None ,
@@ -179,7 +179,6 @@ def test_from_dict_openai(self, monkeypatch):
179179 "model" : "gpt-4o-mini" ,
180180 "organization" : None ,
181181 "streaming_callback" : None ,
182- "system_prompt" : None ,
183182 },
184183 },
185184 }
@@ -225,10 +224,11 @@ def test_from_dict_aws_bedrock(self, boto3_session_mock):
225224 "strict" : False ,
226225 },
227226 "model" : "meta.llama.test" ,
228- "max_length " : 200 ,
229- "truncate " : False ,
227+ "stop_words " : [] ,
228+ "generation_kwargs " : {} ,
230229 "streaming_callback" : None ,
231230 "boto3_config" : None ,
231+ "tools" : None ,
232232 },
233233 "expected_keys" : ["key1" , "key2" ],
234234 "page_range" : None ,
@@ -244,8 +244,6 @@ def test_from_dict_aws_bedrock(self, boto3_session_mock):
244244 == "some prompt that was used with the LLM {{document.content}}"
245245 )
246246 assert extractor .generator_api == LLMProvider .AWS_BEDROCK
247- assert extractor .llm_provider .max_length == 200
248- assert extractor .llm_provider .truncate is False
249247 assert extractor .llm_provider .model == "meta.llama.test"
250248
251249 def test_warm_up (self , monkeypatch ):
@@ -288,7 +286,7 @@ def test_extract_metadata_missing_key(self, monkeypatch, caplog):
288286 def test_prepare_prompts (self , monkeypatch ):
289287 monkeypatch .setenv ("OPENAI_API_KEY" , "test-api-key" )
290288 extractor = LLMMetadataExtractor (
291- prompt = "prompt {{document.content}}" ,
289+ prompt = "some_user_definer_prompt {{document.content}}" ,
292290 generator_api = LLMProvider .OPENAI ,
293291 )
294292 docs = [
@@ -300,15 +298,16 @@ def test_prepare_prompts(self, monkeypatch):
300298 ),
301299 ]
302300 prompts = extractor ._prepare_prompts (docs )
301+
303302 assert prompts == [
304- "prompt deepset was founded in 2018 in Berlin, and is known for its Haystack framework" ,
305- "prompt Hugging Face is a company founded in Paris, France and is known for its Transformers library",
303+ ChatMessage . from_dict ({ "_role" : "user" , "_meta" : {}, "_name" : None , "_content" : [{ "text" : "some_user_definer_prompt deepset was founded in 2018 in Berlin, and is known for its Haystack framework"}]}) ,
304+ ChatMessage . from_dict ({ "_role" : "user" , "_meta" : {}, "_name" : None , "_content" : [{ "text" : "some_user_definer_prompt Hugging Face is a company founded in Paris, France and is known for its Transformers library"}]})
306305 ]
307306
308307 def test_prepare_prompts_empty_document (self , monkeypatch ):
309308 monkeypatch .setenv ("OPENAI_API_KEY" , "test-api-key" )
310309 extractor = LLMMetadataExtractor (
311- prompt = "prompt {{document.content}}" ,
310+ prompt = "some_user_definer_prompt {{document.content}}" ,
312311 generator_api = LLMProvider .OPENAI ,
313312 )
314313 docs = [
@@ -320,13 +319,14 @@ def test_prepare_prompts_empty_document(self, monkeypatch):
320319 prompts = extractor ._prepare_prompts (docs )
321320 assert prompts == [
322321 None ,
323- "prompt Hugging Face is a company founded in Paris, France and is known for its Transformers library" ,
322+ ChatMessage .from_dict (
323+ {"_role" : "user" , "_meta" : {}, "_name" : None , "_content" : [{"text" : "some_user_definer_prompt Hugging Face is a company founded in Paris, France and is known for its Transformers library" }]})
324324 ]
325325
326326 def test_prepare_prompts_expanded_range (self , monkeypatch ):
327327 monkeypatch .setenv ("OPENAI_API_KEY" , "test-api-key" )
328328 extractor = LLMMetadataExtractor (
329- prompt = "prompt {{document.content}}" ,
329+ prompt = "some_user_definer_prompt {{document.content}}" ,
330330 generator_api = LLMProvider .OPENAI ,
331331 page_range = ["1-2" ],
332332 )
@@ -336,9 +336,11 @@ def test_prepare_prompts_expanded_range(self, monkeypatch):
336336 )
337337 ]
338338 prompts = extractor ._prepare_prompts (docs , expanded_range = [1 , 2 ])
339- assert prompts == [
340- "prompt Hugging Face is a company founded in Paris, France and is known for its Transformers library\f Page 2\f " ,
341- ]
339+
340+ assert prompts == [ChatMessage .from_dict ({"_role" : "user" ,
341+ "_meta" : {},
342+ "_name" : None ,
343+ "_content" : [{"text" : "some_user_definer_prompt Hugging Face is a company founded in Paris, France and is known for its Transformers library\x0c Page 2\x0c " }]})]
342344
343345 def test_run_no_documents (self , monkeypatch ):
344346 monkeypatch .setenv ("OPENAI_API_KEY" , "test-api-key" )
0 commit comments