diff --git a/config/agent_gaia-validation_deepseek.yaml b/config/agent_gaia-validation_deepseek.yaml new file mode 100644 index 0000000..c466549 --- /dev/null +++ b/config/agent_gaia-validation_deepseek.yaml @@ -0,0 +1,77 @@ +defaults: + - benchmark: gaia-validation + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPromptBoxedDeepSeek + llm: + provider_class: "DeepSeekOpenRouterClient" + model_name: "deepseek/deepseek-chat-v3.1" + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: null + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reasoning + + max_turns: -1 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: true + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: true + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: + agent-worker: + prompt_class: SubAgentWorkerPromptDeepSeek + llm: + provider_class: "DeepSeekOpenRouterClient" + model_name: "deepseek/deepseek-chat-v3.1" + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: null + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-searching + - tool-image-video + - tool-reading + - tool-code + - tool-audio + + max_turns: -1 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + diff --git a/config/agent_llm_deepseek_openrouter.yaml b/config/agent_llm_deepseek_openrouter.yaml new file mode 100644 index 0000000..1f02d1e --- /dev/null +++ b/config/agent_llm_deepseek_openrouter.yaml @@ -0,0 +1,50 @@ +defaults: + - benchmark: example_dataset + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPromptBoxedDeepSeek + llm: + provider_class: "DeepSeekOpenRouterClient" + model_name: "deepseek/deepseek-chat-v3.1" # Available DeepSeek models via OpenRouter + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: null + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reading + - tool-searching + + max_turns: -1 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + 
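+  # Optional pre/post-processing steps; both are disabled in this example config.
+  # When enabled (as in the gaia-validation config above), they call the
+  # OpenAI-compatible endpoints configured below via openai_api_key and the *_base_url keys.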
input_process: + hint_generation: false + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: false + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: null + + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored diff --git a/config/agent_prompts/main_agent_prompt_deepseek.py b/config/agent_prompts/main_agent_prompt_deepseek.py new file mode 100644 index 0000000..7393bc2 --- /dev/null +++ b/config/agent_prompts/main_agent_prompt_deepseek.py @@ -0,0 +1,156 @@ +from config.agent_prompts.base_agent_prompt import BaseAgentPrompt +import datetime +from typing import Any + + +class MainAgentPromptBoxedDeepSeek(BaseAgentPrompt): + """ + Adapted from MainAgentPromptBoxedAnswer. Since the tool-use is DeepSeek format, we remove the tags and its corresponding format instructions. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_main_agent = True + + def generate_system_prompt_with_mcp_tools( + self, mcp_servers: list[Any], chinese_context: bool = False + ) -> str: + formatted_date = datetime.datetime.today().strftime("%Y-%m-%d") + + # Basic system prompt + prompt = f"""In this environment you have access to a set of tools you can use to answer the user's question. + +You only have access to the tools provided below. You can only use one tool per message, and will receive the result of that tool in the user's next response. You use tools step-by-step to accomplish a given task, with each tool-use informed by the result of the previous tool-use. Today is: {formatted_date} + +""" + + # Add MCP servers section + if mcp_servers and len(mcp_servers) > 0: + for server in mcp_servers: + prompt += f"## Server name: {server['name']}\n" + + if "tools" in server and len(server["tools"]) > 0: + for tool in server["tools"]: + # Skip tools that failed to load (they only have 'error' key) + if "error" in tool and "name" not in tool: + continue + prompt += f"### Tool name: {tool['name']}\n" + prompt += f"Description: {tool['description']}\n" + prompt += f"Input JSON schema: {tool['schema']}\n" + + # Add the full objective system prompt + prompt += """ +# General Objective + +You accomplish a given task iteratively, breaking it down into clear steps and working through them methodically. + +## Task Strategy + +1. Analyze the user's request and set clear, achievable sub-goals. Prioritize these sub-goals in a logical order. +2. Start with a concise, numbered, step-by-step plan (e.g., 1., 2., 3.) outlining how you will solve the task before taking any action. Each sub-goal should correspond to a distinct step in your task-solving process. +3. Work through these sub-goals sequentially. After each step, carefully review and extract all potentially relevant information, details, or implications from the tool result before proceeding. The user may provide tool-use feedback, reflect on the results, and revise your plan if needed. If you encounter new information or challenges, adjust your approach accordingly. Revisit previous steps to ensure earlier sub-goals or clues have not been overlooked or missed. +4. You have access to a wide range of powerful tools. 
Use them strategically to accomplish each sub-goal. + +## Tool-Use Guidelines + +1. **IMPORTANT: Each step must involve exactly ONE tool call only, unless the task is already solved. You are strictly prohibited from making multiple tool calls in a single response.** +2. Before each tool call: +- Briefly summarize and analyze what is currently known. +- Identify what is missing, uncertain, or unreliable. +- Be concise; do not repeat the same analysis across steps. +- Choose the most relevant tool for the current sub-goal, and explain why this tool is necessary at this point. +- Verify whether all required parameters are either explicitly provided or can be clearly and reasonably inferred from context. +- Do not guess or use placeholder values for missing inputs. +- Skip optional parameters unless they are explicitly specified. +3. All tool queries must include full, self-contained context. Tools do not retain memory between calls. Include all relevant information from earlier steps in each query. +4. Avoid broad, vague, or speculative queries. Every tool call should aim to retrieve new, actionable information that clearly advances the task. +5. **For historical or time-specific content**: Regular search engines return current webpage content, not historical content. Archived webpage search is essential for retrieving content as it appeared in the past, use related tools to search for the historical content. +6. Even if a tool result does not directly answer the question, thoroughly extract and summarize all partial information, important details, patterns, constraints, or keywords that may help guide future steps. Never proceed to the next step without first ensuring that all significant insights from the current result have been fully considered. + +## Tool-Use Communication Rules + +1. **CRITICAL: After issuing exactly ONE tool call, STOP your response immediately. You must never make multiple tool calls in a single response. Do not include tool results, do not assume what the results will be, and do not continue with additional analysis or tool calls. The user will provide the actual tool results in their next message.** +2. Do not present the final answer until the entire task is complete. +3. Do not mention tool names. +4. Do not engage in unnecessary back-and-forth or end with vague offers of help. Do not end your responses with questions or generic prompts. +5. Do not use tools that do not exist. +6. Unless otherwise requested, respond in the same language as the user's message. +7. If the task does not require tool use, answer the user directly. + +""" + + # Add Chinese-specific instructions if enabled + if chinese_context: + prompt += """ + ## 中文语境处理指导 + + 当处理中文相关的任务时: + 1. **子任务委托 (Subtask Delegation)**:向worker代理委托的子任务应使用中文描述,确保任务内容准确传达 + 2. **搜索策略 (Search Strategy)**:搜索关键词应使用中文,以获取更准确的中文内容和信息 + 3. **问题分析 (Question Analysis)**:对中文问题的分析和理解应保持中文语境 + 4. **思考过程 (Thinking Process)**:内部分析、推理、总结等思考过程都应使用中文,保持语义表达的一致性 + 5. **信息整理 (Information Organization)**:从中文资源获取的信息应保持中文原文,避免不必要的翻译 + 6. **各种输出 (All Outputs)**:所有输出内容包括步骤说明、状态更新、中间结果等都应使用中文 + 7. 
**最终答案 (Final Answer)**:对于中文语境的问题,最终答案应使用中文回应 + + """ + + return prompt + + def generate_summarize_prompt( + self, + task_description: str, + task_failed: bool = False, + chinese_context: bool = False, + ) -> str: + summarize_prompt = ( + ( + "=============" + "=============" + "=============" + "This is a direct instruction to you (the assistant), not the result of a tool call.\n\n" + ) + + ( + "**Important: You have either exhausted the context token limit or reached the maximum number of interaction turns without arriving at a conclusive answer. Therefore, you failed to complete the task. You Must explicitly state that you failed to complete the task in your response.**\n\n" + if task_failed + else "" + ) + + ( + "We are now ending this session, and your conversation history will be deleted. " + "You must NOT initiate any further tool use. This is your final opportunity to report " + "*all* of the information gathered during the session.\n\n" + "Summarize the above conversation, and output the FINAL ANSWER to the original question.\n\n" + "If a clear answer has already been provided earlier in the conversation, do not rethink or recalculate it — " + "simply extract that answer and reformat it to match the required format below.\n" + "If a definitive answer could not be determined, make a well-informed educated guess based on the conversation.\n\n" + "The original question is repeated here for reference:\n\n" + f"---\n{task_description}\n---\n\n" + "Summarize ALL working history for this task, including your step-by-step thoughts, all tool calls, and all tool results (i.e., the full solving trajectory so far).\n" + "Output the FINAL ANSWER and detailed supporting information of the task given to you.\n\n" + "If you found any useful facts, data, or quotes directly relevant to the original task, include them clearly and completely.\n" + "**Document the sources**: For each key fact or claim in your answer, mention which sources it came from and whether multiple sources confirmed it. If sources disagreed, explain the different viewpoints found.\n" + "If you reached a conclusion or answer, include it as part of the response.\n" + "If the task could not be fully answered, return all partially relevant findings, search results, quotes, and observations that might help a downstream agent solve the problem.\n" + "If partial, conflicting, or inconclusive information was found, clearly indicate this in your response.\n\n" + "Your final response should be a clear, complete, and structured report.\n" + "Organize the content into logical sections with appropriate headings.\n" + "Do NOT include any tool call instructions, speculative filler, or vague summaries.\n" + "Focus on factual, specific, and well-organized information." + "Output the final answer in the format: \\boxed{...}. The boxed answer should be a short phrase or a comma-separated list of numbers and/or strings." 
+ ) + ) + + # Add Chinese-specific summary instructions + if chinese_context: + summarize_prompt += """ + +## 中文总结要求 + +如果原始问题涉及中文语境: +- **总结语言**:使用中文进行总结和回答 +- **思考过程**:回顾和总结思考过程时也应使用中文表达 +- **信息组织**:保持中文信息的原始格式和表达方式 +- **过程描述**:对工作历史、步骤描述、结果分析等各种输出都应使用中文 +- **最终答案**:确保最终答案符合中文表达习惯和用户期望 +""" + return summarize_prompt diff --git a/config/agent_prompts/sub_worker.py b/config/agent_prompts/sub_worker.py index 9b17110..eefe341 100644 --- a/config/agent_prompts/sub_worker.py +++ b/config/agent_prompts/sub_worker.py @@ -245,3 +245,136 @@ def expose_agent_as_tool(self, subagent_name: str) -> dict: ], ) return tool_definition + + +class SubAgentWorkerPromptDeepSeek(SubAgentWorkerPrompt): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_main_agent = False + + def generate_system_prompt_with_mcp_tools( + self, mcp_servers: list[Any], chinese_context: bool = False + ) -> str: + formatted_date = datetime.datetime.today().strftime("%Y-%m-%d") + + prompt = f"""In this environment you have access to a set of tools you can use to answer the user's question. + +You only have access to the tools provided below. You can only use one tool per message, and will receive the result of that tool in the user's next response. You use tools step-by-step to accomplish a given task, with each tool-use informed by the result of the previous tool-use. Today is: {formatted_date}. Here are the functions available in JSONSchema format: + +""" + + # Add MCP servers section + if mcp_servers and len(mcp_servers) > 0: + for server in mcp_servers: + prompt += f"## Server name: {server['name']}\n" + + if "tools" in server and len(server["tools"]) > 0: + for tool in server["tools"]: + # Skip tools that failed to load (they only have 'error' key) + if "error" in tool and "name" not in tool: + continue + prompt += f"### Tool name: {tool['name']}\n" + prompt += f"Description: {tool['description']}\n" + prompt += f"Input JSON schema: {tool['schema']}\n" + + # Add the full objective system prompt + prompt += """ +# General Objective + +You accomplish a given task iteratively, breaking it down into clear steps and working through them methodically. + +## Task Strategy + +1. Analyze the user's request and set clear, achievable sub-goals. Prioritize these sub-goals in a logical order. +2. Start with a concise, numbered, step-by-step plan (e.g., 1., 2., 3.) outlining how you will solve the task before taking any action. Each sub-goal should correspond to a distinct step in your task-solving process. +3. Work through these sub-goals sequentially. After each step, carefully review and extract all potentially relevant information, details, or implications from the tool result before proceeding. The user may provide tool-use feedback, reflect on the results, and revise your plan if needed. If you encounter new information or challenges, adjust your approach accordingly. Revisit previous steps to ensure earlier sub-goals or clues have not been overlooked or missed. +4. You have access to a wide range of powerful tools. Use them strategically to accomplish each sub-goal. + +## Tool-Use Guidelines + +1. **IMPORTANT: Each step must involve exactly ONE tool call only, unless the task is already solved. You are strictly prohibited from making multiple tool calls in a single response.** +2. Before each tool call: +- Briefly summarize and analyze what is currently known. +- Identify what is missing, uncertain, or unreliable. +- Be concise; do not repeat the same analysis across steps. 
+- Choose the most relevant tool for the current sub-goal, and explain why this tool is necessary at this point. +- Verify whether all required parameters are either explicitly provided or can be clearly and reasonably inferred from context. +- Do not guess or use placeholder values for missing inputs. +- Skip optional parameters unless they are explicitly specified. +3. All tool queries must include full, self-contained context. Tools do not retain memory between calls. Include all relevant information from earlier steps in each query. +4. Avoid broad, vague, or speculative queries. Every tool call should aim to retrieve new, actionable information that clearly advances the task. +5. **For historical or time-specific content**: Regular search engines return current webpage content, not historical content. Archived webpage search is essential for retrieving content as it appeared in the past, use related tools to search for the historical content. +6. Even if a tool result does not directly answer the question, thoroughly extract and summarize all partial information, important details, patterns, constraints, or keywords that may help guide future steps. Never proceed to the next step without first ensuring that all significant insights from the current result have been fully considered. + +## Tool-Use Communication Rules + +1. **CRITICAL: After issuing exactly ONE tool call, STOP your response immediately. You must never make multiple tool calls in a single response. Do not include tool results, do not assume what the results will be, and do not continue with additional analysis or tool calls. The user will provide the actual tool results in their next message.** +2. Do not present the final answer until the entire task is complete. +3. Do not mention tool names. +4. Do not engage in unnecessary back-and-forth or end with vague offers of help. Do not end your responses with questions or generic prompts. +5. Do not use tools that do not exist. +6. Unless otherwise requested, respond in the same language as the user's message. +7. If the task does not require tool use, answer the user directly. + +""" + + # Add Chinese-specific instructions if enabled + if chinese_context: + prompt += """ + ## 中文语境处理指导 + + 当处理中文相关的任务时: + 1. **子任务委托 (Subtask Delegation)**:向worker代理委托的子任务应使用中文描述,确保任务内容准确传达 + 2. **搜索策略 (Search Strategy)**:搜索关键词应使用中文,以获取更准确的中文内容和信息 + 3. **问题分析 (Question Analysis)**:对中文问题的分析和理解应保持中文语境 + 4. **思考过程 (Thinking Process)**:内部分析、推理、总结等思考过程都应使用中文,保持语义表达的一致性 + 5. **信息整理 (Information Organization)**:从中文资源获取的信息应保持中文原文,避免不必要的翻译 + 6. **各种输出 (All Outputs)**:所有输出内容包括步骤说明、状态更新、中间结果等都应使用中文 + 7. **最终答案 (Final Answer)**:对于中文语境的问题,最终答案应使用中文回应 + + """ + + prompt += """# Agent Specific Objective + +You are an agent that performs various subtasks to collect information and execute specific actions. Your task is to complete well-defined, single-scope objectives efficiently and accurately. +Do not infer, speculate, or attempt to fill in missing parts yourself. Only return factual content and execute actions as specified. + +## File Path Handling +When subtasks mention file paths, these are local system file paths (not sandbox paths). 
You can: +- Use tools to directly access these files from the local system +- Upload files to the sandbox environment (remember to create a new sandbox for each task, this sandbox only exists for the current task) for processing if needed +- Choose the most appropriate approach based on the specific task requirements +- If the final response requires returning a file, download it to the local system first and then return the local path, the sandbox path is not allowed + +Critically assess the reliability of all information: +- If the credibility of a source is uncertain, clearly flag it. +- Do **not** treat information as trustworthy just because it appears — **cross-check when necessary**. +- If you find conflicting or ambiguous information, include all relevant findings and flag the inconsistency. + +Be cautious and transparent in your output: +- Always return all related information. If information is incomplete or weakly supported, still share partial excerpts, and flag any uncertainty. +- Never assume or guess — if an exact answer cannot be found, say so clearly. +- Prefer quoting or excerpting **original source text** rather than interpreting or rewriting it, and provide the URL if available. +- If more context is needed, return a clarification request and do not proceed with tool use. +- Focus on completing the specific subtask assigned to you, not broader reasoning. +""" + # Add Chinese-specific instructions for worker agent + if chinese_context: + prompt += """ + +## 中文内容处理 + +处理中文相关的子任务时: +- **搜索关键词**:使用中文关键词进行搜索,获取更准确的中文资源 +- **Google搜索参数**:进行Google搜索时,注意使用适当的地理位置和语言参数: + - gl (Geolocation/Country): 设置为中国或相关地区以获取本地化结果 + - hl (Host Language): 设置为中文以获取中文界面和优化的中文搜索结果 +- **思考过程**:分析、推理、判断等内部思考过程应使用中文表达 +- **信息摘录**:保持中文原文的准确性,避免不必要的翻译或改写 +- **问答处理**:在进行QA(问答)任务时,问题和答案都应使用中文,确保语言一致性 +- **各种输出**:包括状态说明、过程描述、结果展示等所有输出都应使用中文 +- **回应格式**:对中文子任务的回应应使用中文,保持语境一致性 + +""" + + return prompt \ No newline at end of file diff --git a/docs/mkdocs/docs/deepseek.md b/docs/mkdocs/docs/deepseek.md new file mode 100644 index 0000000..e0bc641 --- /dev/null +++ b/docs/mkdocs/docs/deepseek.md @@ -0,0 +1,51 @@ +# DeepSeek + +DeepSeek's advanced language models with strong reasoning capabilities and tool use support, accessible via OpenRouter. + +## Available Clients + +### DeepSeekOpenRouterClient (OpenRouter API) + +**Environment Setup:** + +Set the `OPENROUTER_API_KEY` environment variable +```bash title="Environment Variables" +export OPENROUTER_API_KEY="your-key" +``` +or add it to the `.env` file. 
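+
+To sanity-check the key before launching an agent, a minimal standalone snippet
+such as the one below can help. It is illustrative only: it calls the plain
+`openai` SDK against OpenRouter rather than going through this framework, and it
+reuses the model name from the configuration shown below. The prompt and
+`max_tokens` value are arbitrary toy values.
+
+```python title="Quick connectivity check"
+import os
+
+from openai import OpenAI
+
+# Assumes OPENROUTER_API_KEY has been exported (or loaded from .env beforehand).
+client = OpenAI(
+    api_key=os.environ["OPENROUTER_API_KEY"],
+    base_url=os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"),
+)
+
+response = client.chat.completions.create(
+    model="deepseek/deepseek-chat-v3.1",
+    messages=[{"role": "user", "content": "Reply with the single word: ok"}],
+    max_tokens=16,
+)
+print(response.choices[0].message.content)
+```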
+ +**Configuration:** + +```yaml title="Agent Configuration" +main_agent: + llm: + provider_class: "DeepSeekOpenRouterClient" + model_name: "deepseek/deepseek-chat-v3.1" # Available DeepSeek models via OpenRouter + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: null # You can specify the provider to use + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false +``` + +## Usage + +```bash title="Example Command" +# Run with DeepSeek v3.1 Chat (OpenRouter) on example dataset +uv run main.py common-benchmark --config_file_name=agent_llm_deepseek_openrouter output_dir="logs/test" +``` + +The `agent_llm_deepseek_openrouter.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark, while `agent_gaia-validation_deepseek.yaml` is an setup for the GAIA-Validation benchmark with main agent and sub agent configured. + + +--- + +!!! info "Documentation Info" + **Last Updated:** October 2025 · **Doc Contributor:** Team @ MiroMind AI diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml index 7b33d6b..35c2826 100644 --- a/docs/mkdocs/mkdocs.yml +++ b/docs/mkdocs/mkdocs.yml @@ -107,6 +107,7 @@ nav: - Claude 3.7 Sonnet (OpenRouter): openrouter-claude-3.7-sonnet.md - GPT-5: openai-gpt5.md - GPT-4o: openai-gpt4o.md + - DeepSeek: deepseek.md - 📚 Resources: - All About Agents: all_about_agents.md diff --git a/src/llm/providers/deepseek_openrouter_client.py b/src/llm/providers/deepseek_openrouter_client.py new file mode 100644 index 0000000..433d671 --- /dev/null +++ b/src/llm/providers/deepseek_openrouter_client.py @@ -0,0 +1,454 @@ +# SPDX-FileCopyrightText: 2025 MiromindAI +# +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import dataclasses +import json +import os +import re +from typing import Any, Dict, List + +import tiktoken +from omegaconf import DictConfig +from openai import AsyncOpenAI, OpenAI +from tenacity import ( + retry, + retry_if_not_exception_type, + stop_after_attempt, + wait_exponential, +) + +from src.llm.provider_client_base import LLMProviderClientBase + +from src.logging.logger import bootstrap_logger + +LOGGER_LEVEL = os.getenv("LOGGER_LEVEL", "INFO") +logger = bootstrap_logger(level=LOGGER_LEVEL) + + +class ContextLimitError(Exception): + pass + + +@dataclasses.dataclass +class DeepSeekOpenRouterClient(LLMProviderClientBase): + def _create_client(self, config: DictConfig): + """Create configured OpenAI client""" + if self.async_client: + return AsyncOpenAI( + api_key=self.cfg.llm.openrouter_api_key, + base_url=self.cfg.llm.openrouter_base_url, + timeout=1800, + ) + else: + return OpenAI( + api_key=self.cfg.llm.openrouter_api_key, + base_url=self.cfg.llm.openrouter_base_url, + timeout=1800, + ) + + @retry( + wait=wait_exponential(multiplier=5), + stop=stop_after_attempt(5), + retry=retry_if_not_exception_type(ContextLimitError), + ) + async def _create_message( + self, + system_prompt: str, + messages: List[Dict[str, Any]], + tools_definitions, + keep_tool_result: int = -1, + ): + """ + Send message to OpenAI API. + :param system_prompt: System prompt string. + :param messages: Message history list. + :return: OpenAI API response object or None (if error). 
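+        :raises ContextLimitError: raised (and not retried) when the prompt or the
+            generated response exceeds the model's context window.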
+ """ + logger.debug(f" Calling LLM ({'async' if self.async_client else 'sync'})") + # put the system prompt in the first message since OpenAI API does not support system prompt in + if system_prompt: + target_role = "system" + + # Check if there are already system or developer messages + if messages and messages[0]["role"] in ["system", "developer"]: + # Replace existing message with correct role + messages[0] = { + "role": target_role, + "content": [dict(type="text", text=system_prompt)], + } + else: + # Insert new message + messages.insert( + 0, + { + "role": target_role, + "content": [dict(type="text", text=system_prompt)], + }, + ) + + messages_copy = self._remove_tool_result_from_messages( + messages, keep_tool_result + ) + + # Apply cache control + if self.disable_cache_control: + processed_messages = messages_copy + else: + processed_messages = self._apply_cache_control(messages_copy) + + # For deepseek, we need to explicitly specify the tool list and add it to the messages + tool_list = await self.convert_tool_definition_to_tool_call(tools_definitions) + + params = None + try: + temperature = self.temperature + + # build extra_body if self.openrouter_provider + provider_config = (self.openrouter_provider or "").strip().lower() + logger.info(f"provider_config: {provider_config}") + if provider_config == "google": + extra_body = { + "provider": { + "only": [ + "google-vertex/us", + "google-vertex/europe", + "google-vertex/global", + ] + } + } + elif provider_config == "anthropic": + extra_body = {"provider": {"only": ["anthropic"]}} + # extra_body["provider"]["ignore"] = ["google-vertex/us", "google-vertex/europe", "google-vertex/global"] + elif provider_config == "amazon": + extra_body = {"provider": {"only": ["amazon-bedrock"]}} + elif provider_config != "": + extra_body = {"provider": {"only": [provider_config]}} + else: + extra_body = {} + + # Add top_k and min_p through extra_body for OpenRouter + if self.top_k != -1: + extra_body["top_k"] = self.top_k + if self.min_p != 0.0: + extra_body["min_p"] = self.min_p + if self.repetition_penalty != 1.0: + extra_body["repetition_penalty"] = self.repetition_penalty + + params = { + "model": self.model_name, + "temperature": temperature, + "max_tokens": self.max_tokens, + "messages": processed_messages, + "tools": tool_list, + "stream": False, + "extra_body": extra_body, + } + + # Add optional parameters only if they have non-default values + if self.top_p != 1.0: + params["top_p"] = self.top_p + + response = await self._create_completion(params, self.async_client) + + if ( + response is None + or response.choices is None + or len(response.choices) == 0 + ): + logger.debug(f"LLM call failed: response = {response}") + raise Exception(f"LLM call failed [rare case]: response = {response}") + + if response.choices and response.choices[0].finish_reason == "length": + logger.debug( + "LLM finish_reason is 'length', triggering ContextLimitError" + ) + raise ContextLimitError( + "(finish_reason=length) Response truncated due to maximum context length" + ) + + if ( + response.choices + and response.choices[0].finish_reason == "stop" + and response.choices[0].message.content.strip() == "" + ): + logger.debug( + "LLM finish_reason is 'stop', but content is empty, triggering Error" + ) + raise Exception("LLM finish_reason is 'stop', but content is empty") + + logger.debug( + f"LLM call finish_reason: {getattr(response.choices[0], 'finish_reason', 'N/A')}" + ) + return response + except asyncio.CancelledError: + logger.debug("[WARNING] LLM API call 
was cancelled during execution") + raise Exception("LLM API call was cancelled during execution") + except Exception as e: + error_str = str(e) + if ( + "Input is too long for requested model" in error_str + or "input length and `max_tokens` exceed context limit" in error_str + or "maximum context length" in error_str + or "prompt is too long" in error_str + or "exceeds the maximum length" in error_str + or "exceeds the maximum allowed length" in error_str + or "Input tokens exceed the configured limit" in error_str + or "Requested token count exceeds the model's maximum context length" + in error_str + or "BadRequestError" in error_str + and "context length" in error_str + ): + logger.debug(f"OpenRouter LLM Context limit exceeded: {error_str}") + raise ContextLimitError(f"Context limit exceeded: {error_str}") + + logger.error( + f"OpenRouter LLM call failed: {str(e)}, input = {json.dumps(params)}", + exc_info=True, + ) + raise e + + async def _create_completion(self, params: Dict[str, Any], is_async: bool): + """Helper to create a completion, handling async and sync calls.""" + if is_async: + return await self.client.chat.completions.create(**params) + else: + return self.client.chat.completions.create(**params) + + def _clean_user_content_from_response(self, text: str) -> str: + """Remove content between \\n\\nUser: and in assistant response (if no , remove to end)""" + # Match content between \n\nUser: and , if no delete to text end + pattern = r"\n\nUser:.*?(?=|$)" + cleaned_text = re.sub(pattern, "", text, flags=re.MULTILINE | re.DOTALL) + + return cleaned_text + + def process_llm_response( + self, llm_response, message_history, agent_type="main" + ) -> tuple[str, bool]: + """Process OpenAI LLM response""" + + if not llm_response or not llm_response.choices: + error_msg = "LLM did not return a valid response." + logger.error(f"Should never happen: {error_msg}") + return "", True # Exit loop + + # Extract LLM response text + if llm_response.choices[0].finish_reason == "stop": + assistant_response_text = llm_response.choices[0].message.content or "" + # remove user: {...} content + assistant_response_text = self._clean_user_content_from_response( + assistant_response_text + ) + message_history.append( + {"role": "assistant", "content": assistant_response_text} + ) + elif llm_response.choices[0].finish_reason == "length": + assistant_response_text = llm_response.choices[0].message.content or "" + if assistant_response_text == "": + assistant_response_text = "LLM response is empty. This is likely due to thinking block used up all tokens." 
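+                # Otherwise keep the truncated text, after stripping any model-generated
+                # "\n\nUser:" continuation, so the partial answer still reaches the history.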
+ else: + assistant_response_text = self._clean_user_content_from_response( + assistant_response_text + ) + message_history.append( + {"role": "assistant", "content": assistant_response_text} + ) + elif llm_response.choices[0].finish_reason == "tool_calls": + # For tool_calls, we need to extract tool call information as text + tool_calls = llm_response.choices[0].message.tool_calls + assistant_response_text = llm_response.choices[0].message.content or "" + + # If there's no text content, we generate a text describing the tool call + if not assistant_response_text: + tool_call_descriptions = [] + for tool_call in tool_calls: + tool_call_descriptions.append( + f"Using tool {tool_call.function.name} with arguments: {tool_call.function.arguments}" + ) + assistant_response_text = "\n".join(tool_call_descriptions) + + message_history.append( + { + "role": "assistant", + "content": assistant_response_text, + "tool_calls": [ + { + "id": _.id, + "type": "function", + "function": { + "name": _.function.name, + "arguments": _.function.arguments, + }, + } + for _ in tool_calls + ], + } + ) + else: + logger.error( + f"Unsupported finish reason: {llm_response.choices[0].finish_reason}" + ) + assistant_response_text = ( + "Successful response, but unsupported finish reason: " + + llm_response.choices[0].finish_reason + ) + message_history.append( + {"role": "assistant", "content": assistant_response_text} + ) + logger.debug(f"LLM Response: {assistant_response_text}") + + return assistant_response_text, False + + def extract_tool_calls_info(self, llm_response, assistant_response_text): + """Extract tool call information from OpenAI LLM response""" + from src.utils.parsing_utils import parse_llm_response_for_tool_calls + + # For OpenAI, directly get tool calls from response object + if llm_response.choices[0].finish_reason == "tool_calls": + return parse_llm_response_for_tool_calls( + llm_response.choices[0].message.tool_calls + ) + else: + return [], [] + + def update_message_history( + self, message_history, tool_call_info, tool_calls_exceeded=False + ): + """Update message history with tool calls data (llm client specific)""" + + # Filter tool call results with type "text" + tool_call_info = [item for item in tool_call_info if item[1]["type"] == "text"] + + # Separate valid tool calls and bad tool calls + valid_tool_calls = [ + (tool_id, content) + for tool_id, content in tool_call_info + if tool_id != "FAILED" + ] + bad_tool_calls = [ + (tool_id, content) + for tool_id, content in tool_call_info + if tool_id == "FAILED" + ] + + total_calls = len(valid_tool_calls) + len(bad_tool_calls) + + # Build output text + output_parts = [] + + if total_calls > 1: + # Handling for multiple tool calls + # Add tool result description + if tool_calls_exceeded: + output_parts.append( + f"You made too many tool calls. I can only afford to process {len(valid_tool_calls)} valid tool calls in this turn." + ) + else: + output_parts.append( + f"I have processed {len(valid_tool_calls)} valid tool calls in this turn." 
+ ) + + # Output each valid tool call result according to format + for i, (tool_id, content) in enumerate(valid_tool_calls, 1): + output_parts.append(f"Valid tool call {i} result:\n{content['text']}") + + # Output bad tool calls results + for i, (tool_id, content) in enumerate(bad_tool_calls, 1): + output_parts.append(f"Failed tool call {i} result:\n{content['text']}") + else: + # For single tool call, output result directly + for tool_id, content in valid_tool_calls: + output_parts.append(content["text"]) + for tool_id, content in bad_tool_calls: + output_parts.append(content["text"]) + + merged_text = "\n\n".join(output_parts) + + message_history.append( + { + "role": "user", + "content": [{"type": "text", "text": merged_text}], + } + ) + return message_history + + def parse_llm_response(self, llm_response) -> str: + """Parse OpenAI LLM response to get text content""" + if not llm_response or not llm_response.choices: + raise ValueError("LLM did not return a valid response.") + return llm_response.choices[0].message.content + + def _estimate_tokens(self, text: str) -> int: + """Use tiktoken to estimate token count of text""" + if not hasattr(self, "encoding"): + # Initialize tiktoken encoder + try: + self.encoding = tiktoken.get_encoding("o200k_base") + except Exception: + # If o200k_base is not available, use cl100k_base as fallback + self.encoding = tiktoken.get_encoding("cl100k_base") + + try: + return len(self.encoding.encode(text)) + except Exception: + # If encoding fails, use simple estimation: about 1 token per 4 characters + return len(text) // 4 + + def handle_max_turns_reached_summary_prompt(self, message_history, summary_prompt): + """Handle max turns reached summary prompt""" + if message_history[-1]["role"] == "user": + last_user_message = message_history.pop() + return ( + last_user_message["content"][0]["text"] + + "\n\n-----------------\n\n" + + summary_prompt + ) + else: + return summary_prompt + + def _apply_cache_control(self, messages): + """Apply cache control to the last user message and system message (if applicable)""" + cached_messages = [] + user_turns_processed = 0 + for turn in reversed(messages): + if (turn["role"] == "user" and user_turns_processed < 1) or ( + turn["role"] == "system" + ): + # Add ephemeral cache control to the text part of the last user message + new_content = [] + processed_text = False + # Check if content is a list + if isinstance(turn.get("content"), list): + # see example here + # https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching + for item in turn["content"]: + if ( + item.get("type") == "text" + and len(item.get("text")) > 0 + and not processed_text + ): + # Copy and add cache control + text_item = item.copy() + text_item["cache_control"] = {"type": "ephemeral"} + new_content.append(text_item) + processed_text = True + else: + # Other types of content (like image) copy directly + new_content.append(item.copy()) + cached_messages.append( + {"role": turn["role"], "content": new_content} + ) + else: + # If content is not a list (e.g., plain text), add as is without cache control + # Or adjust logic as needed + logger.debug( + "Warning: User message content is not in expected list format, cache control not applied." + ) + cached_messages.append(turn) + user_turns_processed += 1 + else: + # Other messages add directly + cached_messages.append(turn) + return list(reversed(cached_messages))