Skip to content

Commit 0d37bec

Browse files
authored
feat(model): add support for deepseek V3.1 as llm agent. (#86)
* Add DeepSeek client support. * Change config to reuse the Claude client for using DeepSeek from OpenRouter. * Support DeepSeek V3.1 as the client. * Update: add comments. * Update: add sub-agent prompt to support OpenAI-style tool calling. * Update: rename openai -> deepseek. * Update docs.
1 parent b9527ec commit 0d37bec

File tree

7 files changed

+922
-0
lines changed

7 files changed

+922
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
defaults:
2+
- benchmark: gaia-validation
3+
- override hydra/job_logging: none
4+
- _self_ # Allow defining variables at the top of this file
5+
6+
7+
main_agent:
8+
prompt_class: MainAgentPromptBoxedDeepSeek
9+
llm:
10+
provider_class: "DeepSeekOpenRouterClient"
11+
model_name: "deepseek/deepseek-chat-v3.1"
12+
async_client: true
13+
temperature: 0.3
14+
top_p: 0.95
15+
min_p: 0.0
16+
top_k: -1
17+
max_tokens: 32000
18+
openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
19+
openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
20+
openrouter_provider: null
21+
disable_cache_control: false
22+
keep_tool_result: -1
23+
oai_tool_thinking: false
24+
25+
tool_config:
26+
- tool-reasoning
27+
28+
max_turns: -1 # Maximum number of turns for main agent execution
29+
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
30+
31+
input_process:
32+
hint_generation: true
33+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
34+
output_process:
35+
final_answer_extraction: true
36+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
37+
38+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
39+
add_message_id: true
40+
keep_tool_result: -1
41+
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
42+
43+
44+
sub_agents:
45+
agent-worker:
46+
prompt_class: SubAgentWorkerPromptDeepSeek
47+
llm:
48+
provider_class: "DeepSeekOpenRouterClient"
49+
model_name: "deepseek/deepseek-chat-v3.1"
50+
async_client: true
51+
temperature: 0.3
52+
top_p: 0.95
53+
min_p: 0.0
54+
top_k: -1
55+
max_tokens: 32000
56+
openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
57+
openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
58+
openrouter_provider: null
59+
disable_cache_control: false
60+
keep_tool_result: -1
61+
oai_tool_thinking: false
62+
63+
tool_config:
64+
- tool-searching
65+
- tool-image-video
66+
- tool-reading
67+
- tool-code
68+
- tool-audio
69+
70+
max_turns: -1 # Maximum number of turns for main agent execution
71+
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
72+
73+
74+
# Can define some top-level or default parameters here
75+
output_dir: logs/
76+
data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
77+
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
defaults:
2+
- benchmark: example_dataset
3+
- override hydra/job_logging: none
4+
- _self_ # Allow defining variables at the top of this file
5+
6+
7+
main_agent:
8+
prompt_class: MainAgentPromptBoxedDeepSeek
9+
llm:
10+
provider_class: "DeepSeekOpenRouterClient"
11+
model_name: "deepseek/deepseek-chat-v3.1" # Available DeepSeek models via OpenRouter
12+
async_client: true
13+
temperature: 0.3
14+
top_p: 0.95
15+
min_p: 0.0
16+
top_k: -1
17+
max_tokens: 32000
18+
openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
19+
openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
20+
openrouter_provider: null
21+
disable_cache_control: false
22+
keep_tool_result: -1
23+
oai_tool_thinking: false
24+
25+
tool_config:
26+
- tool-reading
27+
- tool-searching
28+
29+
max_turns: -1 # Maximum number of turns for main agent execution
30+
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
31+
32+
input_process:
33+
hint_generation: false
34+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
35+
output_process:
36+
final_answer_extraction: false
37+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
38+
39+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
40+
add_message_id: true
41+
keep_tool_result: -1
42+
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
43+
44+
45+
sub_agents: null
46+
47+
48+
# Can define some top-level or default parameters here
49+
output_dir: logs/
50+
data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
from config.agent_prompts.base_agent_prompt import BaseAgentPrompt
2+
import datetime
3+
from typing import Any
4+
5+
6+
class MainAgentPromptBoxedDeepSeek(BaseAgentPrompt):
7+
"""
8+
Adapted from MainAgentPromptBoxedAnswer. Since the tool-use is DeepSeek format, we remove the <use_mcp_tool> tags and its corresponding format instructions.
9+
"""
10+
11+
def __init__(self, *args, **kwargs):
12+
super().__init__(*args, **kwargs)
13+
self.is_main_agent = True
14+
15+
def generate_system_prompt_with_mcp_tools(
16+
self, mcp_servers: list[Any], chinese_context: bool = False
17+
) -> str:
18+
formatted_date = datetime.datetime.today().strftime("%Y-%m-%d")
19+
20+
# Basic system prompt
21+
prompt = f"""In this environment you have access to a set of tools you can use to answer the user's question.
22+
23+
You only have access to the tools provided below. You can only use one tool per message, and will receive the result of that tool in the user's next response. You use tools step-by-step to accomplish a given task, with each tool-use informed by the result of the previous tool-use. Today is: {formatted_date}
24+
25+
"""
26+
27+
# Add MCP servers section
28+
if mcp_servers and len(mcp_servers) > 0:
29+
for server in mcp_servers:
30+
prompt += f"## Server name: {server['name']}\n"
31+
32+
if "tools" in server and len(server["tools"]) > 0:
33+
for tool in server["tools"]:
34+
# Skip tools that failed to load (they only have 'error' key)
35+
if "error" in tool and "name" not in tool:
36+
continue
37+
prompt += f"### Tool name: {tool['name']}\n"
38+
prompt += f"Description: {tool['description']}\n"
39+
prompt += f"Input JSON schema: {tool['schema']}\n"
40+
41+
# Add the full objective system prompt
42+
prompt += """
43+
# General Objective
44+
45+
You accomplish a given task iteratively, breaking it down into clear steps and working through them methodically.
46+
47+
## Task Strategy
48+
49+
1. Analyze the user's request and set clear, achievable sub-goals. Prioritize these sub-goals in a logical order.
50+
2. Start with a concise, numbered, step-by-step plan (e.g., 1., 2., 3.) outlining how you will solve the task before taking any action. Each sub-goal should correspond to a distinct step in your task-solving process.
51+
3. Work through these sub-goals sequentially. After each step, carefully review and extract all potentially relevant information, details, or implications from the tool result before proceeding. The user may provide tool-use feedback, reflect on the results, and revise your plan if needed. If you encounter new information or challenges, adjust your approach accordingly. Revisit previous steps to ensure earlier sub-goals or clues have not been overlooked or missed.
52+
4. You have access to a wide range of powerful tools. Use them strategically to accomplish each sub-goal.
53+
54+
## Tool-Use Guidelines
55+
56+
1. **IMPORTANT: Each step must involve exactly ONE tool call only, unless the task is already solved. You are strictly prohibited from making multiple tool calls in a single response.**
57+
2. Before each tool call:
58+
- Briefly summarize and analyze what is currently known.
59+
- Identify what is missing, uncertain, or unreliable.
60+
- Be concise; do not repeat the same analysis across steps.
61+
- Choose the most relevant tool for the current sub-goal, and explain why this tool is necessary at this point.
62+
- Verify whether all required parameters are either explicitly provided or can be clearly and reasonably inferred from context.
63+
- Do not guess or use placeholder values for missing inputs.
64+
- Skip optional parameters unless they are explicitly specified.
65+
3. All tool queries must include full, self-contained context. Tools do not retain memory between calls. Include all relevant information from earlier steps in each query.
66+
4. Avoid broad, vague, or speculative queries. Every tool call should aim to retrieve new, actionable information that clearly advances the task.
67+
5. **For historical or time-specific content**: Regular search engines return current webpage content, not historical content. Archived webpage search is essential for retrieving content as it appeared in the past, use related tools to search for the historical content.
68+
6. Even if a tool result does not directly answer the question, thoroughly extract and summarize all partial information, important details, patterns, constraints, or keywords that may help guide future steps. Never proceed to the next step without first ensuring that all significant insights from the current result have been fully considered.
69+
70+
## Tool-Use Communication Rules
71+
72+
1. **CRITICAL: After issuing exactly ONE tool call, STOP your response immediately. You must never make multiple tool calls in a single response. Do not include tool results, do not assume what the results will be, and do not continue with additional analysis or tool calls. The user will provide the actual tool results in their next message.**
73+
2. Do not present the final answer until the entire task is complete.
74+
3. Do not mention tool names.
75+
4. Do not engage in unnecessary back-and-forth or end with vague offers of help. Do not end your responses with questions or generic prompts.
76+
5. Do not use tools that do not exist.
77+
6. Unless otherwise requested, respond in the same language as the user's message.
78+
7. If the task does not require tool use, answer the user directly.
79+
80+
"""
81+
82+
# Add Chinese-specific instructions if enabled
83+
if chinese_context:
84+
prompt += """
85+
## 中文语境处理指导
86+
87+
当处理中文相关的任务时:
88+
1. **子任务委托 (Subtask Delegation)**:向worker代理委托的子任务应使用中文描述,确保任务内容准确传达
89+
2. **搜索策略 (Search Strategy)**:搜索关键词应使用中文,以获取更准确的中文内容和信息
90+
3. **问题分析 (Question Analysis)**:对中文问题的分析和理解应保持中文语境
91+
4. **思考过程 (Thinking Process)**:内部分析、推理、总结等思考过程都应使用中文,保持语义表达的一致性
92+
5. **信息整理 (Information Organization)**:从中文资源获取的信息应保持中文原文,避免不必要的翻译
93+
6. **各种输出 (All Outputs)**:所有输出内容包括步骤说明、状态更新、中间结果等都应使用中文
94+
7. **最终答案 (Final Answer)**:对于中文语境的问题,最终答案应使用中文回应
95+
96+
"""
97+
98+
return prompt
99+
100+
def generate_summarize_prompt(
101+
self,
102+
task_description: str,
103+
task_failed: bool = False,
104+
chinese_context: bool = False,
105+
) -> str:
106+
summarize_prompt = (
107+
(
108+
"============="
109+
"============="
110+
"============="
111+
"This is a direct instruction to you (the assistant), not the result of a tool call.\n\n"
112+
)
113+
+ (
114+
"**Important: You have either exhausted the context token limit or reached the maximum number of interaction turns without arriving at a conclusive answer. Therefore, you failed to complete the task. You Must explicitly state that you failed to complete the task in your response.**\n\n"
115+
if task_failed
116+
else ""
117+
)
118+
+ (
119+
"We are now ending this session, and your conversation history will be deleted. "
120+
"You must NOT initiate any further tool use. This is your final opportunity to report "
121+
"*all* of the information gathered during the session.\n\n"
122+
"Summarize the above conversation, and output the FINAL ANSWER to the original question.\n\n"
123+
"If a clear answer has already been provided earlier in the conversation, do not rethink or recalculate it — "
124+
"simply extract that answer and reformat it to match the required format below.\n"
125+
"If a definitive answer could not be determined, make a well-informed educated guess based on the conversation.\n\n"
126+
"The original question is repeated here for reference:\n\n"
127+
f"---\n{task_description}\n---\n\n"
128+
"Summarize ALL working history for this task, including your step-by-step thoughts, all tool calls, and all tool results (i.e., the full solving trajectory so far).\n"
129+
"Output the FINAL ANSWER and detailed supporting information of the task given to you.\n\n"
130+
"If you found any useful facts, data, or quotes directly relevant to the original task, include them clearly and completely.\n"
131+
"**Document the sources**: For each key fact or claim in your answer, mention which sources it came from and whether multiple sources confirmed it. If sources disagreed, explain the different viewpoints found.\n"
132+
"If you reached a conclusion or answer, include it as part of the response.\n"
133+
"If the task could not be fully answered, return all partially relevant findings, search results, quotes, and observations that might help a downstream agent solve the problem.\n"
134+
"If partial, conflicting, or inconclusive information was found, clearly indicate this in your response.\n\n"
135+
"Your final response should be a clear, complete, and structured report.\n"
136+
"Organize the content into logical sections with appropriate headings.\n"
137+
"Do NOT include any tool call instructions, speculative filler, or vague summaries.\n"
138+
"Focus on factual, specific, and well-organized information."
139+
"Output the final answer in the format: \\boxed{...}. The boxed answer should be a short phrase or a comma-separated list of numbers and/or strings."
140+
)
141+
)
142+
143+
# Add Chinese-specific summary instructions
144+
if chinese_context:
145+
summarize_prompt += """
146+
147+
## 中文总结要求
148+
149+
如果原始问题涉及中文语境:
150+
- **总结语言**:使用中文进行总结和回答
151+
- **思考过程**:回顾和总结思考过程时也应使用中文表达
152+
- **信息组织**:保持中文信息的原始格式和表达方式
153+
- **过程描述**:对工作历史、步骤描述、结果分析等各种输出都应使用中文
154+
- **最终答案**:确保最终答案符合中文表达习惯和用户期望
155+
"""
156+
return summarize_prompt

0 commit comments

Comments
 (0)