diff --git a/README.md b/README.md
index 057d8ed..8035078 100644
--- a/README.md
+++ b/README.md
@@ -7,12 +7,11 @@
-[](https://miromindai.github.io/MiroFlow/)
[](https://dr.miromind.ai/)
[](https://huggingface.co/collections/miromind-ai/mirothinker-v02-68af084a18035f57b17cd902)
[](https://huggingface.co/datasets/miromind-ai/MiroVerse-v0.1)
-
[](https://miromind.ai/blog/miroflow)
+
[](https://github.com/MiroMindAI)
[](https://discord.com/invite/GPqEnkzQZd)
[](https://huggingface.co/datasets/miromind-ai/MiroFlow-Benchmarks/resolve/main/assets/wechat.png)
@@ -22,7 +21,9 @@
-### π [Try our Demo!](https://dr.miromind.ai/)ο½[δΈζ](README_zh.md)ο½[ζ₯ζ¬θͺ](README_ja.md)
+## π **[READ THE DOCUMENTATION](https://miromindai.github.io/MiroFlow/)**
+
+### π [Try Demo](https://dr.miromind.ai/) ο½ [δΈζ](README_zh.md) ο½ [ζ₯ζ¬θͺ](README_ja.md)
diff --git a/config/agent_browsecomp-en_claude37sonnet.yaml b/config/agent_browsecomp-en_claude37sonnet.yaml
new file mode 100644
index 0000000..baa3e15
--- /dev/null
+++ b/config/agent_browsecomp-en_claude37sonnet.yaml
@@ -0,0 +1,78 @@
+defaults:
+ - benchmark: browsecomp-en
+ - override hydra/job_logging: none
+ - _self_ # Allow defining variables at the top of this file
+
+
+main_agent:
+ prompt_class: MainAgentPrompt_GAIA
+ llm:
+ provider_class: "ClaudeOpenRouterClient"
+ model_name: "anthropic/claude-3.7-sonnet"
+ async_client: true
+ temperature: 0.3
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 32000
+ openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
+ openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
+ openrouter_provider: "anthropic"
+ disable_cache_control: false
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-reasoning
+
+ max_turns: 50 # Maximum number of turns for main agent execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+ input_process:
+ hint_generation: true
+ hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+ output_process:
+ final_answer_extraction: true
+ final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+ add_message_id: true
+ keep_tool_result: -1
+ chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents:
+ agent-worker:
+ prompt_class: SubAgentWorkerPrompt
+ llm:
+ provider_class: "ClaudeOpenRouterClient"
+ model_name: "anthropic/claude-3.7-sonnet"
+ async_client: true
+ temperature: 0.3
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 32000
+ openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
+ openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
+ openrouter_provider: "anthropic"
+ disable_cache_control: false
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-searching
+ - tool-image-video
+ - tool-reading
+ - tool-code
+ - tool-audio
+
+    max_turns: 50 # Maximum number of turns for sub-agent worker execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
+
diff --git a/config/agent_mirothinker.yaml b/config/agent_browsecomp-en_mirothinker.yaml
similarity index 75%
rename from config/agent_mirothinker.yaml
rename to config/agent_browsecomp-en_mirothinker.yaml
index 709eeed..63bca34 100644
--- a/config/agent_mirothinker.yaml
+++ b/config/agent_browsecomp-en_mirothinker.yaml
@@ -1,35 +1,37 @@
defaults:
- - benchmark: gaia-validation
+ - benchmark: browsecomp-en
- override hydra/job_logging: none
- _self_ # Allow defining variables at the top of this file
main_agent:
- prompt_class: MainAgentPromptBoxedAnswer
+ prompt_class: MainAgentPrompt_GAIA
llm:
provider_class: "MiroThinkerSGLangClient"
- model_name: "MODEL_NAME"
+ model_name: "DUMMY_MODEL_NAME"
async_client: true
- temperature: 0.6
+ temperature: 0.3
top_p: 0.95
min_p: 0.0
top_k: -1
- max_tokens: 8192
+ max_tokens: 4096
oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}"
oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}"
keep_tool_result: -1
oai_tool_thinking: false
- tool_config: []
+ tool_config:
+ - tool-reasoning
- max_turns: -1 # Maximum number of turns for main agent execution
+ max_turns: 50 # Maximum number of turns for main agent execution
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
input_process:
hint_generation: false
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+
output_process:
- final_answer_extraction: false
+ final_answer_extraction: true
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
@@ -43,25 +45,31 @@ sub_agents:
prompt_class: SubAgentWorkerPrompt
llm:
provider_class: "MiroThinkerSGLangClient"
- model_name: "MODEL_NAME"
+ model_name: "DUMMY_MODEL_NAME"
async_client: true
- temperature: 0.6
- top_p: 0.95
+ temperature: 0.3
+ top_p: 1.0
min_p: 0.0
top_k: -1
- max_tokens: 8192
+ max_tokens: 4096
oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}"
oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}"
keep_tool_result: -1
oai_tool_thinking: false
tool_config:
+ - tool-searching
+ - tool-image-video
- tool-reading
+ - tool-code
+ - tool-audio
- max_turns: -1 # Maximum number of turns for main agent execution
+    max_turns: 50 # Maximum number of turns for sub-agent worker execution
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
# Can define some top-level or default parameters here
output_dir: logs/
-data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
\ No newline at end of file
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
+
diff --git a/config/agent_finsearchcomp_claude37sonnet.yaml b/config/agent_finsearchcomp_claude37sonnet.yaml
new file mode 100644
index 0000000..70cc22b
--- /dev/null
+++ b/config/agent_finsearchcomp_claude37sonnet.yaml
@@ -0,0 +1,77 @@
+defaults:
+ - benchmark: finsearchcomp
+ - override hydra/job_logging: none
+ - _self_ # Allow defining variables at the top of this file
+
+
+main_agent:
+ prompt_class: MainAgentPrompt_GAIA
+ llm:
+ provider_class: "ClaudeOpenRouterClient"
+ model_name: "anthropic/claude-3.7-sonnet"
+ async_client: true
+ temperature: 0.6
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 32000
+ openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
+ openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
+ openrouter_provider: "anthropic"
+ disable_cache_control: false
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-reasoning
+
+ max_turns: 20 # Maximum number of turns for main agent execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+ input_process:
+ hint_generation: true
+ hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+ output_process:
+ final_answer_extraction: true
+ final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+ add_message_id: true
+ keep_tool_result: -1
+ chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents:
+ agent-worker:
+ prompt_class: SubAgentWorkerPrompt
+ llm:
+ provider_class: "ClaudeOpenRouterClient"
+ model_name: "anthropic/claude-3.7-sonnet"
+ async_client: true
+ temperature: 0.6
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 32000
+ openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
+ openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
+ openrouter_provider: "anthropic"
+ disable_cache_control: false
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-searching
+ - tool-image-video
+ - tool-reading
+ - tool-code
+ - tool-audio
+
+    max_turns: 20 # Maximum number of turns for sub-agent worker execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
+
diff --git a/config/agent_finsearchcomp.yaml b/config/agent_finsearchcomp_mirothinker.yaml
similarity index 96%
rename from config/agent_finsearchcomp.yaml
rename to config/agent_finsearchcomp_mirothinker.yaml
index 16225e6..df0b56c 100644
--- a/config/agent_finsearchcomp.yaml
+++ b/config/agent_finsearchcomp_mirothinker.yaml
@@ -8,7 +8,7 @@ main_agent:
prompt_class: MainAgentPrompt_GAIA
llm:
provider_class: "MiroThinkerSGLangClient"
- model_name: "MODEL_NAME"
+ model_name: "DUMMY_MODEL_NAME"
async_client: true
temperature: 0.6
top_p: 0.95
@@ -44,7 +44,7 @@ sub_agents:
prompt_class: SubAgentWorkerPrompt
llm:
provider_class: "MiroThinkerSGLangClient"
- model_name: "MODEL_NAME"
+ model_name: "DUMMY_MODEL_NAME"
async_client: true
temperature: 0.6
top_p: 0.95
@@ -69,3 +69,5 @@ sub_agents:
# Can define some top-level or default parameters here
output_dir: logs/
data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
+
diff --git a/config/agent_gaia-validation-text-only_mirothinker.yaml b/config/agent_gaia-validation-text-only_mirothinker.yaml
index 4faec6a..a813115 100644
--- a/config/agent_gaia-validation-text-only_mirothinker.yaml
+++ b/config/agent_gaia-validation-text-only_mirothinker.yaml
@@ -8,7 +8,7 @@ main_agent:
prompt_class: MainAgentPrompt_GAIA
llm:
provider_class: "MiroThinkerSGLangClient"
- model_name: "MODEL_NAME"
+ model_name: "DUMMY_MODEL_NAME"
async_client: true
temperature: 0.3
top_p: 0.95
@@ -45,7 +45,7 @@ sub_agents:
prompt_class: SubAgentWorkerPrompt
llm:
provider_class: "MiroThinkerSGLangClient"
- model_name: "anthropic/claude-3.7-sonnet"
+ model_name: "DUMMY_MODEL_NAME"
async_client: true
temperature: 0.3
top_p: 1.0
diff --git a/config/agent_gaia-validation-text-only_mirothinker_single_agent.yaml b/config/agent_gaia-validation-text-only_mirothinker_single_agent.yaml
new file mode 100644
index 0000000..b9e92e6
--- /dev/null
+++ b/config/agent_gaia-validation-text-only_mirothinker_single_agent.yaml
@@ -0,0 +1,53 @@
+defaults:
+ - benchmark: gaia-validation-text-only
+ - override hydra/job_logging: none
+ - _self_ # Allow defining variables at the top of this file
+
+
+main_agent:
+ prompt_class: MainAgentPrompt_GAIA
+ llm:
+ provider_class: "MiroThinkerSGLangClient"
+    model_name: "DUMMY_MODEL_NAME"
+ async_client: true
+ temperature: 0.3
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 4096
+ oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}"
+ oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}"
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-reasoning
+ - tool-searching
+ - tool-image-video
+ - tool-reading
+ - tool-code
+ - tool-audio
+
+ max_turns: 50 # Maximum number of turns for main agent execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+ input_process:
+ hint_generation: false
+ hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ output_process:
+ final_answer_extraction: true
+ final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+ add_message_id: true
+ keep_tool_result: -1
+ chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents: null
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
diff --git a/config/agent_llm_claude37sonnet.yaml b/config/agent_llm_claude37sonnet.yaml
new file mode 100644
index 0000000..59261b7
--- /dev/null
+++ b/config/agent_llm_claude37sonnet.yaml
@@ -0,0 +1,53 @@
+defaults:
+ - benchmark: example_dataset
+ - override hydra/job_logging: none
+ - _self_ # Allow defining variables at the top of this file
+
+
+main_agent:
+ prompt_class: MainAgentPromptBoxedAnswer
+ llm:
+ provider_class: "ClaudeOpenRouterClient"
+ model_name: "anthropic/claude-3.7-sonnet"
+ async_client: true
+ temperature: 0.3
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 32000
+ openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
+ openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
+ openrouter_provider: "anthropic"
+ disable_cache_control: false
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-reading
+ - tool-searching
+
+ max_turns: 20 # Maximum number of turns for main agent execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+ input_process:
+ hint_generation: false
+ hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+ output_process:
+ final_answer_extraction: false
+ final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+ add_message_id: true
+ keep_tool_result: -1
+ chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents: null
+
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
+
+
diff --git a/config/agent_llm_claude37sonnet_anthropic.yaml b/config/agent_llm_claude37sonnet_anthropic.yaml
new file mode 100644
index 0000000..a57851e
--- /dev/null
+++ b/config/agent_llm_claude37sonnet_anthropic.yaml
@@ -0,0 +1,51 @@
+defaults:
+ - benchmark: example_dataset
+ - override hydra/job_logging: none
+ - _self_ # Allow defining variables at the top of this file
+
+
+main_agent:
+ prompt_class: MainAgentPromptBoxedAnswer
+ llm:
+ provider_class: "ClaudeAnthropicClient"
+ model_name: "claude-3-7-sonnet-20250219"
+ async_client: true
+ temperature: 0.3
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 16000
+ anthropic_api_key: "${oc.env:ANTHROPIC_API_KEY,???}"
+ anthropic_base_url: "${oc.env:ANTHROPIC_BASE_URL,https://api.anthropic.com}"
+ disable_cache_control: false
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-reading
+ - tool-searching
+
+ max_turns: 20 # Maximum number of turns for main agent execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+ input_process:
+ hint_generation: false
+ hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+ output_process:
+ final_answer_extraction: false
+ final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+ add_message_id: true
+ keep_tool_result: -1
+ chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents: null
+
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
+
diff --git a/config/agent_llm_gpt4o.yaml b/config/agent_llm_gpt4o.yaml
new file mode 100644
index 0000000..a94656a
--- /dev/null
+++ b/config/agent_llm_gpt4o.yaml
@@ -0,0 +1,50 @@
+defaults:
+ - benchmark: example_dataset
+ - override hydra/job_logging: none
+ - _self_ # Allow defining variables at the top of this file
+
+
+main_agent:
+ prompt_class: MainAgentPromptBoxedAnswer
+ llm:
+ provider_class: "GPTOpenAIClient"
+ model_name: "gpt-4o"
+ async_client: true
+ temperature: 0.7
+ top_p: 1.0
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 4096
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+ openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-reading
+ - tool-searching
+
+ max_turns: 20 # Maximum number of turns for main agent execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+ input_process:
+ hint_generation: false
+ hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+ output_process:
+ final_answer_extraction: false
+ final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+ add_message_id: true
+ keep_tool_result: -1
+ chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents: null
+
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
+
diff --git a/config/agent_llm_gpt5.yaml b/config/agent_llm_gpt5.yaml
new file mode 100644
index 0000000..5223b77
--- /dev/null
+++ b/config/agent_llm_gpt5.yaml
@@ -0,0 +1,51 @@
+defaults:
+ - benchmark: example_dataset
+ - override hydra/job_logging: none
+ - _self_ # Allow defining variables at the top of this file
+
+
+main_agent:
+ prompt_class: MainAgentPromptBoxedAnswer
+ llm:
+ provider_class: "GPT5OpenAIClient"
+ model_name: "gpt-5"
+ async_client: true
+ temperature: 1.0
+ top_p: 1.0
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 4096
+ reasoning_effort: "high" # Use high in the main agent, and use the default medium in the sub-agent.
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+ openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-reading
+ - tool-searching
+
+ max_turns: 20 # Maximum number of turns for main agent execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+ input_process:
+ hint_generation: false
+ hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+ output_process:
+ final_answer_extraction: false
+ final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+ add_message_id: true
+ keep_tool_result: -1
+ chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents: null
+
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
+
diff --git a/config/agent_llm_mirothinker.yaml b/config/agent_llm_mirothinker.yaml
new file mode 100644
index 0000000..afc855b
--- /dev/null
+++ b/config/agent_llm_mirothinker.yaml
@@ -0,0 +1,49 @@
+defaults:
+ - benchmark: example_dataset
+ - override hydra/job_logging: none
+ - _self_ # Allow defining variables at the top of this file
+
+
+main_agent:
+ prompt_class: MainAgentPromptBoxedAnswer
+ llm:
+ provider_class: "MiroThinkerSGLangClient"
+ model_name: "DUMMY_MODEL_NAME"
+ async_client: true
+ temperature: 0.6
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 8192
+ oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}"
+ oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}"
+ keep_tool_result: -1
+ oai_tool_thinking: false
+
+ tool_config:
+ - tool-reading
+ - tool-searching
+
+ max_turns: 20 # Maximum number of turns for main agent execution
+ max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
+
+ input_process:
+ hint_generation: false
+ hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+ output_process:
+ final_answer_extraction: false
+ final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+ add_message_id: true
+ keep_tool_result: -1
+ chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents: null
+
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
diff --git a/config/agent_quickstart_reading.yaml b/config/agent_quickstart_reading.yaml
index e016162..ed3b69f 100644
--- a/config/agent_quickstart_reading.yaml
+++ b/config/agent_quickstart_reading.yaml
@@ -1,5 +1,5 @@
defaults:
- - benchmark: gaia-validation
+ - benchmark: example_dataset
- override hydra/job_logging: none
- _self_ # Allow defining variables at the top of this file
diff --git a/config/agent_quickstart_search.yaml b/config/agent_quickstart_search.yaml
index 96df9e5..a027d2b 100644
--- a/config/agent_quickstart_search.yaml
+++ b/config/agent_quickstart_search.yaml
@@ -1,5 +1,5 @@
defaults:
- - benchmark: gaia-validation
+ - benchmark: example_dataset
- override hydra/job_logging: none
- _self_ # Allow defining variables at the top of this file
diff --git a/config/agent_quickstart_single_agent.yaml b/config/agent_quickstart_single_agent.yaml
index 068da69..39d7068 100644
--- a/config/agent_quickstart_single_agent.yaml
+++ b/config/agent_quickstart_single_agent.yaml
@@ -1,5 +1,5 @@
defaults:
- - benchmark: gaia-validation
+ - benchmark: example_dataset
- override hydra/job_logging: none
- _self_ # Allow defining variables at the top of this file
diff --git a/config/agent_xbench-ds.yaml b/config/agent_xbench-ds_claude37sonnet.yaml
similarity index 99%
rename from config/agent_xbench-ds.yaml
rename to config/agent_xbench-ds_claude37sonnet.yaml
index 6b5213f..aeaac08 100644
--- a/config/agent_xbench-ds.yaml
+++ b/config/agent_xbench-ds_claude37sonnet.yaml
@@ -75,3 +75,4 @@ sub_agents:
output_dir: logs/
data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
+
diff --git a/config/benchmark/browsecomp-en.yaml b/config/benchmark/browsecomp-en.yaml
new file mode 100644
index 0000000..98c32bb
--- /dev/null
+++ b/config/benchmark/browsecomp-en.yaml
@@ -0,0 +1,20 @@
+# config/benchmark/browsecomp-en.yaml
+defaults:
+ - default
+ - _self_
+
+name: "browsecomp-en"
+
+data:
+ data_dir: "${data_dir}/browsecomp-test" # Path to browsecomp-test (English) dataset
+ metadata_file: "standardized_data.jsonl" # Metadata filename
+ whitelist: [] # Optional: List of specific task_ids to run
+
+execution:
+ max_tasks: null # null = no limit, or specify a number
+ max_concurrent: 5 # Number of parallel tasks
+ pass_at_k: 1 # Number of attempts per task
+
+# OpenAI API key for evaluation (required for browsecomp since it has ground truth)
+openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+
diff --git a/config/benchmark/example_dataset.yaml b/config/benchmark/example_dataset.yaml
new file mode 100644
index 0000000..36eee7e
--- /dev/null
+++ b/config/benchmark/example_dataset.yaml
@@ -0,0 +1,21 @@
+# config/benchmark/example_dataset.yaml
+defaults:
+ - default
+ - _self_
+
+name: "example_dataset"
+
+data:
+ data_dir: "${data_dir}/example_dataset" # Path to example_dataset
+ metadata_file: "standardized_data.jsonl" # Metadata filename
+ whitelist: [] # Optional: List of specific task_ids to run
+
+execution:
+ max_tasks: null # null = no limit, or specify a number
+ max_concurrent: 5 # Number of parallel tasks
+ pass_at_k: 1 # Number of attempts per task
+
+# OpenAI API key for evaluation (required for example_dataset since it has ground truth)
+openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+
+
diff --git a/docs/mkdocs/docs/browsecomp_en.md b/docs/mkdocs/docs/browsecomp_en.md
new file mode 100644
index 0000000..cdde3c6
--- /dev/null
+++ b/docs/mkdocs/docs/browsecomp_en.md
@@ -0,0 +1,91 @@
+# BrowseComp-EN (English)
+
+MiroFlow's evaluation on the BrowseComp-EN benchmark demonstrates advanced web browsing and information retrieval capabilities.
+
+More details: [BrowseComp: A Simple Yet Challenging Benchmark for Browsing Agents](https://arxiv.org/abs/2504.12516)
+
+---
+
+## Dataset Overview
+
+!!! abstract "Key Dataset Characteristics"
+
+ - **Total Tasks**: 1,266 tasks in the test split
+ - **Language**: English
+ - **Task Types**: Web browsing, search, and information retrieval
+ - **Evaluation**: Automated comparison with ground truth answers
+
+---
+
+## Quick Start Guide
+
+### Step 1: Prepare the BrowseComp-EN Dataset
+
+```bash title="Download BrowseComp-EN Dataset"
+uv run main.py prepare-benchmark get browsecomp-test
+```
+
+This will create the standardized dataset at `data/browsecomp-test/standardized_data.jsonl`.
+
+!!! warning "Requires HuggingFace Token"
+ Add your HuggingFace token to `.env`: `HF_TOKEN="your_token_here"`
+
+### Step 2: Configure API Keys
+
+```env title=".env Configuration"
+# Search and web scraping
+SERPER_API_KEY="xxx"
+JINA_API_KEY="xxx"
+
+# Code execution
+E2B_API_KEY="xxx"
+
+# LLM (Claude 3.7 Sonnet via OpenRouter)
+OPENROUTER_API_KEY="xxx"
+OPENROUTER_BASE_URL="https://openrouter.ai/api/v1"
+
+# Evaluation and hint generation
+OPENAI_API_KEY="xxx"
+
+# Vision capabilities
+ANTHROPIC_API_KEY="xxx"
+GEMINI_API_KEY="xxx"
+```
+
+### Step 3: Run the Evaluation
+
+```bash title="Run BrowseComp-EN Evaluation"
+uv run main.py common-benchmark --config_file_name=agent_browsecomp-en_claude37sonnet benchmark=browsecomp-en output_dir="logs/browsecomp-en/$(date +"%Y%m%d_%H%M")"
+```
+
+Results are automatically generated in the output directory:
+- `benchmark_results.jsonl` - Detailed results for each task
+- `benchmark_results_pass_at_1_accuracy.txt` - Summary accuracy statistics
+
+---
+
+## Usage Examples
+
+```bash title="Limited Task Testing"
+# Test with 10 tasks only
+uv run main.py common-benchmark --config_file_name=agent_browsecomp-en_claude37sonnet benchmark=browsecomp-en benchmark.execution.max_tasks=10 output_dir="logs/browsecomp-en/$(date +"%Y%m%d_%H%M")"
+```
+
+```bash title="Using MiroThinker Model"
+uv run main.py common-benchmark --config_file_name=agent_browsecomp-en_mirothinker benchmark=browsecomp-en output_dir="logs/browsecomp-en/$(date +"%Y%m%d_%H%M")"
+```
+
+---
+
+## Available Agent Configurations
+
+| Agent Configuration | Model | Use Case |
+|-------------------|-------|----------|
+| `agent_browsecomp-en_claude37sonnet` | Claude 3.7 Sonnet | Recommended for better performance |
+| `agent_browsecomp-en_mirothinker` | MiroThinker | For local deployment |
+
+---
+
+!!! info "Documentation Info"
+ **Last Updated:** October 2025 Β· **Doc Contributor:** Team @ MiroMind AI
+
diff --git a/docs/mkdocs/docs/claude-3.7-sonnet.md b/docs/mkdocs/docs/claude-3.7-sonnet.md
index 4d71679..d875683 100644
--- a/docs/mkdocs/docs/claude-3.7-sonnet.md
+++ b/docs/mkdocs/docs/claude-3.7-sonnet.md
@@ -20,18 +20,33 @@ main_agent:
llm:
provider_class: "ClaudeAnthropicClient"
model_name: "claude-3-7-sonnet-20250219" # Use actual model name from Anthropic API
+ async_client: true
+ temperature: 0.3
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 32000
anthropic_api_key: "${oc.env:ANTHROPIC_API_KEY,???}"
anthropic_base_url: "${oc.env:ANTHROPIC_BASE_URL,https://api.anthropic.com}"
+ disable_cache_control: false
+ keep_tool_result: -1
+ oai_tool_thinking: false
```
+!!! tip "Sampling Parameters"
+ - `min_p` and `top_k` are required in the configuration
+ - Anthropic API natively supports `top_k`, but `min_p` is not used by the API
+ - Set `min_p: 0.0` (disabled) and `top_k: -1` (disabled) or a specific value like `top_k: 40`
+
## Usage
```bash title="Example Command"
-# Use existing config
-uv run main.py trace --config_file_name=your_config_file \
- --task="Your task" --task_file_name="data/file.txt"
+# Run with Claude 3.7 Sonnet (Anthropic SDK) on example dataset
+uv run main.py common-benchmark --config_file_name=agent_llm_claude37sonnet_anthropic output_dir="logs/test"
```
+The `agent_llm_claude37sonnet_anthropic.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark.
+
---
!!! info "Documentation Info"
diff --git a/docs/mkdocs/docs/finsearchcomp.md b/docs/mkdocs/docs/finsearchcomp.md
index 925909e..8d2556e 100644
--- a/docs/mkdocs/docs/finsearchcomp.md
+++ b/docs/mkdocs/docs/finsearchcomp.md
@@ -2,7 +2,7 @@
MiroFlow's evaluation on the FinSearchComp benchmark demonstrates capabilities in financial information search and analysis tasks, showcasing advanced reasoning abilities in complex financial research scenarios.
-More details: [FinSearchComp Dataset](https://huggingface.co/datasets/ByteSeedXpert/FinSearchComp)
+More details: [FinSearchComp: Towards a Realistic, Expert-Level Evaluation of Financial Search and Reasoning](https://arxiv.org/abs/2509.13160)
---
@@ -59,9 +59,9 @@ JINA_API_KEY="xxx"
# For Linux sandbox (code execution environment)
E2B_API_KEY="xxx"
-# We use MiroThinker model for financial analysis
-OAI_MIROTHINKER_API_KEY="xxx"
-OAI_MIROTHINKER_BASE_URL="http://localhost:61005/v1"
+# We use Claude 3.7 Sonnet for financial analysis via OpenRouter
+OPENROUTER_API_KEY="xxx"
+OPENROUTER_BASE_URL="https://openrouter.ai/api/v1"
# Used for hint generation and final answer extraction
OPENAI_API_KEY="xxx"
@@ -80,7 +80,7 @@ GEMINI_API_KEY="xxx"
Execute the following command to run evaluation on the FinSearchComp dataset:
```bash title="Run FinSearchComp Evaluation"
-uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")"
+uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")"
```
!!! tip "Progress Monitoring and Resume"
@@ -93,7 +93,7 @@ uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark
If you need to resume an interrupted evaluation, specify the same output directory to continue from where you left off.
```bash title="Resume Evaluation, e.g."
- uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp output_dir=${PATH_TO_LOG}
+ uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp output_dir=${PATH_TO_LOG}
```
### Step 4: Extract Results
@@ -129,7 +129,7 @@ uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark
After running evaluations, you'll find the following structure:
```
-logs/finsearchcomp/agent_finsearchcomp_YYYYMMDD_HHMM/
+logs/finsearchcomp/agent_finsearchcomp_claude37sonnet_YYYYMMDD_HHMM/
βββ benchmark_results.jsonl # Task results summary
βββ benchmark_results_pass_at_1_accuracy.txt # Accuracy statistics
βββ task_(T1)Time_Sensitive_Data_Fetching_*.json # T1 task traces
@@ -154,12 +154,12 @@ The progress checker provides detailed statistics:
### Single Run Evaluation
```bash title="Basic Evaluation"
-uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")"
+uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")"
```
### Limited Task Testing
```bash title="Test with Limited Tasks"
-uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp benchmark.execution.max_tasks=5 output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")"
+uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp benchmark.execution.max_tasks=5 output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")"
```
### Custom Agent Configuration
diff --git a/docs/mkdocs/docs/futurex.md b/docs/mkdocs/docs/futurex.md
index eec7860..97a3da0 100644
--- a/docs/mkdocs/docs/futurex.md
+++ b/docs/mkdocs/docs/futurex.md
@@ -2,6 +2,9 @@
MiroFlow's evaluation on the Futurex-Online benchmark demonstrates capabilities in future event prediction tasks.
+More details: [FutureX: An Advanced Live Benchmark for LLM Agents in Future Prediction](https://arxiv.org/abs/2508.11987)
+
+
---
## Dataset Overview
diff --git a/docs/mkdocs/docs/index.md b/docs/mkdocs/docs/index.md
index 42ea809..bdbdb4e 100644
--- a/docs/mkdocs/docs/index.md
+++ b/docs/mkdocs/docs/index.md
@@ -9,7 +9,29 @@
## π What is MiroFlow?
-**MiroFlow** is a comprehensive agentic foundation platform for building intelligent AI agents that achieve state-of-the-art performance on complex tasks. It provides enhanced conversation management, flexible tool integration, and extensive benchmark evaluations across multiple datasets.
+**MiroFlow** is an agentic AI platform for building intelligent agents with flexible tool integration and comprehensive benchmark evaluations.
+
+
+## π Recent Updates
+
+!!! success "Latest Changes & Improvements"
+
+ **Oct 2025** -
+
+    - Added support for Index
+    - Added support for BrowseComp-EN evaluation
+    - Added support for MiroAPI [#76](https://github.com/MiroMindAI/MiroFlow/pull/76)
+
+
+ - π Added support for FinSearchComp evaluation benchmark [#51](https://github.com/MiroMindAI/MiroFlow/pull/51)
+ - π Added support for XBench-DS (Deep Search) evaluation [#47](https://github.com/MiroMindAI/MiroFlow/pull/47)
+ - π§ Updated o3 hints and summary to more models [#58](https://github.com/MiroMindAI/MiroFlow/pull/58)
+ - β¨ Added support for GPT-5 integration [#52](https://github.com/MiroMindAI/MiroFlow/pull/52)
+ - π§ Improved tool logs and per-task log storage [#69](https://github.com/MiroMindAI/MiroFlow/pull/69)
+ - π€ Added support for single agent mode [#67](https://github.com/MiroMindAI/MiroFlow/pull/67)
+ - π Added comprehensive collection of agentic AI research papers [#65](https://github.com/MiroMindAI/MiroFlow/pull/65)
+
+
@@ -53,21 +75,6 @@ Explore the complete MiroMind AI ecosystem:
| **MiroTrain** | Complete training recipes and tools | [GitHub](https://github.com/MiroMindAI/MiroTrain) :material-arrow-right: |
-## π Recent Updates
-
-!!! success "Latest Changes & Improvements"
-
- **Oct 2025** -
-
- - π Added support for FinSearchComp evaluation benchmark [#51](https://github.com/MiroMindAI/MiroFlow/pull/51)
- - π Added support for XBench-DS (Deep Search) evaluation [#47](https://github.com/MiroMindAI/MiroFlow/pull/47)
- - π§ Updated o3 hints and summary to more models [#58](https://github.com/MiroMindAI/MiroFlow/pull/58)
- - β¨ Added support for GPT-5 integration [#52](https://github.com/MiroMindAI/MiroFlow/pull/52)
- - π§ Improved tool logs and per-task log storage [#69](https://github.com/MiroMindAI/MiroFlow/pull/69)
- - π€ Added support for single agent mode [#67](https://github.com/MiroMindAI/MiroFlow/pull/67)
- - π Added comprehensive collection of agentic AI research papers [#65](https://github.com/MiroMindAI/MiroFlow/pull/65)
-
-
diff --git a/docs/mkdocs/docs/mirothinker.md b/docs/mkdocs/docs/mirothinker.md
index ea7b242..93213d8 100644
--- a/docs/mkdocs/docs/mirothinker.md
+++ b/docs/mkdocs/docs/mirothinker.md
@@ -56,19 +56,17 @@ OAI_MIROTHINKER_BASE_URL="http://localhost:61005/v1"
Test your setup with the following command:
```bash title="Test Command"
-uv run main.py trace --config_file_name=agent_mirothinker \
- --task="What is the first country listed in the XLSX file that have names starting with Co?" \
- --task_file_name="data/FSI-2023-DOWNLOAD.xlsx"
+uv run main.py common-benchmark --config_file_name=agent_llm_mirothinker output_dir="logs/test"
```
This command will:
-- Use the `agent_mirothinker` configuration with the dedicated MiroThinkerSGLangClient
-- Process the specified Excel file
-- Query the model to find countries starting with "Co"
+- Use the `agent_llm_mirothinker` configuration with the dedicated MiroThinkerSGLangClient
+- Run the example dataset benchmark (configured in the YAML file)
+- Test the model's question-answering capabilities
### Configuration Details
-The `./config/agent_mirothinker.yaml` configuration file uses:
+The `./config/agent_llm_mirothinker.yaml` configuration file uses:
- `provider_class: "MiroThinkerSGLangClient"` - A dedicated client for MiroThinker models deployed with SGLang
- Model path and generation parameters (temperature, top_p, max_tokens, etc.)
diff --git a/docs/mkdocs/docs/openai-gpt.md b/docs/mkdocs/docs/openai-gpt.md
deleted file mode 100644
index e111449..0000000
--- a/docs/mkdocs/docs/openai-gpt.md
+++ /dev/null
@@ -1,78 +0,0 @@
-# OpenAI GPT Models
-
-OpenAI's latest models including GPT-5, GPT-4o and advanced reasoning models with strong coding, vision, and reasoning capabilities.
-
-## Client Used for GPT-5
-
-`GPT5OpenAIClient`
-
-### Environment Setup
-
-```bash title="Environment Variables"
-export OPENAI_API_KEY="your-openai-key"
-export OPENAI_BASE_URL="https://api.openai.com/v1" # optional
-```
-
-### Configuration
-
-```yaml title="Agent Configuration"
-main_agent:
- llm:
- provider_class: "GPT5OpenAIClient"
- model_name: "gpt-5"
- async_client: true
- temperature: 1.0
- top_p: 1.0
- min_p: 0.0
- top_k: -1
- max_tokens: 128000
- reasoning_effort: "high" # Use high in the main agent, and use the default medium in the sub-agent.
- openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
- openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
-```
-
-### Usage
-
-```bash title="Example Command"
-# Create custom OpenAI config
-uv run main.py trace --config_file_name=your_config_file \
- --task="Your task" --task_file_name="data/file.txt"
-```
-
-## Client Used for GPT-4o
-
-`GPTOpenAIClient`
-
-### Environment Setup
-
-```bash title="Environment Variables"
-export OPENAI_API_KEY="your-openai-key"
-export OPENAI_BASE_URL="https://api.openai.com/v1" # optional
-```
-
-### Configuration
-
-```yaml title="Agent Configuration"
-main_agent:
- llm:
- provider_class: "GPTOpenAIClient"
- model_name: "gpt-4o" # or gpt-4o-mini, etc.
- openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
- openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
-```
-
-### Usage
-
-```bash title="Example Command"
-# Create custom OpenAI config
-uv run main.py trace --config_file_name=your_config_file \
- --task="Your task" --task_file_name="data/file.txt"
-```
-
-!!! note "Configuration Notes"
- - `GPTOpenAIClient` also supports GPT-5, but it has not been fully validated on MiroFlow yet. We recommend using `GPT5OpenAIClient`.
-
----
-
-!!! info "Documentation Info"
- **Last Updated:** October 2025 Β· **Doc Contributor:** Team @ MiroMind AI
\ No newline at end of file
diff --git a/docs/mkdocs/docs/openai-gpt4o.md b/docs/mkdocs/docs/openai-gpt4o.md
new file mode 100644
index 0000000..4bc59bb
--- /dev/null
+++ b/docs/mkdocs/docs/openai-gpt4o.md
@@ -0,0 +1,54 @@
+# OpenAI GPT-4o
+
+OpenAI's GPT-4o model with multimodal capabilities, strong reasoning, and efficient performance.
+
+## Client Configuration
+
+**Client Class**: `GPTOpenAIClient`
+
+### Environment Setup
+
+```bash title="Environment Variables"
+export OPENAI_API_KEY="your-openai-key"
+export OPENAI_BASE_URL="https://api.openai.com/v1" # optional
+```
+
+### Agent Configuration
+
+```yaml title="Agent Configuration"
+main_agent:
+ llm:
+ provider_class: "GPTOpenAIClient"
+ model_name: "gpt-4o" # or gpt-4o-mini
+ async_client: true
+ temperature: 0.7
+ top_p: 1.0
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 16000
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+ openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
+```
+
+### Usage
+
+```bash title="Example Command"
+# Run with GPT-4o on example dataset
+uv run main.py common-benchmark --config_file_name=agent_llm_gpt4o output_dir="logs/test"
+```
+
+The `agent_llm_gpt4o.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark.
+
+!!! note "Available Models"
+ The `GPTOpenAIClient` supports multiple GPT-4o variants:
+ - `gpt-4o` - Full GPT-4o model
+ - `gpt-4o-mini` - Smaller, faster variant
+
+!!! warning "GPT-5 Support"
+ `GPTOpenAIClient` also supports GPT-5, but it has not been fully validated on MiroFlow yet. We recommend using `GPT5OpenAIClient` for GPT-5.
+
+---
+
+!!! info "Documentation Info"
+ **Last Updated:** October 2025 Β· **Doc Contributor:** Team @ MiroMind AI
+
diff --git a/docs/mkdocs/docs/openai-gpt5.md b/docs/mkdocs/docs/openai-gpt5.md
new file mode 100644
index 0000000..4409a7e
--- /dev/null
+++ b/docs/mkdocs/docs/openai-gpt5.md
@@ -0,0 +1,53 @@
+# OpenAI GPT-5
+
+OpenAI's GPT-5 model with advanced reasoning capabilities and strong coding, vision, and problem-solving abilities.
+
+## Client Configuration
+
+**Client Class**: `GPT5OpenAIClient`
+
+### Environment Setup
+
+```bash title="Environment Variables"
+export OPENAI_API_KEY="your-openai-key"
+export OPENAI_BASE_URL="https://api.openai.com/v1" # optional
+```
+
+### Agent Configuration
+
+```yaml title="Agent Configuration"
+main_agent:
+ llm:
+ provider_class: "GPT5OpenAIClient"
+ model_name: "gpt-5"
+ async_client: true
+ temperature: 1.0
+ top_p: 1.0
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 16000
+ reasoning_effort: "high" # Use high in the main agent, and use the default medium in the sub-agent.
+ openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+ openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
+```
+
+### Usage
+
+```bash title="Example Command"
+# Run with GPT-5 on example dataset
+uv run main.py common-benchmark --config_file_name=agent_llm_gpt5 output_dir="logs/test"
+```
+
+The `agent_llm_gpt5.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark.
+
+!!! tip "Reasoning Effort"
+ GPT-5 supports the `reasoning_effort` parameter. The configuration uses `"high"` for better reasoning performance.
+
+!!! tip "Sampling Parameters"
+ While `min_p` and `top_k` are required in the configuration, OpenAI's API does not use them. Set them to `min_p: 0.0` and `top_k: -1` (disabled).
+
+---
+
+!!! info "Documentation Info"
+ **Last Updated:** October 2025 Β· **Doc Contributor:** Team @ MiroMind AI
+
diff --git a/docs/mkdocs/docs/openrouter-claude-3.7-sonnet.md b/docs/mkdocs/docs/openrouter-claude-3.7-sonnet.md
index d932f0b..83c3ddf 100644
--- a/docs/mkdocs/docs/openrouter-claude-3.7-sonnet.md
+++ b/docs/mkdocs/docs/openrouter-claude-3.7-sonnet.md
@@ -20,27 +20,30 @@ main_agent:
llm:
provider_class: "ClaudeOpenRouterClient"
model_name: "anthropic/claude-3.7-sonnet" # or openai/gpt-4, etc.
+ async_client: true
+ temperature: 0.3
+ top_p: 0.95
+ min_p: 0.0
+ top_k: -1
+ max_tokens: 32000
openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
openrouter_provider: "anthropic" # Force provider, or "" for auto
+ disable_cache_control: false
+ keep_tool_result: -1
+ oai_tool_thinking: false
```
-## Other Supported Models
-
-- `openai/gpt-4`
-- `openai/gpt-3.5-turbo`
-- `anthropic/claude-3-opus`
-- `google/gemini-pro`
-- Many others via unified OpenAI format
## Usage
```bash title="Example Command"
-# Use existing OpenRouter config
-uv run main.py trace --config_file_name=your_config_file \
- --task="Your task" --task_file_name="data/file.txt"
+# Run with Claude 3.7 Sonnet on example dataset
+uv run main.py common-benchmark --config_file_name=agent_llm_claude37sonnet output_dir="logs/test"
```
+The `agent_llm_claude37sonnet.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark.
+
## Benefits vs Direct API
- Unified chat format
diff --git a/docs/mkdocs/docs/xbench_ds.md b/docs/mkdocs/docs/xbench_ds.md
index 2ab9dc0..c1f9bf7 100644
--- a/docs/mkdocs/docs/xbench_ds.md
+++ b/docs/mkdocs/docs/xbench_ds.md
@@ -52,7 +52,7 @@ OPENAI_BASE_URL="https://api.openai.com/v1"
```bash
uv run main.py common-benchmark \
- --config_file_name=agent_xbench-ds \
+ --config_file_name=agent_xbench-ds_claude37sonnet \
output_dir="logs/xbench-ds/$(date +"%Y%m%d_%H%M")"
```
@@ -72,7 +72,7 @@ Replace `$PATH_TO_LOG` with your actual output directory path.
```bash title="Resume Interrupted Evaluation"
uv run main.py common-benchmark \
- --config_file_name=agent_xbench-ds \
+ --config_file_name=agent_xbench-ds_claude37sonnet \
output_dir="logs/xbench-ds/20250922_1430"
```
diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml
index 60e0ff3..f45f9d7 100644
--- a/docs/mkdocs/mkdocs.yml
+++ b/docs/mkdocs/mkdocs.yml
@@ -37,62 +37,86 @@ repo_url: https://github.com/MiroMindAI/MiroFlow
nav:
- - Introduction:
+ - π Introduction:
- News & Updates: index.md
- License: license.md
- - Quick Start:
+ - π Quick Start:
- Quickstart: quickstart.md
- Core Concepts: core_concepts.md
- YAML Configuration: yaml_config.md
- - Evaluation:
+ - "π Evaluation":
- Overview: evaluation_overview.md
- - Benchmarks:
- - GAIA-Validation:
- - Prerequisites: gaia_validation_prerequisites.md
- - Claude-3.7-Sonnet: gaia_validation_claude37sonnet.md
- - GPT-5: gaia_validation_gpt5.md
- - MiroThinker: gaia_validation_mirothinker.md
- - GAIA-Validation-Text-Only: gaia_validation_text_only.md
- - GAIA-Test: gaia_test.md
- - FutureX: futurex.md
- - xBench-DeepSearch: xbench_ds.md
- - FinSearchComp: finsearchcomp.md
- - Download Datasets: download_datasets.md
- - Add New Benchmarks: contribute_benchmarks.md
-
- - Tools:
+ - How to add new benchmarks: contribute_benchmarks.md
+ - "": ""
+ - "": ""
+ - "": ""
+ - "": ""
+ - GAIA-Val:
+ - Prepare Dataset: gaia_validation_prerequisites.md
+ - Claude-3.7-Sonnet: gaia_validation_claude37sonnet.md
+ - GPT-5: gaia_validation_gpt5.md
+ - MiroThinker: gaia_validation_mirothinker.md
+ - GAIA-Val-Text: gaia_validation_text_only.md
+ - GAIA-Test: gaia_test.md
+ - BrowseComp-EN: browsecomp_en.md
+ - FutureX: futurex.md
+ - xBench-DeepSearch: xbench_ds.md
+ - FinSearchComp: finsearchcomp.md
+
+ # - Benchmarks:
+ # - GAIA-Validation-Text-Only: gaia_validation_text_only.md
+ # - GAIA-Test: gaia_test.md
+ # - BrowseComp-EN: browsecomp_en.md
+ # - FutureX: futurex.md
+ # - xBench-DeepSearch: xbench_ds.md
+ # - FinSearchComp: finsearchcomp.md
+ # - Download Datasets: download_datasets.md
+
+
+
+ - π§ Tools:
- Overview: tool_overview.md
- - Tools:
- - tool-reasoning: tool_reasoning.md
- - tool-reasoning-os: tool_reasoning_os.md
- - tool-image-video: tool_vqa.md
- - tool-image-video-os: tool_vqa_os.md
- - tool-audio-os: tool_audio_os.md
- - tool-searching: tool_searching.md
- - tool-python: tool_python.md
+ - How to add new tools: contribute_tools.md
+ - "": ""
+ - "": ""
+ - "": ""
+ - "": ""
+ - tool-reasoning: tool_reasoning.md
+ - tool-reasoning-os: tool_reasoning_os.md
+ - tool-image-video: tool_vqa.md
+ - tool-image-video-os: tool_vqa_os.md
+ - tool-audio-os: tool_audio_os.md
+ - tool-searching: tool_searching.md
+ - tool-python: tool_python.md
+ - "": ""
+ - "": ""
+ - "": ""
+ - "": ""
- Advanced Features:
- E2B Advanced Features: e2b_advanced_features.md
- MiroAPI: miro_api.md
- - Add New Tools: contribute_tools.md
- - LLM Clients:
+ - π€ LLM Clients:
- Overview: llm_clients_overview.md
- - Models:
- - MiroThinker: mirothinker.md
- - Claude-3.7-Sonnet:
- - Official SDK: claude-3.7-sonnet.md
- - OpenRouter: openrouter-claude-3.7-sonnet.md
- - OpenAI-GPT: openai-gpt.md
- - Add New LLM Clients: contribute_llm_clients.md
-
- - Resources:
- - π All About Agents: all_about_agents.md
- - π Open Source Data: data.md
- - π± Applications: applications.md
- - π FAQs: faqs.md
- - π Contributors: contributors.md
+ - How to add new LLM clients: contribute_llm_clients.md
+ - "": ""
+ - "": ""
+ - "": ""
+ - "": ""
+ - MiroThinker: mirothinker.md
+ - Claude 3.7 Sonnet (Official SDK): claude-3.7-sonnet.md
+ - Claude 3.7 Sonnet (OpenRouter): openrouter-claude-3.7-sonnet.md
+ - GPT-5: openai-gpt5.md
+ - GPT-4o: openai-gpt4o.md
+
+ - π Resources:
+ - All About Agents: all_about_agents.md
+ - Open Source Data: data.md
+ - Applications: applications.md
+ - FAQs: faqs.md
+ - Contributors: contributors.md
extra:
diff --git a/scripts/run_evaluate_multiple_runs_finsearchcomp.sh b/scripts/run_evaluate_multiple_runs_finsearchcomp.sh
index e7c90fe..b6d65aa 100755
--- a/scripts/run_evaluate_multiple_runs_finsearchcomp.sh
+++ b/scripts/run_evaluate_multiple_runs_finsearchcomp.sh
@@ -5,14 +5,14 @@
# SPDX-License-Identifier: Apache-2.0
# Multiple runs FinSearchComp evaluation script
-# Based on the working command: uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp output_dir=logs/finsearchcomp/$(date +"%Y%m%d_%H%M")
+# Based on the working command: uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp output_dir=logs/finsearchcomp/$(date +"%Y%m%d_%H%M")
# Configuration parameters
NUM_RUNS=${NUM_RUNS:-3}
MAX_TASKS=${MAX_TASKS:-1}
MAX_CONCURRENT=${MAX_CONCURRENT:-5}
BENCHMARK_NAME="finsearchcomp"
-AGENT_SET=${AGENT_SET:-"agent_finsearchcomp"}
+AGENT_SET=${AGENT_SET:-"agent_finsearchcomp_claude37sonnet"}
# Set results directory with timestamp
TIMESTAMP=$(date +%Y%m%d_%H%M)
diff --git a/scripts/run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh b/scripts/run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh
index c65f0dc..0cc11fb 100644
--- a/scripts/run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh
+++ b/scripts/run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh
@@ -5,9 +5,9 @@
# SPDX-License-Identifier: Apache-2.0
# Configuration parameters
-NUM_RUNS=3
-AGENT_SET="agent_gaia-validation-text-only_mirothinker"
-MAX_CONCURRENT=15
+NUM_RUNS=8
+AGENT_SET="agent_gaia-validation-text-only_mirothinker_single_agent"
+MAX_CONCURRENT=8
# Set results directory with timestamp
TIMESTAMP=$(date +%Y%m%d_%H%M)
diff --git a/scripts/run_evaluate_multiple_runs_xbench-ds.sh b/scripts/run_evaluate_multiple_runs_xbench-ds.sh
index a0026b1..bd166f7 100644
--- a/scripts/run_evaluate_multiple_runs_xbench-ds.sh
+++ b/scripts/run_evaluate_multiple_runs_xbench-ds.sh
@@ -6,7 +6,7 @@
# Configuration parameters
NUM_RUNS=3
-AGENT_SET="agent_xbench-ds"
+AGENT_SET="agent_xbench-ds_claude37sonnet"
BENCHMARK_NAME="xbench-ds"
MAX_CONCURRENT=5
export CHINESE_CONTEXT="true"
diff --git a/src/llm/providers/claude_anthropic_client.py b/src/llm/providers/claude_anthropic_client.py
index d701d49..3e92537 100644
--- a/src/llm/providers/claude_anthropic_client.py
+++ b/src/llm/providers/claude_anthropic_client.py
@@ -29,17 +29,19 @@ def __post_init__(self):
def _create_client(self, config: DictConfig):
"""Create Anthropic client"""
- api_key = config.env.anthropic_api_key
+ api_key = self.cfg.llm.anthropic_api_key
if self.async_client:
return AsyncAnthropic(
api_key=api_key,
base_url=self.cfg.llm.anthropic_base_url,
+ timeout=600.0, # 10 minutes timeout for long requests
)
else:
return Anthropic(
api_key=api_key,
base_url=self.cfg.llm.anthropic_base_url,
+ timeout=600.0, # 10 minutes timeout for long requests
)
@retry(wait=wait_fixed(10), stop=stop_after_attempt(5))
diff --git a/utils/progress_check/check_finsearchcomp_progress.py b/utils/progress_check/check_finsearchcomp_progress.py
index 52035fe..1104582 100755
--- a/utils/progress_check/check_finsearchcomp_progress.py
+++ b/utils/progress_check/check_finsearchcomp_progress.py
@@ -348,7 +348,7 @@ def main():
print(f"Error: {e}")
print(f"\nUsage: python {sys.argv[0]} [LOG_FOLDER_PATH]")
print(
- f"Example: python {sys.argv[0]} logs/finsearchcomp/agent_finsearchcomp_20250924_1555"
+ f"Example: python {sys.argv[0]} logs/finsearchcomp/agent_finsearchcomp_claude37sonnet_20250924_1555"
)
return 1