diff --git a/README.md b/README.md index 057d8ed..8035078 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,11 @@
-[![DOCS](https://img.shields.io/badge/Documentation-4285F4?style=for-the-badge&logo=gitbook&logoColor=white)](https://miromindai.github.io/MiroFlow/) [![DEMO](https://img.shields.io/badge/Demo-FFB300?style=for-the-badge&logo=airplayvideo&logoColor=white)](https://dr.miromind.ai/) [![MODELS](https://img.shields.io/badge/Models-5EDDD2?style=for-the-badge&logo=huggingface&logoColor=ffffff&labelColor)](https://huggingface.co/collections/miromind-ai/mirothinker-v02-68af084a18035f57b17cd902) [![DATA](https://img.shields.io/badge/Data-0040A1?style=for-the-badge&logo=huggingface&logoColor=ffffff&labelColor)](https://huggingface.co/datasets/miromind-ai/MiroVerse-v0.1) - [![BLOG](https://img.shields.io/badge/Blog-4285F4?style=for-the-badge&logo=google-chrome&logoColor=white)](https://miromind.ai/blog/miroflow) + [![GITHUB](https://img.shields.io/badge/Github-24292F?style=for-the-badge&logo=github&logoColor=white)](https://github.com/MiroMindAI) [![DISCORD](https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/GPqEnkzQZd) [![WeChat](https://img.shields.io/badge/WeChat-07C160?style=for-the-badge&logo=wechat&logoColor=white)](https://huggingface.co/datasets/miromind-ai/MiroFlow-Benchmarks/resolve/main/assets/wechat.png) @@ -22,7 +21,9 @@
-### πŸš€ [Try our Demo!](https://dr.miromind.ai/)|[δΈ­ζ–‡](README_zh.md)|[ζ—₯本θͺž](README_ja.md) +## πŸ“š **[READ THE DOCUMENTATION](https://miromindai.github.io/MiroFlow/)** + +### πŸš€ [Try Demo](https://dr.miromind.ai/) | [δΈ­ζ–‡](README_zh.md) | [ζ—₯本θͺž](README_ja.md)
diff --git a/config/agent_browsecomp-en_claude37sonnet.yaml b/config/agent_browsecomp-en_claude37sonnet.yaml new file mode 100644 index 0000000..baa3e15 --- /dev/null +++ b/config/agent_browsecomp-en_claude37sonnet.yaml @@ -0,0 +1,78 @@ +defaults: + - benchmark: browsecomp-en + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPrompt_GAIA + llm: + provider_class: "ClaudeOpenRouterClient" + model_name: "anthropic/claude-3.7-sonnet" + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: "anthropic" + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reasoning + + max_turns: 50 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: true + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: true + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: + agent-worker: + prompt_class: SubAgentWorkerPrompt + llm: + provider_class: "ClaudeOpenRouterClient" + model_name: "anthropic/claude-3.7-sonnet" + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: "anthropic" + disable_cache_control: false + 
keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-searching + - tool-image-video + - tool-reading + - tool-code + - tool-audio + + max_turns: 50 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + + diff --git a/config/agent_mirothinker.yaml b/config/agent_browsecomp-en_mirothinker.yaml similarity index 75% rename from config/agent_mirothinker.yaml rename to config/agent_browsecomp-en_mirothinker.yaml index 709eeed..63bca34 100644 --- a/config/agent_mirothinker.yaml +++ b/config/agent_browsecomp-en_mirothinker.yaml @@ -1,35 +1,37 @@ defaults: - - benchmark: gaia-validation + - benchmark: browsecomp-en - override hydra/job_logging: none - _self_ # Allow defining variables at the top of this file main_agent: - prompt_class: MainAgentPromptBoxedAnswer + prompt_class: MainAgentPrompt_GAIA llm: provider_class: "MiroThinkerSGLangClient" - model_name: "MODEL_NAME" + model_name: "DUMMY_MODEL_NAME" async_client: true - temperature: 0.6 + temperature: 0.3 top_p: 0.95 min_p: 0.0 top_k: -1 - max_tokens: 8192 + max_tokens: 4096 oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}" oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}" keep_tool_result: -1 oai_tool_thinking: false - tool_config: [] + tool_config: + - tool-reasoning - max_turns: -1 # Maximum number of turns for main agent execution + max_turns: 50 # Maximum number of turns for main agent execution max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn input_process: hint_generation: false hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: - final_answer_extraction: false + final_answer_extraction: true final_answer_llm_base_url: 
"${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction @@ -43,25 +45,31 @@ sub_agents: prompt_class: SubAgentWorkerPrompt llm: provider_class: "MiroThinkerSGLangClient" - model_name: "MODEL_NAME" + model_name: "DUMMY_MODEL_NAME" async_client: true - temperature: 0.6 - top_p: 0.95 + temperature: 0.3 + top_p: 1.0 min_p: 0.0 top_k: -1 - max_tokens: 8192 + max_tokens: 4096 oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}" oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}" keep_tool_result: -1 oai_tool_thinking: false tool_config: + - tool-searching + - tool-image-video - tool-reading + - tool-code + - tool-audio - max_turns: -1 # Maximum number of turns for main agent execution + max_turns: 50 # Maximum number of turns for main agent execution max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn # Can define some top-level or default parameters here output_dir: logs/ -data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored \ No newline at end of file +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + + diff --git a/config/agent_finsearchcomp_claude37sonnet.yaml b/config/agent_finsearchcomp_claude37sonnet.yaml new file mode 100644 index 0000000..70cc22b --- /dev/null +++ b/config/agent_finsearchcomp_claude37sonnet.yaml @@ -0,0 +1,77 @@ +defaults: + - benchmark: finsearchcomp + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPrompt_GAIA + llm: + provider_class: "ClaudeOpenRouterClient" + model_name: "anthropic/claude-3.7-sonnet" + async_client: true + temperature: 0.6 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + 
openrouter_provider: "anthropic" + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reasoning + + max_turns: 20 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: true + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: true + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: + agent-worker: + prompt_class: SubAgentWorkerPrompt + llm: + provider_class: "ClaudeOpenRouterClient" + model_name: "anthropic/claude-3.7-sonnet" + async_client: true + temperature: 0.6 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: "anthropic" + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-searching + - tool-image-video + - tool-reading + - tool-code + - tool-audio + + max_turns: 20 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + + diff --git a/config/agent_finsearchcomp.yaml b/config/agent_finsearchcomp_mirothinker.yaml similarity index 96% rename from config/agent_finsearchcomp.yaml rename to config/agent_finsearchcomp_mirothinker.yaml index 16225e6..df0b56c 100644 --- a/config/agent_finsearchcomp.yaml +++ b/config/agent_finsearchcomp_mirothinker.yaml 
@@ -8,7 +8,7 @@ main_agent: prompt_class: MainAgentPrompt_GAIA llm: provider_class: "MiroThinkerSGLangClient" - model_name: "MODEL_NAME" + model_name: "DUMMY_MODEL_NAME" async_client: true temperature: 0.6 top_p: 0.95 @@ -44,7 +44,7 @@ sub_agents: prompt_class: SubAgentWorkerPrompt llm: provider_class: "MiroThinkerSGLangClient" - model_name: "MODEL_NAME" + model_name: "DUMMY_MODEL_NAME" async_client: true temperature: 0.6 top_p: 0.95 @@ -69,3 +69,5 @@ sub_agents: # Can define some top-level or default parameters here output_dir: logs/ data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + + diff --git a/config/agent_gaia-validation-text-only_mirothinker.yaml b/config/agent_gaia-validation-text-only_mirothinker.yaml index 4faec6a..a813115 100644 --- a/config/agent_gaia-validation-text-only_mirothinker.yaml +++ b/config/agent_gaia-validation-text-only_mirothinker.yaml @@ -8,7 +8,7 @@ main_agent: prompt_class: MainAgentPrompt_GAIA llm: provider_class: "MiroThinkerSGLangClient" - model_name: "MODEL_NAME" + model_name: "DUMMY_MODEL_NAME" async_client: true temperature: 0.3 top_p: 0.95 @@ -45,7 +45,7 @@ sub_agents: prompt_class: SubAgentWorkerPrompt llm: provider_class: "MiroThinkerSGLangClient" - model_name: "anthropic/claude-3.7-sonnet" + model_name: "DUMMY_MODEL_NAME" async_client: true temperature: 0.3 top_p: 1.0 diff --git a/config/agent_gaia-validation-text-only_mirothinker_single_agent.yaml b/config/agent_gaia-validation-text-only_mirothinker_single_agent.yaml new file mode 100644 index 0000000..b9e92e6 --- /dev/null +++ b/config/agent_gaia-validation-text-only_mirothinker_single_agent.yaml @@ -0,0 +1,53 @@ +defaults: + - benchmark: gaia-validation-text-only + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPrompt_GAIA + llm: + provider_class: "MiroThinkerSGLangClient" + model_name: "MODEL_NAME" + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 
0.0 + top_k: -1 + max_tokens: 4096 + oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}" + oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}" + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reasoning + - tool-searching + - tool-image-video + - tool-reading + - tool-code + - tool-audio + + max_turns: 50 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: false + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + + output_process: + final_answer_extraction: true + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: null + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + diff --git a/config/agent_llm_claude37sonnet.yaml b/config/agent_llm_claude37sonnet.yaml new file mode 100644 index 0000000..59261b7 --- /dev/null +++ b/config/agent_llm_claude37sonnet.yaml @@ -0,0 +1,53 @@ +defaults: + - benchmark: example_dataset + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPromptBoxedAnswer + llm: + provider_class: "ClaudeOpenRouterClient" + model_name: "anthropic/claude-3.7-sonnet" + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: "anthropic" + disable_cache_control: false + keep_tool_result: -1 + 
oai_tool_thinking: false + + tool_config: + - tool-reading + - tool-searching + + max_turns: 20 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: false + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: false + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: null + + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + + + diff --git a/config/agent_llm_claude37sonnet_anthropic.yaml b/config/agent_llm_claude37sonnet_anthropic.yaml new file mode 100644 index 0000000..a57851e --- /dev/null +++ b/config/agent_llm_claude37sonnet_anthropic.yaml @@ -0,0 +1,51 @@ +defaults: + - benchmark: example_dataset + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPromptBoxedAnswer + llm: + provider_class: "ClaudeAnthropicClient" + model_name: "claude-3-7-sonnet-20250219" + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 16000 + anthropic_api_key: "${oc.env:ANTHROPIC_API_KEY,???}" + anthropic_base_url: "${oc.env:ANTHROPIC_BASE_URL,https://api.anthropic.com}" + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reading + - tool-searching + + max_turns: 20 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: false + hint_llm_base_url: 
"${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: false + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: null + + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + + diff --git a/config/agent_llm_gpt4o.yaml b/config/agent_llm_gpt4o.yaml new file mode 100644 index 0000000..a94656a --- /dev/null +++ b/config/agent_llm_gpt4o.yaml @@ -0,0 +1,50 @@ +defaults: + - benchmark: example_dataset + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPromptBoxedAnswer + llm: + provider_class: "GPTOpenAIClient" + model_name: "gpt-4o" + async_client: true + temperature: 0.7 + top_p: 1.0 + min_p: 0.0 + top_k: -1 + max_tokens: 4096 + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" + openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}" + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reading + - tool-searching + + max_turns: 20 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: false + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: false + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: null + + +# Can define 
some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + + diff --git a/config/agent_llm_gpt5.yaml b/config/agent_llm_gpt5.yaml new file mode 100644 index 0000000..5223b77 --- /dev/null +++ b/config/agent_llm_gpt5.yaml @@ -0,0 +1,51 @@ +defaults: + - benchmark: example_dataset + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPromptBoxedAnswer + llm: + provider_class: "GPT5OpenAIClient" + model_name: "gpt-5" + async_client: true + temperature: 1.0 + top_p: 1.0 + min_p: 0.0 + top_k: -1 + max_tokens: 4096 + reasoning_effort: "high" # Use high in the main agent, and use the default medium in the sub-agent. + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" + openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}" + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reading + - tool-searching + + max_turns: 20 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: false + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: false + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: null + + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + + diff --git a/config/agent_llm_mirothinker.yaml b/config/agent_llm_mirothinker.yaml new file mode 100644 index 0000000..afc855b --- /dev/null +++ b/config/agent_llm_mirothinker.yaml @@ -0,0 +1,49 @@ +defaults: + - 
benchmark: example_dataset + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPromptBoxedAnswer + llm: + provider_class: "MiroThinkerSGLangClient" + model_name: "DUMMY_MODEL_NAME" + async_client: true + temperature: 0.6 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 8192 + oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}" + oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}" + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-reading + - tool-searching + + max_turns: 20 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: false + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: false + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: null + + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + diff --git a/config/agent_quickstart_reading.yaml b/config/agent_quickstart_reading.yaml index e016162..ed3b69f 100644 --- a/config/agent_quickstart_reading.yaml +++ b/config/agent_quickstart_reading.yaml @@ -1,5 +1,5 @@ defaults: - - benchmark: gaia-validation + - benchmark: example_dataset - override hydra/job_logging: none - _self_ # Allow defining variables at the top of this file diff --git a/config/agent_quickstart_search.yaml b/config/agent_quickstart_search.yaml index 96df9e5..a027d2b 100644 --- a/config/agent_quickstart_search.yaml +++ 
b/config/agent_quickstart_search.yaml @@ -1,5 +1,5 @@ defaults: - - benchmark: gaia-validation + - benchmark: example_dataset - override hydra/job_logging: none - _self_ # Allow defining variables at the top of this file diff --git a/config/agent_quickstart_single_agent.yaml b/config/agent_quickstart_single_agent.yaml index 068da69..39d7068 100644 --- a/config/agent_quickstart_single_agent.yaml +++ b/config/agent_quickstart_single_agent.yaml @@ -1,5 +1,5 @@ defaults: - - benchmark: gaia-validation + - benchmark: example_dataset - override hydra/job_logging: none - _self_ # Allow defining variables at the top of this file diff --git a/config/agent_xbench-ds.yaml b/config/agent_xbench-ds_claude37sonnet.yaml similarity index 99% rename from config/agent_xbench-ds.yaml rename to config/agent_xbench-ds_claude37sonnet.yaml index 6b5213f..aeaac08 100644 --- a/config/agent_xbench-ds.yaml +++ b/config/agent_xbench-ds_claude37sonnet.yaml @@ -75,3 +75,4 @@ sub_agents: output_dir: logs/ data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + diff --git a/config/benchmark/browsecomp-en.yaml b/config/benchmark/browsecomp-en.yaml new file mode 100644 index 0000000..98c32bb --- /dev/null +++ b/config/benchmark/browsecomp-en.yaml @@ -0,0 +1,20 @@ +# config/benchmark/browsecomp-en.yaml +defaults: + - default + - _self_ + +name: "browsecomp-en" + +data: + data_dir: "${data_dir}/browsecomp-test" # Path to browsecomp-test (English) dataset + metadata_file: "standardized_data.jsonl" # Metadata filename + whitelist: [] # Optional: List of specific task_ids to run + +execution: + max_tasks: null # null = no limit, or specify a number + max_concurrent: 5 # Number of parallel tasks + pass_at_k: 1 # Number of attempts per task + +# OpenAI API key for evaluation (required for browsecomp since it has ground truth) +openai_api_key: "${oc.env:OPENAI_API_KEY,???}" + diff --git a/config/benchmark/example_dataset.yaml b/config/benchmark/example_dataset.yaml new file mode 100644 index 
0000000..36eee7e --- /dev/null +++ b/config/benchmark/example_dataset.yaml @@ -0,0 +1,21 @@ +# config/benchmark/example_dataset.yaml +defaults: + - default + - _self_ + +name: "example_dataset" + +data: + data_dir: "${data_dir}/example_dataset" # Path to example_dataset + metadata_file: "standardized_data.jsonl" # Metadata filename + whitelist: [] # Optional: List of specific task_ids to run + +execution: + max_tasks: null # null = no limit, or specify a number + max_concurrent: 5 # Number of parallel tasks + pass_at_k: 1 # Number of attempts per task + +# OpenAI API key for evaluation (required for example_dataset since it has ground truth) +openai_api_key: "${oc.env:OPENAI_API_KEY,???}" + + diff --git a/docs/mkdocs/docs/browsecomp_en.md b/docs/mkdocs/docs/browsecomp_en.md new file mode 100644 index 0000000..cdde3c6 --- /dev/null +++ b/docs/mkdocs/docs/browsecomp_en.md @@ -0,0 +1,91 @@ +# BrowseComp-EN (English) + +MiroFlow's evaluation on the BrowseComp-EN benchmark demonstrates advanced web browsing and information retrieval capabilities. + +More details: [BrowseComp: A Simple Yet Challenging Benchmark for Browsing Agents](https://arxiv.org/abs/2504.12516) + +--- + +## Dataset Overview + +!!! abstract "Key Dataset Characteristics" + + - **Total Tasks**: 1,266 tasks in the test split + - **Language**: English + - **Task Types**: Web browsing, search, and information retrieval + - **Evaluation**: Automated comparison with ground truth answers + +--- + +## Quick Start Guide + +### Step 1: Prepare the BrowseComp-EN Dataset + +```bash title="Download BrowseComp-EN Dataset" +uv run main.py prepare-benchmark get browsecomp-test +``` + +This will create the standardized dataset at `data/browsecomp-test/standardized_data.jsonl`. + +!!! 
warning "Requires HuggingFace Token" + Add your HuggingFace token to `.env`: `HF_TOKEN="your_token_here"` + +### Step 2: Configure API Keys + +```env title=".env Configuration" +# Search and web scraping +SERPER_API_KEY="xxx" +JINA_API_KEY="xxx" + +# Code execution +E2B_API_KEY="xxx" + +# LLM (Claude 3.7 Sonnet via OpenRouter) +OPENROUTER_API_KEY="xxx" +OPENROUTER_BASE_URL="https://openrouter.ai/api/v1" + +# Evaluation and hint generation +OPENAI_API_KEY="xxx" + +# Vision capabilities +ANTHROPIC_API_KEY="xxx" +GEMINI_API_KEY="xxx" +``` + +### Step 3: Run the Evaluation + +```bash title="Run BrowseComp-EN Evaluation" +uv run main.py common-benchmark --config_file_name=agent_browsecomp-en_claude37sonnet benchmark=browsecomp-en output_dir="logs/browsecomp-en/$(date +"%Y%m%d_%H%M")" +``` + +Results are automatically generated in the output directory: +- `benchmark_results.jsonl` - Detailed results for each task +- `benchmark_results_pass_at_1_accuracy.txt` - Summary accuracy statistics + +--- + +## Usage Examples + +```bash title="Limited Task Testing" +# Test with 10 tasks only +uv run main.py common-benchmark --config_file_name=agent_browsecomp-en_claude37sonnet benchmark=browsecomp-en benchmark.execution.max_tasks=10 output_dir="logs/browsecomp-en/$(date +"%Y%m%d_%H%M")" +``` + +```bash title="Using MiroThinker Model" +uv run main.py common-benchmark --config_file_name=agent_browsecomp-en_mirothinker benchmark=browsecomp-en output_dir="logs/browsecomp-en/$(date +"%Y%m%d_%H%M")" +``` + +--- + +## Available Agent Configurations + +| Agent Configuration | Model | Use Case | +|-------------------|-------|----------| +| `agent_browsecomp-en_claude37sonnet` | Claude 3.7 Sonnet | Recommended for better performance | +| `agent_browsecomp-en_mirothinker` | MiroThinker | For local deployment | + +--- + +!!! 
info "Documentation Info" + **Last Updated:** October 2025 Β· **Doc Contributor:** Team @ MiroMind AI + diff --git a/docs/mkdocs/docs/claude-3.7-sonnet.md b/docs/mkdocs/docs/claude-3.7-sonnet.md index 4d71679..d875683 100644 --- a/docs/mkdocs/docs/claude-3.7-sonnet.md +++ b/docs/mkdocs/docs/claude-3.7-sonnet.md @@ -20,18 +20,33 @@ main_agent: llm: provider_class: "ClaudeAnthropicClient" model_name: "claude-3-7-sonnet-20250219" # Use actual model name from Anthropic API + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 anthropic_api_key: "${oc.env:ANTHROPIC_API_KEY,???}" anthropic_base_url: "${oc.env:ANTHROPIC_BASE_URL,https://api.anthropic.com}" + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false ``` +!!! tip "Sampling Parameters" + - `min_p` and `top_k` are required in the configuration + - Anthropic API natively supports `top_k`, but `min_p` is not used by the API + - Set `min_p: 0.0` (disabled) and `top_k: -1` (disabled) or a specific value like `top_k: 40` + ## Usage ```bash title="Example Command" -# Use existing config -uv run main.py trace --config_file_name=your_config_file \ - --task="Your task" --task_file_name="data/file.txt" +# Run with Claude 3.7 Sonnet (Anthropic SDK) on example dataset +uv run main.py common-benchmark --config_file_name=agent_llm_claude37sonnet_anthropic output_dir="logs/test" ``` +The `agent_llm_claude37sonnet_anthropic.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark. + --- !!! 
info "Documentation Info" diff --git a/docs/mkdocs/docs/finsearchcomp.md b/docs/mkdocs/docs/finsearchcomp.md index 925909e..8d2556e 100644 --- a/docs/mkdocs/docs/finsearchcomp.md +++ b/docs/mkdocs/docs/finsearchcomp.md @@ -2,7 +2,7 @@ MiroFlow's evaluation on the FinSearchComp benchmark demonstrates capabilities in financial information search and analysis tasks, showcasing advanced reasoning abilities in complex financial research scenarios. -More details: [FinSearchComp Dataset](https://huggingface.co/datasets/ByteSeedXpert/FinSearchComp) +More details: [FinSearchComp: Towards a Realistic, Expert-Level Evaluation of Financial Search and Reasoning](https://arxiv.org/abs/2509.13160) --- @@ -59,9 +59,9 @@ JINA_API_KEY="xxx" # For Linux sandbox (code execution environment) E2B_API_KEY="xxx" -# We use MiroThinker model for financial analysis -OAI_MIROTHINKER_API_KEY="xxx" -OAI_MIROTHINKER_BASE_URL="http://localhost:61005/v1" +# We use Claude 3.7 Sonnet for financial analysis via OpenRouter +OPENROUTER_API_KEY="xxx" +OPENROUTER_BASE_URL="https://openrouter.ai/api/v1" # Used for hint generation and final answer extraction OPENAI_API_KEY="xxx" @@ -80,7 +80,7 @@ GEMINI_API_KEY="xxx" Execute the following command to run evaluation on the FinSearchComp dataset: ```bash title="Run FinSearchComp Evaluation" -uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")" +uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")" ``` !!! tip "Progress Monitoring and Resume" @@ -93,7 +93,7 @@ uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark If you need to resume an interrupted evaluation, specify the same output directory to continue from where you left off. ```bash title="Resume Evaluation, e.g." 
- uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp output_dir=${PATH_TO_LOG} + uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp output_dir=${PATH_TO_LOG} ``` ### Step 4: Extract Results @@ -129,7 +129,7 @@ uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark After running evaluations, you'll find the following structure: ``` -logs/finsearchcomp/agent_finsearchcomp_YYYYMMDD_HHMM/ +logs/finsearchcomp/agent_finsearchcomp_claude37sonnet_YYYYMMDD_HHMM/ β”œβ”€β”€ benchmark_results.jsonl # Task results summary β”œβ”€β”€ benchmark_results_pass_at_1_accuracy.txt # Accuracy statistics β”œβ”€β”€ task_(T1)Time_Sensitive_Data_Fetching_*.json # T1 task traces @@ -154,12 +154,12 @@ The progress checker provides detailed statistics: ### Single Run Evaluation ```bash title="Basic Evaluation" -uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")" +uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")" ``` ### Limited Task Testing ```bash title="Test with Limited Tasks" -uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp benchmark.execution.max_tasks=5 output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")" +uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp benchmark.execution.max_tasks=5 output_dir="logs/finsearchcomp/$(date +"%Y%m%d_%H%M")" ``` ### Custom Agent Configuration diff --git a/docs/mkdocs/docs/futurex.md b/docs/mkdocs/docs/futurex.md index eec7860..97a3da0 100644 --- a/docs/mkdocs/docs/futurex.md +++ b/docs/mkdocs/docs/futurex.md @@ -2,6 +2,9 @@ MiroFlow's evaluation on the Futurex-Online benchmark demonstrates capabilities in future 
event prediction tasks. +More details: [FutureX: An Advanced Live Benchmark for LLM Agents in Future Prediction](https://arxiv.org/abs/2508.11987) + + --- ## Dataset Overview diff --git a/docs/mkdocs/docs/index.md b/docs/mkdocs/docs/index.md index 42ea809..bdbdb4e 100644 --- a/docs/mkdocs/docs/index.md +++ b/docs/mkdocs/docs/index.md @@ -9,7 +9,29 @@ ## πŸš€ What is MiroFlow? -**MiroFlow** is a comprehensive agentic foundation platform for building intelligent AI agents that achieve state-of-the-art performance on complex tasks. It provides enhanced conversation management, flexible tool integration, and extensive benchmark evaluations across multiple datasets. +**MiroFlow** is an agentic AI platform for building intelligent agents with flexible tool integration and comprehensive benchmark evaluations. + + +## πŸ“ Recent Updates + +!!! success "Latest Changes & Improvements" + + **Oct 2025** - + + - Added support for Index + - Added support for BrowseComp-EN evaluation + - Added support for MiroAPI [#76](https://github.com/MiroMindAI/MiroFlow/pull/76) + + + - πŸ“Š Added support for FinSearchComp evaluation benchmark [#51](https://github.com/MiroMindAI/MiroFlow/pull/51) + - πŸ” Added support for XBench-DS (Deep Search) evaluation [#47](https://github.com/MiroMindAI/MiroFlow/pull/47) + - 🧠 Updated o3 hints and summary to more models [#58](https://github.com/MiroMindAI/MiroFlow/pull/58) + - ✨ Added support for GPT-5 integration [#52](https://github.com/MiroMindAI/MiroFlow/pull/52) + - πŸ”§ Improved tool logs and per-task log storage [#69](https://github.com/MiroMindAI/MiroFlow/pull/69) + - πŸ€– Added support for single agent mode [#67](https://github.com/MiroMindAI/MiroFlow/pull/67) + - πŸ“š Added comprehensive collection of agentic AI research papers [#65](https://github.com/MiroMindAI/MiroFlow/pull/65) + + @@ -53,21 +75,6 @@ Explore the complete MiroMind AI ecosystem: | **MiroTrain** | Complete training recipes and tools | [GitHub](https://github.com/MiroMindAI/MiroTrain)
:material-arrow-right: | -## πŸ“ Recent Updates - -!!! success "Latest Changes & Improvements" - - **Oct 2025** - - - - πŸ“Š Added support for FinSearchComp evaluation benchmark [#51](https://github.com/MiroMindAI/MiroFlow/pull/51) - - πŸ” Added support for XBench-DS (Deep Search) evaluation [#47](https://github.com/MiroMindAI/MiroFlow/pull/47) - - 🧠 Updated o3 hints and summary to more models [#58](https://github.com/MiroMindAI/MiroFlow/pull/58) - - ✨ Added support for GPT-5 integration [#52](https://github.com/MiroMindAI/MiroFlow/pull/52) - - πŸ”§ Improved tool logs and per-task log storage [#69](https://github.com/MiroMindAI/MiroFlow/pull/69) - - πŸ€– Added support for single agent mode [#67](https://github.com/MiroMindAI/MiroFlow/pull/67) - - πŸ“š Added comprehensive collection of agentic AI research papers [#65](https://github.com/MiroMindAI/MiroFlow/pull/65) - - diff --git a/docs/mkdocs/docs/mirothinker.md b/docs/mkdocs/docs/mirothinker.md index ea7b242..93213d8 100644 --- a/docs/mkdocs/docs/mirothinker.md +++ b/docs/mkdocs/docs/mirothinker.md @@ -56,19 +56,17 @@ OAI_MIROTHINKER_BASE_URL="http://localhost:61005/v1" Test your setup with the following command: ```bash title="Test Command" -uv run main.py trace --config_file_name=agent_mirothinker \ - --task="What is the first country listed in the XLSX file that have names starting with Co?" 
\ - --task_file_name="data/FSI-2023-DOWNLOAD.xlsx" +uv run main.py common-benchmark --config_file_name=agent_llm_mirothinker output_dir="logs/test" ``` This command will: -- Use the `agent_mirothinker` configuration with the dedicated MiroThinkerSGLangClient -- Process the specified Excel file -- Query the model to find countries starting with "Co" +- Use the `agent_llm_mirothinker` configuration with the dedicated MiroThinkerSGLangClient +- Run the example dataset benchmark (configured in the YAML file) +- Test the model's question-answering capabilities ### Configuration Details -The `./config/agent_mirothinker.yaml` configuration file uses: +The `./config/agent_llm_mirothinker.yaml` configuration file uses: - `provider_class: "MiroThinkerSGLangClient"` - A dedicated client for MiroThinker models deployed with SGLang - Model path and generation parameters (temperature, top_p, max_tokens, etc.) diff --git a/docs/mkdocs/docs/openai-gpt.md b/docs/mkdocs/docs/openai-gpt.md deleted file mode 100644 index e111449..0000000 --- a/docs/mkdocs/docs/openai-gpt.md +++ /dev/null @@ -1,78 +0,0 @@ -# OpenAI GPT Models - -OpenAI's latest models including GPT-5, GPT-4o and advanced reasoning models with strong coding, vision, and reasoning capabilities. - -## Client Used for GPT-5 - -`GPT5OpenAIClient` - -### Environment Setup - -```bash title="Environment Variables" -export OPENAI_API_KEY="your-openai-key" -export OPENAI_BASE_URL="https://api.openai.com/v1" # optional -``` - -### Configuration - -```yaml title="Agent Configuration" -main_agent: - llm: - provider_class: "GPT5OpenAIClient" - model_name: "gpt-5" - async_client: true - temperature: 1.0 - top_p: 1.0 - min_p: 0.0 - top_k: -1 - max_tokens: 128000 - reasoning_effort: "high" # Use high in the main agent, and use the default medium in the sub-agent. 
- openai_api_key: "${oc.env:OPENAI_API_KEY,???}" - openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}" -``` - -### Usage - -```bash title="Example Command" -# Create custom OpenAI config -uv run main.py trace --config_file_name=your_config_file \ - --task="Your task" --task_file_name="data/file.txt" -``` - -## Client Used for GPT-4o - -`GPTOpenAIClient` - -### Environment Setup - -```bash title="Environment Variables" -export OPENAI_API_KEY="your-openai-key" -export OPENAI_BASE_URL="https://api.openai.com/v1" # optional -``` - -### Configuration - -```yaml title="Agent Configuration" -main_agent: - llm: - provider_class: "GPTOpenAIClient" - model_name: "gpt-4o" # or gpt-4o-mini, etc. - openai_api_key: "${oc.env:OPENAI_API_KEY,???}" - openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}" -``` - -### Usage - -```bash title="Example Command" -# Create custom OpenAI config -uv run main.py trace --config_file_name=your_config_file \ - --task="Your task" --task_file_name="data/file.txt" -``` - -!!! note "Configuration Notes" - - `GPTOpenAIClient` also supports GPT-5, but it has not been fully validated on MiroFlow yet. We recommend using `GPT5OpenAIClient`. - ---- - -!!! info "Documentation Info" - **Last Updated:** October 2025 Β· **Doc Contributor:** Team @ MiroMind AI \ No newline at end of file diff --git a/docs/mkdocs/docs/openai-gpt4o.md b/docs/mkdocs/docs/openai-gpt4o.md new file mode 100644 index 0000000..4bc59bb --- /dev/null +++ b/docs/mkdocs/docs/openai-gpt4o.md @@ -0,0 +1,54 @@ +# OpenAI GPT-4o + +OpenAI's GPT-4o model with multimodal capabilities, strong reasoning, and efficient performance. 
+ +## Client Configuration + +**Client Class**: `GPTOpenAIClient` + +### Environment Setup + +```bash title="Environment Variables" +export OPENAI_API_KEY="your-openai-key" +export OPENAI_BASE_URL="https://api.openai.com/v1" # optional +``` + +### Agent Configuration + +```yaml title="Agent Configuration" +main_agent: + llm: + provider_class: "GPTOpenAIClient" + model_name: "gpt-4o" # or gpt-4o-mini + async_client: true + temperature: 0.7 + top_p: 1.0 + min_p: 0.0 + top_k: -1 + max_tokens: 16000 + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" + openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}" +``` + +### Usage + +```bash title="Example Command" +# Run with GPT-4o on example dataset +uv run main.py common-benchmark --config_file_name=agent_llm_gpt4o output_dir="logs/test" +``` + +The `agent_llm_gpt4o.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark. + +!!! note "Available Models" + The `GPTOpenAIClient` supports multiple GPT-4o variants: + - `gpt-4o` - Full GPT-4o model + - `gpt-4o-mini` - Smaller, faster variant + +!!! warning "GPT-5 Support" + `GPTOpenAIClient` also supports GPT-5, but it has not been fully validated on MiroFlow yet. We recommend using `GPT5OpenAIClient` for GPT-5. + +--- + +!!! info "Documentation Info" + **Last Updated:** October 2025 Β· **Doc Contributor:** Team @ MiroMind AI + diff --git a/docs/mkdocs/docs/openai-gpt5.md b/docs/mkdocs/docs/openai-gpt5.md new file mode 100644 index 0000000..4409a7e --- /dev/null +++ b/docs/mkdocs/docs/openai-gpt5.md @@ -0,0 +1,53 @@ +# OpenAI GPT-5 + +OpenAI's GPT-5 model with advanced reasoning capabilities and strong coding, vision, and problem-solving abilities. 
+ +## Client Configuration + +**Client Class**: `GPT5OpenAIClient` + +### Environment Setup + +```bash title="Environment Variables" +export OPENAI_API_KEY="your-openai-key" +export OPENAI_BASE_URL="https://api.openai.com/v1" # optional +``` + +### Agent Configuration + +```yaml title="Agent Configuration" +main_agent: + llm: + provider_class: "GPT5OpenAIClient" + model_name: "gpt-5" + async_client: true + temperature: 1.0 + top_p: 1.0 + min_p: 0.0 + top_k: -1 + max_tokens: 16000 + reasoning_effort: "high" # Use high in the main agent, and use the default medium in the sub-agent. + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" + openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}" +``` + +### Usage + +```bash title="Example Command" +# Run with GPT-5 on example dataset +uv run main.py common-benchmark --config_file_name=agent_llm_gpt5 output_dir="logs/test" +``` + +The `agent_llm_gpt5.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark. + +!!! tip "Reasoning Effort" + GPT-5 supports the `reasoning_effort` parameter. The configuration uses `"high"` for better reasoning performance. + +!!! tip "Sampling Parameters" + While `min_p` and `top_k` are required in the configuration, OpenAI's API does not use them. Set them to `min_p: 0.0` and `top_k: -1` (disabled). + +--- + +!!! info "Documentation Info" + **Last Updated:** October 2025 Β· **Doc Contributor:** Team @ MiroMind AI + diff --git a/docs/mkdocs/docs/openrouter-claude-3.7-sonnet.md b/docs/mkdocs/docs/openrouter-claude-3.7-sonnet.md index d932f0b..83c3ddf 100644 --- a/docs/mkdocs/docs/openrouter-claude-3.7-sonnet.md +++ b/docs/mkdocs/docs/openrouter-claude-3.7-sonnet.md @@ -20,27 +20,30 @@ main_agent: llm: provider_class: "ClaudeOpenRouterClient" model_name: "anthropic/claude-3.7-sonnet" # or openai/gpt-4, etc. 
+ async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" openrouter_provider: "anthropic" # Force provider, or "" for auto + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false ``` -## Other Supported Models - -- `openai/gpt-4` -- `openai/gpt-3.5-turbo` -- `anthropic/claude-3-opus` -- `google/gemini-pro` -- Many others via unified OpenAI format ## Usage ```bash title="Example Command" -# Use existing OpenRouter config -uv run main.py trace --config_file_name=your_config_file \ - --task="Your task" --task_file_name="data/file.txt" +# Run with Claude 3.7 Sonnet on example dataset +uv run main.py common-benchmark --config_file_name=agent_llm_claude37sonnet output_dir="logs/test" ``` +The `agent_llm_claude37sonnet.yaml` configuration file provides a ready-to-use setup with the example dataset benchmark. + ## Benefits vs Direct API - Unified chat format diff --git a/docs/mkdocs/docs/xbench_ds.md b/docs/mkdocs/docs/xbench_ds.md index 2ab9dc0..c1f9bf7 100644 --- a/docs/mkdocs/docs/xbench_ds.md +++ b/docs/mkdocs/docs/xbench_ds.md @@ -52,7 +52,7 @@ OPENAI_BASE_URL="https://api.openai.com/v1" ```bash uv run main.py common-benchmark \ - --config_file_name=agent_xbench-ds \ + --config_file_name=agent_xbench-ds_claude37sonnet \ output_dir="logs/xbench-ds/$(date +"%Y%m%d_%H%M")" ``` @@ -72,7 +72,7 @@ Replace `$PATH_TO_LOG` with your actual output directory path. 
```bash title="Resume Interrupted Evaluation" uv run main.py common-benchmark \ - --config_file_name=agent_xbench-ds \ + --config_file_name=agent_xbench-ds_claude37sonnet \ output_dir="logs/xbench-ds/20250922_1430" ``` diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml index 60e0ff3..f45f9d7 100644 --- a/docs/mkdocs/mkdocs.yml +++ b/docs/mkdocs/mkdocs.yml @@ -37,62 +37,86 @@ repo_url: https://github.com/MiroMindAI/MiroFlow nav: - - Introduction: + - 🏠 Introduction: - News & Updates: index.md - License: license.md - - Quick Start: + - πŸš€ Quick Start: - Quickstart: quickstart.md - Core Concepts: core_concepts.md - YAML Configuration: yaml_config.md - - Evaluation: + - "πŸ“Š Evaluation": - Overview: evaluation_overview.md - - Benchmarks: - - GAIA-Validation: - - Prerequisites: gaia_validation_prerequisites.md - - Claude-3.7-Sonnet: gaia_validation_claude37sonnet.md - - GPT-5: gaia_validation_gpt5.md - - MiroThinker: gaia_validation_mirothinker.md - - GAIA-Validation-Text-Only: gaia_validation_text_only.md - - GAIA-Test: gaia_test.md - - FutureX: futurex.md - - xBench-DeepSearch: xbench_ds.md - - FinSearchComp: finsearchcomp.md - - Download Datasets: download_datasets.md - - Add New Benchmarks: contribute_benchmarks.md - - - Tools: + - How to add new benchmarks: contribute_benchmarks.md + - "": "" + - "": "" + - "": "" + - "": "" + - GAIA-Val: + - Prepare Dataset: gaia_validation_prerequisites.md + - Claude-3.7-Sonnet: gaia_validation_claude37sonnet.md + - GPT-5: gaia_validation_gpt5.md + - MiroThinker: gaia_validation_mirothinker.md + - GAIA-Val-Text: gaia_validation_text_only.md + - GAIA-Test: gaia_test.md + - BrowseComp-EN: browsecomp_en.md + - FutureX: futurex.md + - xBench-DeepSearch: xbench_ds.md + - FinSearchComp: finsearchcomp.md + + # - Benchmarks: + # - GAIA-Validation-Text-Only: gaia_validation_text_only.md + # - GAIA-Test: gaia_test.md + # - BrowseComp-EN: browsecomp_en.md + # - FutureX: futurex.md + # - xBench-DeepSearch: xbench_ds.md + # - 
FinSearchComp: finsearchcomp.md + # - Download Datasets: download_datasets.md + + + + - πŸ”§ Tools: - Overview: tool_overview.md - - Tools: - - tool-reasoning: tool_reasoning.md - - tool-reasoning-os: tool_reasoning_os.md - - tool-image-video: tool_vqa.md - - tool-image-video-os: tool_vqa_os.md - - tool-audio-os: tool_audio_os.md - - tool-searching: tool_searching.md - - tool-python: tool_python.md + - How to add new tools: contribute_tools.md + - "": "" + - "": "" + - "": "" + - "": "" + - tool-reasoning: tool_reasoning.md + - tool-reasoning-os: tool_reasoning_os.md + - tool-image-video: tool_vqa.md + - tool-image-video-os: tool_vqa_os.md + - tool-audio-os: tool_audio_os.md + - tool-searching: tool_searching.md + - tool-python: tool_python.md + - "": "" + - "": "" + - "": "" + - "": "" - Advanced Features: - E2B Advanced Features: e2b_advanced_features.md - MiroAPI: miro_api.md - - Add New Tools: contribute_tools.md - - LLM Clients: + - πŸ€– LLM Clients: - Overview: llm_clients_overview.md - - Models: - - MiroThinker: mirothinker.md - - Claude-3.7-Sonnet: - - Official SDK: claude-3.7-sonnet.md - - OpenRouter: openrouter-claude-3.7-sonnet.md - - OpenAI-GPT: openai-gpt.md - - Add New LLM Clients: contribute_llm_clients.md - - - Resources: - - πŸ“š All About Agents: all_about_agents.md - - πŸ“Š Open Source Data: data.md - - πŸ“± Applications: applications.md - - πŸ› FAQs: faqs.md - - πŸ“ Contributors: contributors.md + - How to add new LLM clients: contribute_llm_clients.md + - "": "" + - "": "" + - "": "" + - "": "" + - MiroThinker: mirothinker.md + - Claude 3.7 Sonnet (Official SDK): claude-3.7-sonnet.md + - Claude 3.7 Sonnet (OpenRouter): openrouter-claude-3.7-sonnet.md + - GPT-5: openai-gpt5.md + - GPT-4o: openai-gpt4o.md + + - πŸ“š Resources: + - All About Agents: all_about_agents.md + - Open Source Data: data.md + - Applications: applications.md + - FAQs: faqs.md + - Contributors: contributors.md extra: diff --git 
a/scripts/run_evaluate_multiple_runs_finsearchcomp.sh b/scripts/run_evaluate_multiple_runs_finsearchcomp.sh index e7c90fe..b6d65aa 100755 --- a/scripts/run_evaluate_multiple_runs_finsearchcomp.sh +++ b/scripts/run_evaluate_multiple_runs_finsearchcomp.sh @@ -5,14 +5,14 @@ # SPDX-License-Identifier: Apache-2.0 # Multiple runs FinSearchComp evaluation script -# Based on the working command: uv run main.py common-benchmark --config_file_name=agent_finsearchcomp benchmark=finsearchcomp output_dir=logs/finsearchcomp/$(date +"%Y%m%d_%H%M") +# Based on the working command: uv run main.py common-benchmark --config_file_name=agent_finsearchcomp_claude37sonnet benchmark=finsearchcomp output_dir=logs/finsearchcomp/$(date +"%Y%m%d_%H%M") # Configuration parameters NUM_RUNS=${NUM_RUNS:-3} MAX_TASKS=${MAX_TASKS:-1} MAX_CONCURRENT=${MAX_CONCURRENT:-5} BENCHMARK_NAME="finsearchcomp" -AGENT_SET=${AGENT_SET:-"agent_finsearchcomp"} +AGENT_SET=${AGENT_SET:-"agent_finsearchcomp_claude37sonnet"} # Set results directory with timestamp TIMESTAMP=$(date +%Y%m%d_%H%M) diff --git a/scripts/run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh b/scripts/run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh index c65f0dc..0cc11fb 100644 --- a/scripts/run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh +++ b/scripts/run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh @@ -5,9 +5,9 @@ # SPDX-License-Identifier: Apache-2.0 # Configuration parameters -NUM_RUNS=3 -AGENT_SET="agent_gaia-validation-text-only_mirothinker" -MAX_CONCURRENT=15 +NUM_RUNS=8 +AGENT_SET="agent_gaia-validation-text-only_mirothinker_single_agent" +MAX_CONCURRENT=8 # Set results directory with timestamp TIMESTAMP=$(date +%Y%m%d_%H%M) diff --git a/scripts/run_evaluate_multiple_runs_xbench-ds.sh b/scripts/run_evaluate_multiple_runs_xbench-ds.sh index a0026b1..bd166f7 100644 --- a/scripts/run_evaluate_multiple_runs_xbench-ds.sh +++ 
b/scripts/run_evaluate_multiple_runs_xbench-ds.sh @@ -6,7 +6,7 @@ # Configuration parameters NUM_RUNS=3 -AGENT_SET="agent_xbench-ds" +AGENT_SET="agent_xbench-ds_claude37sonnet" BENCHMARK_NAME="xbench-ds" MAX_CONCURRENT=5 export CHINESE_CONTEXT="true" diff --git a/src/llm/providers/claude_anthropic_client.py b/src/llm/providers/claude_anthropic_client.py index d701d49..3e92537 100644 --- a/src/llm/providers/claude_anthropic_client.py +++ b/src/llm/providers/claude_anthropic_client.py @@ -29,17 +29,19 @@ def __post_init__(self): def _create_client(self, config: DictConfig): """Create Anthropic client""" - api_key = config.env.anthropic_api_key + api_key = self.cfg.llm.anthropic_api_key if self.async_client: return AsyncAnthropic( api_key=api_key, base_url=self.cfg.llm.anthropic_base_url, + timeout=600.0, # 10 minutes timeout for long requests ) else: return Anthropic( api_key=api_key, base_url=self.cfg.llm.anthropic_base_url, + timeout=600.0, # 10 minutes timeout for long requests ) @retry(wait=wait_fixed(10), stop=stop_after_attempt(5)) diff --git a/utils/progress_check/check_finsearchcomp_progress.py b/utils/progress_check/check_finsearchcomp_progress.py index 52035fe..1104582 100755 --- a/utils/progress_check/check_finsearchcomp_progress.py +++ b/utils/progress_check/check_finsearchcomp_progress.py @@ -348,7 +348,7 @@ def main(): print(f"Error: {e}") print(f"\nUsage: python {sys.argv[0]} [LOG_FOLDER_PATH]") print( - f"Example: python {sys.argv[0]} logs/finsearchcomp/agent_finsearchcomp_20250924_1555" + f"Example: python {sys.argv[0]} logs/finsearchcomp/agent_finsearchcomp_claude37sonnet_20250924_1555" ) return 1