Skip to content

Commit 6ec4972

Browse files
authored
feat(hints, summary, docs): Refactor O3 parameters to generic names and restructure GAIA validation docs (#58)
* update o3 hints and summary to be more general and update docs for gaia validation * to pass lint * improve retry * to incorporate copilot
1 parent c3b3785 commit 6ec4972

36 files changed

+765
-228
lines changed

config/agent_finsearchcomp.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ main_agent:
2727
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
2828

2929
input_process:
30-
o3_hint: true
30+
hint_generation: true
31+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
3132
output_process:
32-
o3_final_answer: true
33+
final_answer_extraction: true
34+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
3335

34-
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for o3 hints and final answer extraction
36+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
3537
add_message_id: true
3638
keep_tool_result: -1
3739
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"

config/agent_gaia-test.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ main_agent:
2929
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
3030

3131
input_process:
32-
o3_hint: true
32+
hint_generation: true
33+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
3334
output_process:
34-
o3_final_answer: true
35+
final_answer_extraction: true
36+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
3537

36-
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for o3 hints and final answer extraction
38+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
3739
add_message_id: true
3840
keep_tool_result: -1
3941
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"

config/agent_gaia-validation-text-only.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ main_agent:
2929
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
3030

3131
input_process:
32-
o3_hint: true
32+
hint_generation: true
33+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
3334
output_process:
34-
o3_final_answer: true
35+
final_answer_extraction: true
36+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
3537

36-
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for o3 hints and final answer extraction
38+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
3739
add_message_id: true
3840
keep_tool_result: -1
3941
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"

config/agent_gaia-validation.yaml renamed to config/agent_gaia-validation_claude37sonnet.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ main_agent:
2929
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
3030

3131
input_process:
32-
o3_hint: true
32+
hint_generation: true
33+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
3334
output_process:
34-
o3_final_answer: true
35+
final_answer_extraction: true
36+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
3537

36-
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for o3 hints and final answer extraction
38+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
3739
add_message_id: true
3840
keep_tool_result: -1
3941
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
defaults:
2+
- benchmark: gaia-validation
3+
- override hydra/job_logging: none
4+
- _self_ # Allow defining variables at the top of this file
5+
6+
7+
main_agent:
8+
prompt_class: MainAgentPrompt_GAIA
9+
llm:
10+
provider_class: "MiroThinkerSGLangClient"
11+
model_name: "MODEL_NAME"
12+
async_client: true
13+
temperature: 0.3
14+
top_p: 1.0
15+
min_p: 0.0
16+
top_k: -1
17+
max_tokens: 4096
18+
oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}"
19+
oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}"
20+
keep_tool_result: -1
21+
oai_tool_thinking: false
22+
23+
tool_config:
24+
- tool-reasoning
25+
26+
max_turns: 50 # Maximum number of turns for main agent execution
27+
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
28+
29+
input_process:
30+
hint_generation: false
31+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
32+
33+
output_process:
34+
final_answer_extraction: true
35+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
36+
37+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
38+
add_message_id: true
39+
keep_tool_result: -1
40+
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
41+
42+
43+
sub_agents:
44+
agent-worker:
45+
prompt_class: SubAgentWorkerPrompt
46+
llm:
47+
provider_class: "MiroThinkerSGLangClient"
48+
model_name: "MODEL_NAME"
49+
async_client: true
50+
temperature: 0.3
51+
top_p: 1.0
52+
min_p: 0.0
53+
top_k: -1
54+
max_tokens: 4096
55+
oai_mirothinker_api_key: "${oc.env:OAI_MIROTHINKER_API_KEY,dummy_key}"
56+
oai_mirothinker_base_url: "${oc.env:OAI_MIROTHINKER_BASE_URL,http://localhost:61005/v1}"
57+
keep_tool_result: -1
58+
oai_tool_thinking: false
59+
60+
tool_config:
61+
- tool-searching
62+
- tool-image-video
63+
- tool-reading
64+
- tool-code
65+
- tool-audio
66+
67+
max_turns: 50 # Maximum number of turns for sub-agent execution
68+
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
69+
70+
71+
# Top-level or default parameters can be defined here
72+
output_dir: logs/
73+
data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored

config/agent_mirothinker.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@ main_agent:
2626
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
2727

2828
input_process:
29-
o3_hint: false
29+
hint_generation: false
30+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
3031
output_process:
31-
o3_final_answer: false
32+
final_answer_extraction: false
33+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
3234

33-
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for o3 hints and final answer extraction
35+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
3436
add_message_id: true
3537
keep_tool_result: -1
3638
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"

config/agent_quickstart_1.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@ main_agent:
2828
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
2929

3030
input_process:
31-
o3_hint: false
31+
hint_generation: false
32+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
3233
output_process:
33-
o3_final_answer: false
34+
final_answer_extraction: false
35+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
3436

35-
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for o3 hints and final answer extraction
37+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
3638
add_message_id: true
3739
keep_tool_result: -1
3840
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"

config/agent_xbench-ds.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,13 @@ main_agent:
2929
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
3030

3131
input_process:
32-
o3_hint: true
32+
hint_generation: true
33+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
3334
output_process:
34-
o3_final_answer: true
35+
final_answer_extraction: true
36+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
3537

36-
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for o3 hints and final answer extraction
38+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
3739
add_message_id: true
3840
keep_tool_result: -1
3941
chinese_context: "true"

docs/mkdocs/docs/all_about_agents.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ Welcome to our comprehensive resource collection for AI agents. This page curate
103103
- **Terminal-Bench**: the benchmark for testing AI agents in real terminal environments
104104
- [:material-github: GitHub](https://github.com/laude-institute/terminal-bench)
105105

106+
- **Gaia2 and ARE**: Empowering the Community to Evaluate Agents
107+
- [:material-file-document: Blog Post](https://huggingface.co/blog/gaia2)
108+
106109
---
107110

108111
!!! info "Documentation Info"

docs/mkdocs/docs/finsearchcomp.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ E2B_API_KEY="xxx"
6363
OAI_MIROTHINKER_API_KEY="xxx"
6464
OAI_MIROTHINKER_BASE_URL="http://localhost:61005/v1"
6565
66-
# Used for o3 hints and final answer extraction
66+
# Used for hint generation and final answer extraction
6767
OPENAI_API_KEY="xxx"
6868
OPENAI_BASE_URL="https://api.openai.com/v1"
6969

0 commit comments

Comments
 (0)