Skip to content

Commit 9ccab07

Browse files
authored
docs(quickstart): update docs for quick start (#73)
* update quick start and add tool searching serper, config logging for single task
* add single agent
* pass lint
1 parent 08fd95a commit 9ccab07

13 files changed (+191 −81 lines changed)

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ cp .env.template .env
8888
# Edit .env and add your OPENROUTER_API_KEY
8989

9090
# 3. Run your first agent
91-
uv run main.py trace --config_file_name=agent_quickstart_1 --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx"
91+
uv run main.py trace --config_file_name=agent_quickstart_reading --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx"
9292
```
9393

9494
🎉 **Expected Output:** Your agent should return **\boxed{Congo Democratic Republic}** 😊

README_ja.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ cp .env.template .env
8686
# .env を編集して OPENROUTER_API_KEY を追加
8787

8888
# 3. 最初のエージェントを実行
89-
uv run main.py trace --config_file_name=agent_quickstart_1 --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx"
89+
uv run main.py trace --config_file_name=agent_quickstart_reading --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx"
9090
```
9191

9292
🎉 **想定出力**: エージェントは **\boxed{Congo Democratic Republic}** を返すはずです 😊

README_zh.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ cp .env.template .env
8686
# 编辑 .env 并添加您的 OPENROUTER_API_KEY
8787

8888
# 3. 运行您的第一个智能体
89-
uv run main.py trace --config_file_name=agent_quickstart_1 --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx"
89+
uv run main.py trace --config_file_name=agent_quickstart_reading --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx"
9090
```
9191

9292
🎉 **预期输出**: 您的智能体应该返回 **\boxed{Congo Democratic Republic}** 😊

config/agent_quickstart_1.yaml renamed to config/agent_quickstart_reading.yaml

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ main_agent:
2222
keep_tool_result: -1
2323
oai_tool_thinking: false
2424

25-
tool_config: []
25+
tool_config:
26+
- tool-reading
2627

2728
max_turns: -1 # Maximum number of turns for main agent execution
2829
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
@@ -40,30 +41,7 @@ main_agent:
4041
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
4142

4243

43-
sub_agents:
44-
agent-worker:
45-
prompt_class: SubAgentWorkerPrompt
46-
llm:
47-
provider_class: "ClaudeOpenRouterClient"
48-
model_name: "anthropic/claude-3.7-sonnet"
49-
async_client: true
50-
temperature: 0.3
51-
top_p: 0.95
52-
min_p: 0.0
53-
top_k: -1
54-
max_tokens: 32000
55-
openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
56-
openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
57-
openrouter_provider: "anthropic"
58-
disable_cache_control: false
59-
keep_tool_result: -1
60-
oai_tool_thinking: false
61-
62-
tool_config:
63-
- tool-reading
64-
65-
max_turns: -1 # Maximum number of turns for main agent execution
66-
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
44+
sub_agents: null
6745

6846

6947
# Can define some top-level or default parameters here
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
defaults:
2+
- benchmark: gaia-validation
3+
- override hydra/job_logging: none
4+
- _self_ # Allow defining variables at the top of this file
5+
6+
7+
main_agent:
8+
prompt_class: MainAgentPromptBoxedAnswer
9+
llm:
10+
provider_class: "ClaudeOpenRouterClient"
11+
model_name: "anthropic/claude-3.7-sonnet"
12+
async_client: true
13+
temperature: 0.3
14+
top_p: 0.95
15+
min_p: 0.0
16+
top_k: -1
17+
max_tokens: 32000
18+
openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}"
19+
openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}"
20+
openrouter_provider: "anthropic"
21+
disable_cache_control: false
22+
keep_tool_result: -1
23+
oai_tool_thinking: false
24+
25+
tool_config:
26+
- tool-searching-serper
27+
28+
max_turns: -1 # Maximum number of turns for main agent execution
29+
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
30+
31+
input_process:
32+
hint_generation: false
33+
hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
34+
output_process:
35+
final_answer_extraction: false
36+
final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
37+
38+
openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
39+
add_message_id: true
40+
keep_tool_result: -1
41+
chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
42+
43+
44+
sub_agents: null
45+
46+
47+
# Can define some top-level or default parameters here
48+
output_dir: logs/
49+
data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
50+

config/agent_quickstart_single.yaml renamed to config/agent_quickstart_single_agent.yaml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,8 @@ main_agent:
2323
oai_tool_thinking: false
2424

2525
tool_config:
26-
- tool-reasoning-os
27-
- tool-searching
28-
- tool-image-video-os
2926
- tool-reading
30-
- tool-code
31-
- tool-audio-os
27+
- tool-searching
3228

3329
max_turns: -1 # Maximum number of turns for main agent execution
3430
max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
name: "tool-serper-search"
1+
name: "tool-searching-serper"
22
tool_command: "npx"
33
args:
44
- "-y"
55
- "serper-search-scrape-mcp-server"
66
env:
77
# Search API key - this value will be loaded from the .env file at runtime
8-
SERPER_API_KEY: "${oc.env:SERPER_API_KEY}"
8+
SERPER_API_KEY: "${oc.env:SERPER_API_KEY}"

docs/mkdocs/docs/contribute_benchmarks.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ Start with a small subset to verify everything works correctly:
148148

149149
```bash title="Test Benchmark Integration"
150150
uv run main.py common-benchmark \
151-
--config_file_name=agent_quickstart_1 \
151+
--config_file_name=agent_quickstart_reading \
152152
benchmark=your-benchmark \
153153
benchmark.execution.max_tasks=3 \
154154
output_dir="logs/test-your-benchmark/$(date +"%Y%m%d_%H%M")"
@@ -160,7 +160,7 @@ Once testing passes, run the complete benchmark:
160160

161161
```bash title="Run Full Benchmark"
162162
uv run main.py common-benchmark \
163-
--config_file_name=agent_quickstart_1 \
163+
--config_file_name=agent_quickstart_reading \
164164
benchmark=your-benchmark \
165165
output_dir="logs/your-benchmark/$(date +"%Y%m%d_%H%M")"
166166
```

docs/mkdocs/docs/futurex.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,10 @@ OPENAI_BASE_URL="https://api.openai.com/v1"
7272
### Step 3: Run the Evaluation
7373

7474
!!! example "Evaluation Execution"
75-
Execute the following command to run evaluation on the Futurex-Online dataset. This uses the basic `agent_quickstart_1` configuration for quick start purposes.
75+
Execute the following command to run evaluation on the Futurex-Online dataset. This uses the basic `agent_quickstart_reading` configuration for quick start purposes.
7676

7777
```bash title="Run Futurex-Online Evaluation"
78-
uv run main.py common-benchmark --config_file_name=agent_quickstart_1 benchmark=futurex output_dir="logs/futurex/$(date +"%Y%m%d_%H%M")"
78+
uv run main.py common-benchmark --config_file_name=agent_quickstart_reading benchmark=futurex output_dir="logs/futurex/$(date +"%Y%m%d_%H%M")"
7979
```
8080

8181
!!! tip "Progress Monitoring and Resume"
@@ -88,7 +88,7 @@ uv run main.py common-benchmark --config_file_name=agent_quickstart_1 benchmark=
8888
If you need to resume an interrupted evaluation, specify the same output directory to continue from where you left off.
8989

9090
```bash title="Resume Evaluation, e.g."
91-
uv run main.py common-benchmark --config_file_name=agent_quickstart_1 benchmark=futurex output_dir="logs/futurex/20250918_1010"
91+
uv run main.py common-benchmark --config_file_name=agent_quickstart_reading benchmark=futurex output_dir="logs/futurex/20250918_1010"
9292
```
9393

9494
### Step 4: Extract Results
@@ -184,13 +184,13 @@ Check the generated files for voting analysis:
184184

185185
```bash title="Check Voting Results"
186186
# View submission file with voting results
187-
cat logs/futurex/agent_quickstart_1_*/futurex_submission.jsonl
187+
cat logs/futurex/agent_quickstart_reading_*/futurex_submission.jsonl
188188

189189
# Check individual run results
190-
ls logs/futurex/agent_quickstart_1_*/run_*/
190+
ls logs/futurex/agent_quickstart_reading_*/run_*/
191191

192192
# Check progress and voting statistics
193-
uv run python utils/progress_check/check_futurex_progress.py logs/futurex/agent_quickstart_1_*
193+
uv run python utils/progress_check/check_futurex_progress.py logs/futurex/agent_quickstart_reading_*
194194
```
195195

196196
### Manual Voting Aggregation
@@ -199,13 +199,13 @@ You can also manually run the voting aggregation:
199199

200200
```bash title="Manual Voting Aggregation"
201201
# Aggregate multiple runs with majority voting
202-
uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_1_* --aggregate
202+
uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_reading_* --aggregate
203203

204204
# Force single run mode (if needed)
205-
uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_1_*/run_1 --single
205+
uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_reading_*/run_1 --single
206206

207207
# Specify custom output file
208-
uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_1_* -o my_voted_predictions.jsonl
208+
uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_reading_* -o my_voted_predictions.jsonl
209209
```
210210

211211
### Voting Output Format
@@ -249,7 +249,7 @@ For example, `"vote_counts": {"No": 2}` means 2 out of 2 runs predicted "No", in
249249
After running multiple evaluations, you'll find the following structure:
250250

251251
```
252-
logs/futurex/agent_quickstart_1_YYYYMMDD_HHMM/
252+
logs/futurex/agent_quickstart_reading_YYYYMMDD_HHMM/
253253
├── futurex_submission.jsonl # Final voted predictions
254254
├── run_1/ # First run results
255255
│ ├── benchmark_results.jsonl # Individual task results

0 commit comments

Comments (0)