From e0926c982a26fde4630391140b1a6ead23334b33 Mon Sep 17 00:00:00 2001 From: Yue Deng Date: Thu, 16 Oct 2025 15:35:28 +0800 Subject: [PATCH] fix hle text only --- docs/mkdocs/docs/hle_text_only.md | 2 +- utils/prepare_benchmark/gen_hle_text_only.py | 24 +++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/docs/mkdocs/docs/hle_text_only.md b/docs/mkdocs/docs/hle_text_only.md index ab4881b..ddf1752 100644 --- a/docs/mkdocs/docs/hle_text_only.md +++ b/docs/mkdocs/docs/hle_text_only.md @@ -9,7 +9,7 @@ More details: [HLE text only Dataset on HuggingFace](https://huggingface.co/data ## Dataset Overview !!! info "HLE Dataset (text only)" - The dataset is a text-only subset of HLE. + The experiments are conducted on the **500-question text-only subset** of the HLE dataset, available from [WebThinker](https://github.com/RUC-NLPIR/WebThinker/blob/main/data/HLE/test.json). --- diff --git a/utils/prepare_benchmark/gen_hle_text_only.py b/utils/prepare_benchmark/gen_hle_text_only.py index bc43634..263d1d3 100644 --- a/utils/prepare_benchmark/gen_hle_text_only.py +++ b/utils/prepare_benchmark/gen_hle_text_only.py @@ -2,29 +2,31 @@ # # SPDX-License-Identifier: Apache-2.0 + +import json from typing import Generator, MutableMapping -from datasets import load_dataset +import requests from utils.prepare_benchmark.common import Task def gen_hle_text_only(hf_token: str) -> Generator[Task, None, None]: - dataset = load_dataset("macabdul9/hle_text_only", split="test", token=hf_token) - for x in dataset: - metadata: MutableMapping = x # type: ignore - task_id = metadata.pop("id") - question = metadata.pop("question") - gt = metadata.pop("answer") - metadata.pop("image_preview") - metadata.pop("rationale_image") + response = requests.get( + "https://raw.githubusercontent.com/RUC-NLPIR/WebThinker/refs/heads/main/data/HLE/test.json" + ) + dataset = json.loads(response.content) + for row in dataset: + metadata: MutableMapping = row + task_id = str(metadata.pop("id", "")) 
+ question = metadata.pop("Question", "") + answer = metadata.pop("answer", "") task = Task( task_id=task_id, task_question=question, - ground_truth=gt, + ground_truth=answer, file_path=None, metadata=metadata, ) yield task - return