refactor: split BRIGHT benchmark into individual subset tasks
#3285
base: main
```diff
@@ -1198,6 +1198,42 @@
     """,
 )
 
+
+BRIGHT_SUBSETS = Benchmark(
+    name="BRIGHT (subsets)",
+    display_name="Reasoning Retrieval (subsets)",
+    tasks=get_tasks(
+        tasks=[
+            "BrightBiologyRetrieval",
+            "BrightEarthScienceRetrieval",
+            "BrightEconomicsRetrieval",
+            "BrightPsychologyRetrieval",
+            "BrightRoboticsRetrieval",
+            "BrightStackoverflowRetrieval",
+            "BrightSustainableLivingRetrieval",
+            "BrightPonyRetrieval",
+            "BrightLeetcodeRetrieval",
+            "BrightAopsRetrieval",
+            "BrightTheoremQATheoremsRetrieval",
+            "BrightTheoremQAQuestionsRetrieval",
+        ],
+    ),
+    description="""BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval (Individual Subsets).
+    This benchmark contains individual subset tasks for each domain in the BRIGHT benchmark,
+    allowing for domain-specific evaluation. The subsets include: biology, earth science, economics,
+    psychology, robotics, stackoverflow, sustainable living, pony, leetcode, aops, theoremqa_theorems,
+    and theoremqa_questions.
+    """,
+    reference="https://brightbenchmark.github.io/",
+    citation=r"""
+@article{su2024bright,
+  author = {Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others},
+  journal = {arXiv preprint arXiv:2407.12883},
+  title = {Bright: A realistic and challenging benchmark for reasoning-intensive retrieval},
+  year = {2024},
+}
+""",
+)
+
+
 BRIGHT_LONG = Benchmark(
     name="BRIGHT (long)",
     tasks=MTEBTasks(
```
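The hunk above registers each BRIGHT domain as its own task inside a named `Benchmark` container. As a self-contained illustration of why that split is useful, here is a minimal sketch using a hypothetical stand-in dataclass and `get_tasks` helper (the real classes live in the mteb package and carry more fields and behavior):

```python
from dataclasses import dataclass
from typing import List, Optional


# Hypothetical stand-in for mteb's Benchmark container, kept to the
# fields used in this diff; not the real mteb class.
@dataclass
class Benchmark:
    name: str
    tasks: List[str]
    display_name: Optional[str] = None


def get_tasks(tasks: List[str]) -> List[str]:
    # In mteb this resolves task names to task objects; here we only
    # check the naming convention the BRIGHT subset tasks follow.
    for t in tasks:
        assert t.startswith("Bright") and t.endswith("Retrieval"), t
    return list(tasks)


BRIGHT_SUBSETS = Benchmark(
    name="BRIGHT (subsets)",
    display_name="Reasoning Retrieval (subsets)",
    tasks=get_tasks(
        tasks=[
            "BrightBiologyRetrieval",
            "BrightEarthScienceRetrieval",
            "BrightEconomicsRetrieval",
            # ...nine more per-domain tasks in the actual diff
        ]
    ),
)

# With one task per domain, a single domain can be selected in isolation.
biology_only = [t for t in BRIGHT_SUBSETS.tasks if "Biology" in t]
print(biology_only)  # ['BrightBiologyRetrieval']
```

This is the property the PR is after: domain-specific evaluation falls out of simple filtering over task names instead of re-slicing one monolithic task.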
```diff
@@ -1227,6 +1263,37 @@
     """,
 )
 
+
+BRIGHT_SUBSETS_LONG = Benchmark(
+    name="BRIGHT (long subsets)",
+    display_name="Reasoning Retrieval (long subsets)",
+    tasks=get_tasks(
+        tasks=[
+            "BrightBiologyLongRetrieval",
+            "BrightEarthScienceLongRetrieval",
+            "BrightEconomicsLongRetrieval",
+            "BrightPsychologyLongRetrieval",
+            "BrightRoboticsLongRetrieval",
+            "BrightStackoverflowLongRetrieval",
+            "BrightSustainableLivingLongRetrieval",
+            "BrightPonyLongRetrieval",
+        ],
+    ),
+    description="""BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval (Long Individual Subsets).
+    This benchmark contains individual subset tasks for each domain in the BRIGHT benchmark with long documents,
+    allowing for domain-specific evaluation with longer context. The subsets include: biology, earth science,
+    economics, psychology, robotics, stackoverflow, sustainable living, and pony.
+    """,
+    reference="https://brightbenchmark.github.io/",
+    citation=r"""
+@article{su2024bright,
+  author = {Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others},
+  journal = {arXiv preprint arXiv:2407.12883},
+  title = {Bright: A realistic and challenging benchmark for reasoning-intensive retrieval},
+  year = {2024},
+}
+""",
+)
+
+
 CODE_RAG = Benchmark(
     name="CodeRAG",
     tasks=get_tasks(
```

Review thread on the `BRIGHT_SUBSETS_LONG` definition:

Contributor: Do we want to have both a long and a short (I would probably argue that we just have one with two different columns)

Member: Agree

Contributor (Author): Do you mean we don't need to create a separate

Member: Yes
```diff
@@ -1619,8 +1686,7 @@
             "TRECCOVID-NL",
         ],
     ),
-    description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated "
-    "translation.",
+    description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated translation.",
     reference="https://arxiv.org/abs/2412.08329",
     contacts=["nikolay-banar"],
     citation=r"""
```
Review thread on the new benchmarks:

Member: Hmm, from a user POV it is quite unclear which one to use here (would be great to replace the old one with these).

Author: Should I remove the existing `BRIGHT` benchmark and rename `BRIGHT_SUBSET` to `BRIGHT`?

Member: I would probably replace, but if the scores do not align 1-1, then I would version it (keep the two old ones, but name this BRIGHT(v1.1) and add to the description what has changed).

Author: Probably they won't match 1-1, because we've added instructions to the tasks.

Member: So let us do BRIGHT(v1.1) and add the description: "v1.1 refactors BRIGHT and BRIGHT(long) into a single benchmark (with separate columns) and added prompts to individual tasks".
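The agreed direction can be sketched as follows. This is only an illustration of the proposed v1.1 task layout (task names taken from the diff above), not code from the PR, and the `Benchmark` wiring is omitted:

```python
# Per-domain BRIGHT tasks added in this PR (standard documents).
BRIGHT_TASKS = [
    "BrightBiologyRetrieval",
    "BrightEarthScienceRetrieval",
    "BrightEconomicsRetrieval",
    "BrightPsychologyRetrieval",
    "BrightRoboticsRetrieval",
    "BrightStackoverflowRetrieval",
    "BrightSustainableLivingRetrieval",
    "BrightPonyRetrieval",
    "BrightLeetcodeRetrieval",
    "BrightAopsRetrieval",
    "BrightTheoremQATheoremsRetrieval",
    "BrightTheoremQAQuestionsRetrieval",
]

# Long-document variants exist only for the first eight domains.
BRIGHT_LONG_TASKS = [
    name.replace("Retrieval", "LongRetrieval") for name in BRIGHT_TASKS[:8]
]

# A single BRIGHT(v1.1) benchmark would carry both lists, letting the
# leaderboard render standard and long results as separate columns.
BRIGHT_V1_1_TASKS = BRIGHT_TASKS + BRIGHT_LONG_TASKS
print(len(BRIGHT_V1_1_TASKS))  # 20
```

Folding both lists into one benchmark matches the reviewer's suggestion of one entry with two columns, while per-domain tasks remain individually selectable.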