[Bugfix] Fix accuracy for DeepSeek-V2-Lite (#3016)

zhangxinyuehfad · web-flow · commit 0c04bf1e3692 · 2025-09-18T23:58:23.000+08:00
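### What this PR does / why we need it?

Fix accuracy for DeepSeek-V2-Lite. The lm-eval correctness test now reads `batch_size` from the per-model YAML config (falling back to `"auto"` when unset), and the DeepSeek-V2-Lite config sets `batch_size: 8` and `gpu_memory_utilization: 0.7`.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

CI passed.

- vLLM version: v0.10.2
- vLLM main: vllm-project/vllm@66072b3

Signed-off-by: hfadzxy <starmoon_zhang@163.com>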
diff --git a/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml b/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml
@@ -7,6 +7,8 @@ tasks:
   - name: "exact_match,flexible-extract"
     value: 0.375
 tensor_parallel_size: 2
+batch_size: 8
+gpu_memory_utilization: 0.7
 apply_chat_template: False
 fewshot_as_multiturn: False
 trust_remote_code: True
diff --git a/tests/e2e/models/test_lm_eval_correctness.py b/tests/e2e/models/test_lm_eval_correctness.py
@@ -84,7 +84,7 @@ def generate_report(tp_size, eval_config, report_data, report_dir, env_config):
         apply_chat_template=eval_config.get("apply_chat_template", True),
         fewshot_as_multiturn=eval_config.get("fewshot_as_multiturn", True),
         limit=eval_config.get("limit", "N/A"),
-        batch_size="auto",
+        batch_size=eval_config.get("batch_size", "auto"),
         num_fewshot=eval_config.get("num_fewshot", "N/A"),
         rows=report_data["rows"],
         parallel_mode=parallel_mode)
@@ -110,7 +110,7 @@ def test_lm_eval_correctness_param(config_filename, tp_size, report_dir,
         "apply_chat_template": eval_config.get("apply_chat_template", True),
         "fewshot_as_multiturn": eval_config.get("fewshot_as_multiturn", True),
         "limit": eval_config.get("limit", None),
-        "batch_size": "auto",
+        "batch_size": eval_config.get("batch_size", "auto"),
     }
     for s in ["num_fewshot", "fewshot_as_multiturn", "apply_chat_template"]:
         val = eval_config.get(s, None)