Skip to content

Commit 516c3f7

Browse files
authored
[Bugfix] Fix logic for choosing default prefix caching setting (vllm-project#29393)
Signed-off-by: Thomas Parnell <[email protected]>
1 parent 51fc9e0 commit 516c3f7

File tree

2 files changed

+7
-2
lines changed

2 files changed

+7
-2
lines changed

tests/engine/test_arg_utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -277,8 +277,9 @@ def test_prefix_cache_default():
277277
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
278278
args = parser.parse_args([])
279279

280+
# should be None by default (depends on model).
280281
engine_args = EngineArgs.from_cli_args(args=args)
281-
assert engine_args.enable_prefix_caching, "prefix caching should default to on."
282+
assert engine_args.enable_prefix_caching is None
282283

283284
# with flag to turn it on.
284285
args = parser.parse_args(["--enable-prefix-caching"])

vllm/engine/arg_utils.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -880,7 +880,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
880880
"--num-gpu-blocks-override", **cache_kwargs["num_gpu_blocks_override"]
881881
)
882882
cache_group.add_argument(
883-
"--enable-prefix-caching", **cache_kwargs["enable_prefix_caching"]
883+
"--enable-prefix-caching",
884+
**{
885+
**cache_kwargs["enable_prefix_caching"],
886+
"default": None,
887+
},
884888
)
885889
cache_group.add_argument(
886890
"--prefix-caching-hash-algo", **cache_kwargs["prefix_caching_hash_algo"]

0 commit comments

Comments
 (0)