@@ -163,6 +163,23 @@ def get_inductor_factors() -> list[Any]:
163163 return factors
164164
165165
def is_compile_cache_enabled(
    vllm_additional_inductor_config: dict[str, Any],
) -> bool:
    """Return True iff the torch.compile cache may be used.

    The cache is treated as enabled only when none of the three disable
    switches is set: the vLLM environment variable, the global inductor
    config flag, and the per-run additional inductor config override.
    """
    # TODO(gmagogsfm): Replace torch._inductor.config.force_disable_caches
    # with torch.compiler.config.force_disable_caches when minimum PyTorch
    # version reaches 2.10
    cache_disabled = (
        envs.VLLM_DISABLE_COMPILE_CACHE
        or torch._inductor.config.force_disable_caches
        or vllm_additional_inductor_config.get("force_disable_caches", False)
    )
    return not cache_disabled
181+
182+
166183class InductorStandaloneAdaptor (CompilerInterface ):
167184 """
168185 The adaptor for the Inductor compiler.
@@ -222,7 +239,8 @@ def compile(
222239 # Save the compiled artifact to disk in the specified path
223240 assert key is not None
224241 path = os .path .join (self .cache_dir , key )
225- if not envs .VLLM_DISABLE_COMPILE_CACHE :
242+
243+ if is_compile_cache_enabled (compiler_config ):
226244 compiled_graph .save (path = path , format = self .save_format )
227245 compilation_counter .num_compiled_artifacts_saved += 1
228246 return compiled_graph , (key , path )
@@ -472,10 +490,8 @@ def _get_shape_env() -> AlwaysHitShapeEnv:
472490 config_patches = current_config ,
473491 )
474492
475- # We treat VLLM_DISABLE_COMPILE_CACHE as the overall switch for torch
476- # compilation cache. So turn off the checks if we disable the
477- # compilation cache.
478- if not envs .VLLM_DISABLE_COMPILE_CACHE :
493+ # Turn off the checks if we disable the compilation cache.
494+ if is_compile_cache_enabled (compiler_config ):
479495 if hash_str is None :
480496 raise RuntimeError (
481497 "vLLM failed to compile the model. The most "
0 commit comments