Commit f917d5e

Remove useless env (#4858)
Clean up unused environment variables. These envs are no longer used anywhere: `VLLM_ASCEND_TRACE_RECOMPILES`, `VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE`, `VLLM_ASCEND_MLA_PA`, `PHYSICAL_DEVICES`.

- vLLM version: v0.12.0
- vLLM main: vllm-project/vllm@ad32e3e

Signed-off-by: wangxiyuan <[email protected]>
1 parent 08441ba commit f917d5e
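
For context on what these entries look like, vllm_ascend/envs.py keeps its environment variables in a table of zero-argument lambdas (see the envs.py diff below). Here is a minimal sketch of that pattern; the `env_variables` name matches the style shown in the diff, but the module-level `__getattr__` wiring is an assumption based on the usual vLLM convention, not something this commit shows:

```python
import os
from typing import Any, Callable

# Each entry is a zero-arg lambda, so the environment variable is read
# at access time rather than once at import time.
env_variables: dict[str, Callable[[], Any]] = {
    "USE_OPTIMIZED_MODEL":
    lambda: bool(int(os.getenv("USE_OPTIMIZED_MODEL", "1"))),
    "MSMONITOR_USE_DAEMON":
    lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", "0"))),
}


def __getattr__(name: str) -> Any:
    # Assumed wiring: lets callers write `envs.USE_OPTIMIZED_MODEL`
    # against this module (PEP 562 module __getattr__).
    if name in env_variables:
        return env_variables[name]()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```

Because each value is a lambda, removing an entry such as `VLLM_ASCEND_MLA_PA` simply drops it from the table without changing when the remaining variables are evaluated.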

4 files changed: +0 −28 lines

tests/e2e/multicard/test_offline_inference_distributed.py

Lines changed: 0 additions & 1 deletion
@@ -138,7 +138,6 @@ def test_models_distributed_Qwen3_W4A8DYNAMIC_new_version(model):
 
 
 @pytest.mark.parametrize("model", DEEPSEEK_W4A8_MODELS)
-@patch.dict(os.environ, {"VLLM_ASCEND_MLA_PA": "1"})
 @patch.dict(os.environ, {"HCCL_BUFFSIZE": "1024"})
 def test_models_distributed_DeepSeek_W4A8DYNAMIC(model):
     prompts = [
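
For readers unfamiliar with the decorator being removed above: `unittest.mock.patch.dict` overrides `os.environ` only while the decorated test runs, which is why dropping the `VLLM_ASCEND_MLA_PA` line leaves the `HCCL_BUFFSIZE` override untouched. A self-contained sketch (the test name here is hypothetical):

```python
import os
from unittest.mock import patch


# patch.dict applies the mapping while the decorated function runs and
# restores os.environ afterwards.
@patch.dict(os.environ, {"HCCL_BUFFSIZE": "1024"})
def test_env_is_patched():
    assert os.environ["HCCL_BUFFSIZE"] == "1024"


# Calling the test directly demonstrates the patch; outside the call,
# os.environ is back to its original state.
test_env_is_patched()
```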

tests/ut/kv_connector/test_mooncake_connector.py

Lines changed: 0 additions & 1 deletion
@@ -1055,7 +1055,6 @@ def register_memory(self, *args, **kwargs):
 
 class MockEnvsAscend:
     MOONCAKE_CONNECTOR_PROTOCOL = "mock_protocol"
-    PHYSICAL_DEVICES = "10,11"
 
 
 def mock_get_tensor_model_parallel_rank():
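
`MockEnvsAscend` is a plain stub class whose attributes stand in for the real envs module, so once `PHYSICAL_DEVICES` is gone from `envs.py`, the stub drops the matching attribute. The diff does not show how the stub is injected into the code under test; below is a hedged sketch of one common wiring, using stand-in objects rather than the real connector:

```python
from types import SimpleNamespace
from unittest.mock import patch


class MockEnvsAscend:
    MOONCAKE_CONNECTOR_PROTOCOL = "mock_protocol"


# Stand-in for a module under test that reads configuration from `envs`.
module_under_test = SimpleNamespace(
    envs=SimpleNamespace(MOONCAKE_CONNECTOR_PROTOCOL="real_protocol"))

with patch.object(module_under_test, "envs", MockEnvsAscend):
    # Inside the patch, code reading module_under_test.envs sees the stub.
    assert module_under_test.envs.MOONCAKE_CONNECTOR_PROTOCOL == "mock_protocol"

# Outside, the original object is restored.
assert module_under_test.envs.MOONCAKE_CONNECTOR_PROTOCOL == "real_protocol"
```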

tests/ut/kv_connector/test_mooncake_layerwise_connector.py

Lines changed: 0 additions & 9 deletions
@@ -893,12 +893,3 @@ def test_register_kv_caches_mla_case(self):
         worker.register_kv_caches(mla_caches)
         self.assertTrue(worker.use_mla)
         self.assertEqual(len(worker.block_len), 2)
-
-    def test_device_id_selection_with_physical_devices(self):
-        worker = MooncakeLayerwiseConnectorWorker(self.vllm_config,
-                                                  self.engine_id)
-        self.assertIsNotNone(worker.engine)
-
-
-if __name__ == '__main__':
-    unittest.main()

vllm_ascend/envs.py

Lines changed: 0 additions & 17 deletions
@@ -68,9 +68,6 @@
     # that the correct package is installed.
     "VLLM_VERSION":
     lambda: os.getenv("VLLM_VERSION", None),
-    # Whether to enable the trace recompiles from pytorch.
-    "VLLM_ASCEND_TRACE_RECOMPILES":
-    lambda: bool(int(os.getenv("VLLM_ASCEND_TRACE_RECOMPILES", '0'))),
     # Whether to enable fused_experts_allgather_ep. MoeInitRoutingV3 and
     # GroupedMatmulFinalizeRouting operators are combined to implement EP.
     "VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP":
@@ -86,16 +83,6 @@
     # value to False to disable the optimized model.
     "USE_OPTIMIZED_MODEL":
     lambda: bool(int(os.getenv('USE_OPTIMIZED_MODEL', '1'))),
-    # The tolerance of the kv cache size, if the difference between the
-    # actual kv cache size and the cached kv cache size is less than this value,
-    # then the cached kv cache size will be used.
-    "VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE":
-    lambda: int(
-        os.getenv("VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE", 64)),
-    # Whether to enable mla_pa for deepseek mla decode, this flag will be removed after its available torch_npu is public accessible
-    # and the mla_pa will be the default path of deepseek decode path.
-    "VLLM_ASCEND_MLA_PA":
-    lambda: int(os.getenv("VLLM_ASCEND_MLA_PA", 0)),
     # Whether to enable MatmulAllReduce fusion kernel when tensor parallel is enabled.
     # this feature is supported in A2, and eager mode will get better performance.
     "VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE":
@@ -130,10 +117,6 @@
     # this feature in eager mode will get better performance.
     "VLLM_ASCEND_ENABLE_MLP_OPTIMIZE":
     lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_MLP_OPTIMIZE", '0'))),
-    # Determine the number of physical devices in a non-full-use scenario
-    # caused by the initialization of the Mooncake connector.
-    "PHYSICAL_DEVICES":
-    lambda: os.getenv("PHYSICAL_DEVICES", None),
     # Whether to enable msMonitor tool to monitor the performance of vllm-ascend.
     "MSMONITOR_USE_DAEMON":
     lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", '0'))),
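
A side note on the flag-reading idiom visible in the kept lines: `bool(int(os.getenv(name, '0')))` accepts only integer strings. A quick standalone check:

```python
import os

os.environ["VLLM_ASCEND_ENABLE_MLP_OPTIMIZE"] = "1"
flag = bool(int(os.getenv("VLLM_ASCEND_ENABLE_MLP_OPTIMIZE", "0")))
assert flag is True

# Values like "true" raise ValueError here: int("true") fails, so only
# "0"/"1" (or other integer strings) are valid for these switches.
```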
