@@ -84,67 +84,3 @@ def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
8484 name_0 = "vllm_output" ,
8585 name_1 = "prefix_cache_output" ,
8686 )
87-
88-
@pytest.mark.skip(reason="Fix me, the accuracy is not correct")
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [50])
def test_prefix_cache_with_ascend_scheduler(model: str,
                                            max_tokens: int) -> None:
    """Compare greedy outputs of the ascend scheduler with and without
    prefix caching.

    The same prompts are generated twice with the ascend scheduler enabled:
    once with the plain configuration and once with
    ``enable_prefix_caching`` switched on. Both result lists are then
    required to be equal.

    Args:
        model: Model identifier handed to ``VllmRunner``.
        max_tokens: Number of tokens to generate greedily per prompt.
    """
    # Engine settings shared by both runs; only the scheduler config differs.
    engine_kwargs = dict(
        enforce_eager=False,
        max_model_len=2048,
        tensor_parallel_size=2,
        gpu_memory_utilization=0.7,
    )
    baseline_config = {
        'ascend_scheduler_config': {
            'enabled': True,
        },
    }
    prefix_cache_config = {
        'ascend_scheduler_config': {
            'enabled': True,
            'enable_prefix_caching': True,
        },
    }

    with VllmRunner(model,
                    additional_config=baseline_config,
                    **engine_kwargs) as baseline_runner:
        vllm_output = baseline_runner.generate_greedy(INPUT_PROMPTS,
                                                      max_tokens)

    with VllmRunner(model,
                    additional_config=prefix_cache_config,
                    **engine_kwargs) as cached_runner:
        prefix_cache_output = cached_runner.generate_greedy(
            INPUT_PROMPTS, max_tokens)

    # TODO: enabling APC together with chunked prefill on the ascend
    # scheduler leads to an accuracy problem, so that variant is disabled
    # for now. Fix it or drop the ascend scheduler in the future.
    # The disabled variant used the prefix-cache config above plus
    # "enable_chunked_prefill": True, ran with enforce_eager=True, and
    # compared its output ("chunk_prefill_prefix_cache_output") against
    # "prefix_cache_output" via check_outputs_equal.

    check_outputs_equal(
        outputs_0_lst=vllm_output,
        outputs_1_lst=prefix_cache_output,
        name_0="vllm_output",
        name_1="prefix_cache_output",
    )
0 commit comments