@@ -313,6 +313,15 @@ steps:
313313 - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
314314 - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
315315
316+ - label : V1 Test attention (H100) # 10min
317+ timeout_in_minutes : 30
318+ gpu : h100
319+ source_file_dependencies :
320+ - vllm/v1/attention
321+ - tests/v1/attention
322+ commands :
323+ - pytest -v -s v1/attention
324+
316325- label : V1 Test others (CPU) # 5 mins
317326 source_file_dependencies :
318327 - vllm/
@@ -435,6 +444,18 @@ steps:
435444 - pytest -v -s compile/test_full_graph.py
436445 - pytest -v -s compile/test_fusions_e2e.py
437446
447+ - label : Cudagraph test
448+ timeout_in_minutes : 20
449+ mirror_hardwares : [amdexperimental]
450+ source_file_dependencies :
451+ - tests/v1/cudagraph
452+ - vllm/v1/cudagraph_dispatcher.py
453+ - vllm/config/compilation.py
454+ - vllm/compilation
455+ commands :
456+ - pytest -v -s v1/cudagraph/test_cudagraph_dispatch.py
457+ - pytest -v -s v1/cudagraph/test_cudagraph_mode.py
458+
438459- label : Kernels Core Operation Test # 48min
439460 timeout_in_minutes : 75
440461 mirror_hardwares : [amdexperimental]
@@ -687,8 +708,10 @@ steps:
687708 - vllm/
688709 - tests/models/language/generation
689710 commands :
690- # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
691- - pip install 'git+https://github.com/Dao-AILab/[email protected]'
711+ # Install fast path packages for testing against transformers
712+ # Note: also needed to run plamo2 model in vLLM
713+ - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/[email protected]'
714+ - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/[email protected]'
692715 - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
693716
694717- label : Language Models Test (PPL)
0 commit comments