Skip to content

Commit ecca3fe

Browse files
[Frontend] Add vllm bench sweep to CLI (#27639)
Signed-off-by: DarkLight1337 <[email protected]> Signed-off-by: Harry Mellor <[email protected]> Co-authored-by: Harry Mellor <[email protected]>
1 parent 9a0d2f0 commit ecca3fe

File tree

19 files changed

+340
-168
lines changed

19 files changed

+340
-168
lines changed

docs/cli/.nav.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ nav:
55
- complete.md
66
- run-batch.md
77
- vllm bench:
8-
- bench/*.md
8+
- bench/**/*.md

docs/cli/bench/sweep/plot.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# vllm bench sweep plot
2+
3+
## JSON CLI Arguments
4+
5+
--8<-- "docs/cli/json_tip.inc.md"
6+
7+
## Options
8+
9+
--8<-- "docs/argparse/bench_sweep_plot.md"

docs/cli/bench/sweep/serve.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# vllm bench sweep serve
2+
3+
## JSON CLI Arguments
4+
5+
--8<-- "docs/cli/json_tip.inc.md"
6+
7+
## Options
8+
9+
--8<-- "docs/argparse/bench_sweep_serve.md"

docs/cli/bench/sweep/serve_sla.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# vllm bench sweep serve_sla
2+
3+
## JSON CLI Arguments
4+
5+
--8<-- "docs/cli/json_tip.inc.md"
6+
7+
## Options
8+
9+
--8<-- "docs/argparse/bench_sweep_serve_sla.md"

docs/contributing/benchmarks.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ Follow these steps to run the script:
10611061
Example command:
10621062

10631063
```bash
1064-
python -m vllm.benchmarks.sweep.serve \
1064+
vllm bench sweep serve \
10651065
--serve-cmd 'vllm serve meta-llama/Llama-2-7b-chat-hf' \
10661066
--bench-cmd 'vllm bench serve --model meta-llama/Llama-2-7b-chat-hf --backend vllm --endpoint /v1/completions --dataset-name sharegpt --dataset-path benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json' \
10671067
--serve-params benchmarks/serve_hparams.json \
@@ -1109,7 +1109,7 @@ For example, to ensure E2E latency within different target values for 99% of req
11091109
Example command:
11101110

11111111
```bash
1112-
python -m vllm.benchmarks.sweep.serve_sla \
1112+
vllm bench sweep serve_sla \
11131113
--serve-cmd 'vllm serve meta-llama/Llama-2-7b-chat-hf' \
11141114
--bench-cmd 'vllm bench serve --model meta-llama/Llama-2-7b-chat-hf --backend vllm --endpoint /v1/completions --dataset-name sharegpt --dataset-path benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json' \
11151115
--serve-params benchmarks/serve_hparams.json \
@@ -1138,7 +1138,7 @@ The algorithm for adjusting the SLA variable is as follows:
11381138
Example command:
11391139

11401140
```bash
1141-
python -m vllm.benchmarks.sweep.plot benchmarks/results/<timestamp> \
1141+
vllm bench sweep plot benchmarks/results/<timestamp> \
11421142
--var-x max_concurrency \
11431143
--row-by random_input_len \
11441144
--col-by random_output_len \

docs/mkdocs/hooks/generate_argparse.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,20 @@ def auto_mock(module, attr, max_mocks=50):
5656
)
5757

5858

59-
latency = auto_mock("vllm.benchmarks", "latency")
60-
serve = auto_mock("vllm.benchmarks", "serve")
61-
throughput = auto_mock("vllm.benchmarks", "throughput")
59+
bench_latency = auto_mock("vllm.benchmarks", "latency")
60+
bench_serve = auto_mock("vllm.benchmarks", "serve")
61+
bench_sweep_plot = auto_mock("vllm.benchmarks.sweep.plot", "SweepPlotArgs")
62+
bench_sweep_serve = auto_mock("vllm.benchmarks.sweep.serve", "SweepServeArgs")
63+
bench_sweep_serve_sla = auto_mock(
64+
"vllm.benchmarks.sweep.serve_sla", "SweepServeSLAArgs"
65+
)
66+
bench_throughput = auto_mock("vllm.benchmarks", "throughput")
6267
AsyncEngineArgs = auto_mock("vllm.engine.arg_utils", "AsyncEngineArgs")
6368
EngineArgs = auto_mock("vllm.engine.arg_utils", "EngineArgs")
6469
ChatCommand = auto_mock("vllm.entrypoints.cli.openai", "ChatCommand")
6570
CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
66-
cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
67-
run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
71+
openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
72+
openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
6873
FlexibleArgumentParser = auto_mock(
6974
"vllm.utils.argparse_utils", "FlexibleArgumentParser"
7075
)
@@ -114,6 +119,9 @@ def add_arguments(self, actions):
114119
self._markdown_output.append(f"{action.help}\n\n")
115120

116121
if (default := action.default) != SUPPRESS:
122+
# Make empty string defaults visible
123+
if default == "":
124+
default = '""'
117125
self._markdown_output.append(f"Default: `{default}`\n\n")
118126

119127
def format_help(self):
@@ -150,17 +158,23 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
150158

151159
# Create parsers to document
152160
parsers = {
161+
# Engine args
153162
"engine_args": create_parser(EngineArgs.add_cli_args),
154163
"async_engine_args": create_parser(
155164
AsyncEngineArgs.add_cli_args, async_args_only=True
156165
),
157-
"serve": create_parser(cli_args.make_arg_parser),
166+
# CLI
167+
"serve": create_parser(openai_cli_args.make_arg_parser),
158168
"chat": create_parser(ChatCommand.add_cli_args),
159169
"complete": create_parser(CompleteCommand.add_cli_args),
160-
"bench_latency": create_parser(latency.add_cli_args),
161-
"bench_throughput": create_parser(throughput.add_cli_args),
162-
"bench_serve": create_parser(serve.add_cli_args),
163-
"run-batch": create_parser(run_batch.make_arg_parser),
170+
"run-batch": create_parser(openai_run_batch.make_arg_parser),
171+
# Benchmark CLI
172+
"bench_latency": create_parser(bench_latency.add_cli_args),
173+
"bench_serve": create_parser(bench_serve.add_cli_args),
174+
"bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
175+
"bench_sweep_serve": create_parser(bench_sweep_serve.add_cli_args),
176+
"bench_sweep_serve_sla": create_parser(bench_sweep_serve_sla.add_cli_args),
177+
"bench_throughput": create_parser(bench_throughput.add_cli_args),
164178
}
165179

166180
# Generate documentation for each parser

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ def _read_requirements(filename: str) -> list[str]:
709709
ext_modules=ext_modules,
710710
install_requires=get_requirements(),
711711
extras_require={
712-
"bench": ["pandas", "datasets"],
712+
"bench": ["pandas", "matplotlib", "seaborn", "datasets"],
713713
"tensorizer": ["tensorizer==2.10.1"],
714714
"fastsafetensors": ["fastsafetensors >= 0.1.10"],
715715
"runai": ["runai-model-streamer[s3,gcs] >= 0.14.0"],

tools/profiler/visualize_layerwise_profile.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def all_the_same(items) -> bool:
141141
"""
142142

143143

144-
def group_trace_by_operations(trace_df: pd.DataFrame) -> pd.DataFrame:
144+
def group_trace_by_operations(trace_df: "pd.DataFrame") -> "pd.DataFrame":
145145
def is_rms_norm(op_name: str):
146146
if "rms_norm_kernel" in op_name:
147147
return True
@@ -370,12 +370,12 @@ def is_reduce_kernel(op_name: str):
370370

371371

372372
def plot_trace_df(
373-
traces_df: pd.DataFrame,
373+
traces_df: "pd.DataFrame",
374374
plot_metric: str,
375375
plot_title: str,
376376
output: Path | None = None,
377377
):
378-
def get_phase_description(traces_df: pd.DataFrame, phase: str) -> str:
378+
def get_phase_description(traces_df: "pd.DataFrame", phase: str) -> str:
379379
phase_df = traces_df.query(f'phase == "{phase}"')
380380
descs = phase_df["phase_desc"].to_list()
381381
assert all([desc == descs[0] for desc in descs])
@@ -438,7 +438,7 @@ def main(
438438
top_k: int,
439439
json_nodes_to_fold: list[str],
440440
):
441-
def prepare_data(profile_json: dict, step_keys: list[str]) -> pd.DataFrame:
441+
def prepare_data(profile_json: dict, step_keys: list[str]) -> "pd.DataFrame":
442442
def get_entries_and_traces(key: str):
443443
entries_and_traces: list[tuple[Any, Any]] = []
444444
for root in profile_json[key]["summary_stats"]:
@@ -449,8 +449,8 @@ def get_entries_and_traces(key: str):
449449
return entries_and_traces
450450

451451
def keep_only_top_entries(
452-
df: pd.DataFrame, metric: str, top_k: int = 9
453-
) -> pd.DataFrame:
452+
df: "pd.DataFrame", metric: str, top_k: int = 9
453+
) -> "pd.DataFrame":
454454
df.loc[df.nsmallest(len(df) - top_k + 1, metric).index, ["name"]] = "others"
455455
return df
456456

vllm/benchmarks/sweep/cli.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
import argparse
4+
5+
from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG
6+
7+
from .plot import SweepPlotArgs
8+
from .plot import main as plot_main
9+
from .serve import SweepServeArgs
10+
from .serve import main as serve_main
11+
from .serve_sla import SweepServeSLAArgs
12+
from .serve_sla import main as serve_sla_main
13+
14+
# Registry of `vllm bench sweep` subcommands. Each entry pairs an args class
# (exposing `parser_name`, `parser_help`, and `add_cli_args`, as consumed by
# `add_cli_args` below) with the entrypoint function invoked after parsing.
# Tuple order determines the order subcommands are registered (and thus
# listed in `--help`).
SUBCOMMANDS = (
    (SweepServeArgs, serve_main),
    (SweepServeSLAArgs, serve_sla_main),
    (SweepPlotArgs, plot_main),
)
19+
20+
21+
def add_cli_args(parser: argparse.ArgumentParser):
    """Register every `vllm bench sweep` subcommand on *parser*.

    For each entry in ``SUBCOMMANDS``, a subparser is created and the
    subcommand's entrypoint is stashed on the parsed namespace via
    ``set_defaults(dispatch_function=...)`` so that :func:`main` can
    dispatch without inspecting the subcommand name itself.
    """
    sweep_subparsers = parser.add_subparsers(required=True, dest="sweep_type")

    for args_cls, entry_fn in SUBCOMMANDS:
        name = args_cls.parser_name
        sub = sweep_subparsers.add_parser(
            name,
            description=args_cls.parser_help,
            usage=f"vllm bench sweep {name} [options]",
        )
        sub.set_defaults(dispatch_function=entry_fn)
        args_cls.add_cli_args(sub)
        # Set the epilog after add_cli_args so it is not overwritten by the
        # subcommand's own parser setup.
        sub.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(subcmd=f"sweep {name}")
35+
36+
37+
def main(args: argparse.Namespace):
    """Entry point for `vllm bench sweep`.

    ``add_cli_args`` stores the selected subcommand's entrypoint on the
    namespace as ``dispatch_function``; this simply forwards the parsed
    arguments to it.
    """
    dispatch = args.dispatch_function
    dispatch(args)

0 commit comments

Comments
 (0)