diff --git a/examples/serve/aiperf_client.sh b/examples/serve/aiperf_client.sh
index 8a150714de2..5d4fafb1736 100755
--- a/examples/serve/aiperf_client.sh
+++ b/examples/serve/aiperf_client.sh
@@ -2,7 +2,7 @@
 
 aiperf profile \
     -m TinyLlama-1.1B-Chat-v1.0 \
-    --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
+    --tokenizer ${TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} \
     --endpoint-type chat \
     --random-seed 123 \
     --synthetic-input-tokens-mean 128 \
diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
index 66677fcead3..83fdec7fd25 100644
--- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
+++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -1,17 +1,14 @@
 import json
 import os
 import subprocess
-import sys
 import tempfile
 
 import pytest
 import yaml
 
+from ..test_llm import get_model_path
 from .openai_server import RemoteOpenAIServer
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
-from test_llm import get_model_path
-
 
 @pytest.fixture(scope="module", ids=["TinyLlama-1.1B-Chat"])
 def model_name():
@@ -36,6 +33,7 @@ def temp_extra_llm_api_options_file():
 @pytest.fixture(scope="module")
 def server(model_name: str, temp_extra_llm_api_options_file: str):
     model_path = get_model_path(model_name)
+    os.environ["TOKENIZER_PATH"] = model_path
    # fix port to facilitate concise trtllm-serve examples
     args = ["--extra_llm_api_options", temp_extra_llm_api_options_file]
     with RemoteOpenAIServer(model_path, args, port=8000) as remote_server:
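
Note: the example script now reads the tokenizer location from the TOKENIZER_PATH environment variable, falling back to the public Hugging Face repo id when the variable is unset, which is presumably what lets the test point the script at the locally cached model. Below is a minimal sketch of how the ${VAR:-default} expansion behaves; the /models/... path is only an illustrative placeholder, not a path used by the test.

#!/usr/bin/env bash
# When TOKENIZER_PATH is unset or empty, the fallback repo id is substituted.
unset TOKENIZER_PATH
echo "--tokenizer ${TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0}"
# prints: --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0

# When the test exports TOKENIZER_PATH (as the server fixture does via
# os.environ), the exported value is used instead of the fallback.
export TOKENIZER_PATH=/models/TinyLlama-1.1B-Chat-v1.0   # placeholder path
echo "--tokenizer ${TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0}"
# prints: --tokenizer /models/TinyLlama-1.1B-Chat-v1.0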