NVIDIA · LinPoly · Dec 23, 2025
@@ -2,7 +2,7 @@
 
 aiperf profile \
     -m TinyLlama-1.1B-Chat-v1.0 \
-    --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
+    --tokenizer ${TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} \
     --endpoint-type chat \
     --random-seed 123 \
     --synthetic-input-tokens-mean 128 \

diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -1,17 +1,14 @@
 import json
 import os
 import subprocess
-import sys
 import tempfile
 
 import pytest
 import yaml
 
+from ..test_llm import get_model_path
 from .openai_server import RemoteOpenAIServer
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
-from test_llm import get_model_path
-
 
 @pytest.fixture(scope="module", ids=["TinyLlama-1.1B-Chat"])
 def model_name():
@@ -36,6 +33,7 @@ def temp_extra_llm_api_options_file():
 @pytest.fixture(scope="module")
 def server(model_name: str, temp_extra_llm_api_options_file: str):
     model_path = get_model_path(model_name)
+    os.environ["TOKENIZER_PATH"] = model_path
     # fix port to facilitate concise trtllm-serve examples
     args = ["--extra_llm_api_options", temp_extra_llm_api_options_file]
     with RemoteOpenAIServer(model_path, args, port=8000) as remote_server: