-      {% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}
-      {% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}
+      model: "llama-prefill"
+    hf_token: <hf-token>
   # Decode node configuration
-  - name: "opt125m-decode"
+  - name: "llama-decode"
     repository: "lmcache/vllm-openai"
-    tag: "2025-05-27-v1"
-    modelURL: "facebook/opt-125m"
+    tag: "nightly-2025-09-04"
+    modelURL: "meta-llama/Llama-3.1-8B-Instruct"
     replicaCount: 1
     requestCPU: 8
     requestMemory: "30Gi"
     # requestGPU: 1
     pvcStorage: "50Gi"
     vllmConfig:
-      enablePrefixCaching: true
-      maxModelLen: 1024
-      v1: 1
+      enablePrefixCaching: false
+      # maxModelLen: 2048
+      extraArgs:
+        - "--enforce-eager"
+        - "--disable-log-requests"
     lmcacheConfig:
       cudaVisibleDevices: "1"
       enabled: true
       kvRole: "kv_consumer"  # Set decode node as consumer
+      localCpu: false
+      maxLocalCpuSize: 0
       enableNixl: true
+      enableXpyd: true
       nixlRole: "receiver"
       nixlPeerHost: "0.0.0.0"
-      nixlPeerPort: "55555"
-      nixlBufferSize: "1073741824"  # 1GB
+      nixlPeerInitPort: 7300
+      nixlPeerAllocPort: 7400
+      nixlBufferSize: "2147483648"  # 2GB
       nixlBufferDevice: "cuda"
-      nixlEnableGc: true
+      # nixlBackends: ["UCX"]
       enablePD: true
+      rpcPort: "consumer1"
+      skipLastNTokens: 1
+    hf_token: <hf-token>
     labels:
-      model: "opt125m-decode"
-    chatTemplate: "chat.jinja2"
-    chatTemplateConfigMap: |-
-      {% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}
-      {% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}
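The hunk above reconfigures only the decode side of the prefill/decode pair (kvRole "kv_consumer", nixlRole "receiver"); the matching prefill entry sits outside the visible range, apart from its trailing model: "llama-prefill" label. For orientation, here is a minimal sketch of what the producer entry would look like if it simply mirrors the consumer keys shown in this hunk. The GPU index, the peer-host placeholder, and the "kv_producer" / "sender" / "producer1" values are assumptions, not values taken from this diff, and the chart's actual producer-side keys may differ:

  # Prefill node counterpart (sketch; not part of this diff)
  - name: "llama-prefill"
    repository: "lmcache/vllm-openai"
    tag: "nightly-2025-09-04"
    modelURL: "meta-llama/Llama-3.1-8B-Instruct"
    replicaCount: 1
    requestCPU: 8
    requestMemory: "30Gi"
    pvcStorage: "50Gi"
    vllmConfig:
      enablePrefixCaching: false
      extraArgs:
        - "--enforce-eager"
        - "--disable-log-requests"
    lmcacheConfig:
      cudaVisibleDevices: "0"              # assumed: decode pins GPU 1, so prefill takes GPU 0
      enabled: true
      kvRole: "kv_producer"                # assumed mirror of the decode node's kv_consumer
      localCpu: false
      maxLocalCpuSize: 0
      enableNixl: true
      enableXpyd: true
      nixlRole: "sender"                   # assumed counterpart to the decode node's "receiver"
      nixlPeerHost: "<decode-host>"        # placeholder: must resolve to the receiver node
      nixlPeerInitPort: 7300
      nixlPeerAllocPort: 7400
      nixlBufferSize: "2147483648"
      nixlBufferDevice: "cuda"
      enablePD: true
      rpcPort: "producer1"                 # assumed to pair with the consumer's "consumer1"
      skipLastNTokens: 1
    hf_token: <hf-token>
    labels:
      model: "llama-prefill"

Note the design shift in the diff itself: prefix caching is turned off on the decode node, the single nixlPeerPort is replaced by a separate init/alloc port pair (7300/7400), and the NIXL transfer buffer doubles from 1 GB to 2 GB.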