Update README.md
Browse files
README.md
CHANGED
@@ -145,8 +145,7 @@ sampling_params = SamplingParams(temperature=0.3, max_tokens=256)
|
|
145 |
|
146 |
llm = LLM(
|
147 |
model=model_path,
|
148 |
-
|
149 |
-
kv_cache_dtype='fp8',
|
150 |
tensor_parallel_size=8,
|
151 |
gpu_memory_utilization=0.95,
|
152 |
enforce_eager=True,
|
|
|
145 |
|
146 |
llm = LLM(
|
147 |
model=model_path,
|
148 |
+
kv_cache_dtype='auto',
|
|
|
149 |
tensor_parallel_size=8,
|
150 |
gpu_memory_utilization=0.95,
|
151 |
enforce_eager=True,
|