Spaces:
Runtime error
Runtime error
gorkemgoknar
committed on
Commit
•
cfc0775
1
Parent(s):
331538a
Update app.py
Browse files
app.py
CHANGED
@@ -158,14 +158,14 @@ from llama_cpp import Llama
|
|
158 |
# set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
|
159 |
# else 35 full layers + XTTS works fine on T4 16GB
|
160 |
# 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
|
161 |
-
GPU_LAYERS=int(os.environ.get("GPU_LAYERS",
|
162 |
|
163 |
LLAMA_VERBOSE=False
|
164 |
print("Running LLM Mistral")
|
165 |
-
llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS
|
166 |
|
167 |
print("Running LLM Zephyr")
|
168 |
-
llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
|
169 |
|
170 |
|
171 |
# Mistral formatter
|
|
|
158 |
# set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
|
159 |
# else 35 full layers + XTTS works fine on T4 16GB
|
160 |
# 5gb per llm, 4gb XTTS -> full layers should fit T4 16GB , 2LLM + XTTS
|
161 |
+
GPU_LAYERS=int(os.environ.get("GPU_LAYERS", 35))
|
162 |
|
163 |
LLAMA_VERBOSE=False
|
164 |
print("Running LLM Mistral")
|
165 |
+
llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
|
166 |
|
167 |
print("Running LLM Zephyr")
|
168 |
+
llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-10,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
|
169 |
|
170 |
|
171 |
# Mistral formatter
|