Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,8 +4,8 @@ import time
|
|
4 |
|
5 |
# Load your LLaMA model with GPU support, quantization, and multi-threading
|
6 |
llm = Llama.from_pretrained(
|
7 |
-
repo_id="Ahil1991/Bee-
|
8 |
-
filename="Bee
|
9 |
use_gpu=True, # Enable GPU if available
|
10 |
quantize="4bit", # Quantization for speed (4-bit or 8-bit, adjust based on needs)
|
11 |
num_threads=4 # Adjust based on CPU cores available (only for CPU use)
|
|
|
4 |
|
5 |
# Load your LLaMA model with GPU support, quantization, and multi-threading
|
6 |
llm = Llama.from_pretrained(
|
7 |
+
repo_id="Ahil1991/Bee-8.3B",
|
8 |
+
filename="Bee 8.3B Q4_K_M.gguf",
|
9 |
use_gpu=True, # Enable GPU if available
|
10 |
quantize="4bit", # Quantization for speed (4-bit or 8-bit, adjust based on needs)
|
11 |
num_threads=4 # Adjust based on CPU cores available (only for CPU use)
|