Ahil1991 committed on
Commit
f9c6b3d
·
verified ·
1 Parent(s): 27b8cd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -4,8 +4,8 @@ import time
4
 
5
  # Load your LLaMA model with GPU support, quantization, and multi-threading
6
  llm = Llama.from_pretrained(
7
- repo_id="Ahil1991/Bee-V.01-7B",
8
- filename="Bee-V.01.gguf",
9
  use_gpu=True, # Enable GPU if available
10
  quantize="4bit", # Quantization for speed (4-bit or 8-bit, adjust based on needs)
11
  num_threads=4 # Adjust based on CPU cores available (only for CPU use)
 
4
 
5
  # Load your LLaMA model with GPU support, quantization, and multi-threading
6
  llm = Llama.from_pretrained(
7
+ repo_id="Ahil1991/Bee-8.3B",
8
+ filename="Bee 8.3B Q4_K_M.gguf",
9
  use_gpu=True, # Enable GPU if available
10
  quantize="4bit", # Quantization for speed (4-bit or 8-bit, adjust based on needs)
11
  num_threads=4 # Adjust based on CPU cores available (only for CPU use)