Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,6 @@ import faiss
|
|
11 |
from transformers import (
|
12 |
AutoTokenizer,
|
13 |
AutoModelForCausalLM,
|
14 |
-
BitsAndBytesConfig,
|
15 |
AutoModel,
|
16 |
TextIteratorStreamer
|
17 |
)
|
@@ -384,13 +383,13 @@ else:
|
|
384 |
index = faiss.read_index('./storage/faiss_index.faiss')
|
385 |
|
386 |
# Load the model
|
387 |
-
nf4_config = BitsAndBytesConfig(
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
|
392 |
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
|
393 |
-
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it",
|
394 |
|
395 |
|
396 |
def make_inference(query, hist):
|
|
|
11 |
from transformers import (
|
12 |
AutoTokenizer,
|
13 |
AutoModelForCausalLM,
|
|
|
14 |
AutoModel,
|
15 |
TextIteratorStreamer
|
16 |
)
|
|
|
383 |
index = faiss.read_index('./storage/faiss_index.faiss')
|
384 |
|
385 |
# Load the model
|
386 |
+
# nf4_config = BitsAndBytesConfig(
|
387 |
+
# load_in_4bit=True,
|
388 |
+
# bnb_4bit_quant_type="nf4",
|
389 |
+
# )quantization_config = nf4_config,
|
390 |
|
391 |
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
|
392 |
+
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
|
393 |
|
394 |
|
395 |
def make_inference(query, hist):
|