pablorocg committed on
Commit
1dae1bd
1 Parent(s): 1308d0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -11,7 +11,6 @@ import faiss
11
  from transformers import (
12
  AutoTokenizer,
13
  AutoModelForCausalLM,
14
- BitsAndBytesConfig,
15
  AutoModel,
16
  TextIteratorStreamer
17
  )
@@ -384,13 +383,13 @@ else:
384
  index = faiss.read_index('./storage/faiss_index.faiss')
385
 
386
  # Load the model
387
- nf4_config = BitsAndBytesConfig(
388
- load_in_4bit=True,
389
- bnb_4bit_quant_type="nf4",
390
- )
391
 
392
  tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
393
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", quantization_config = nf4_config, token=HF_TOKEN)
394
 
395
 
396
  def make_inference(query, hist):
 
11
  from transformers import (
12
  AutoTokenizer,
13
  AutoModelForCausalLM,
 
14
  AutoModel,
15
  TextIteratorStreamer
16
  )
 
383
  index = faiss.read_index('./storage/faiss_index.faiss')
384
 
385
  # Load the model
386
+ # nf4_config = BitsAndBytesConfig(
387
+ # load_in_4bit=True,
388
+ # bnb_4bit_quant_type="nf4",
389
+ # )quantization_config = nf4_config,
390
 
391
  tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
392
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
393
 
394
 
395
  def make_inference(query, hist):