Spaces:

Carlosito16
/

aitGPT

Paused

App Files Files Community

Carlosito16 committed on Sep 17, 2023

Commit

e106a6d

1 Parent(s): 00662e9

add quantization config

Browse files

Files changed (1) hide show

app.py +19 -11

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import csv
 import json
 import torch
 from tqdm.auto import tqdm
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -33,6 +34,8 @@ from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_
 prompt_template = """
 You are the chatbot and the face of Asian Institute of Technology (AIT). Your job is to give answers to prospective and current students about the school.
@@ -59,7 +62,10 @@ st.set_page_config(
     page_title = 'aitGPT',
     page_icon = '✅')
 @st.cache_data
@@ -91,19 +97,21 @@ def load_faiss_index():
 @st.cache_resource
 def load_llm_model():
-    # llm = HuggingFacePipeline.from_model_id(model_id= 'lmsys/fastchat-t5-3b-v1.0',
-    #                                         task= 'text2text-generation',
-    #                                         model_kwargs={ "device_map": "auto",
-    #                                                     "load_in_8bit": True,"max_length": 256, "temperature": 0,
-    #                                                     "repetition_penalty": 1.5})
-    llm = HuggingFacePipeline.from_model_id(model_id= 'lmsys/fastchat-t5-3b-v1.0',
-                                        task= 'text2text-generation',
-                                        model_kwargs={ "max_length": 256, "temperature": 0,
-                                                      "torch_dtype":torch.float32,
-                                                    "repetition_penalty": 1.3})
     return llm

 import json
 import torch
 from tqdm.auto import tqdm
+from transformers import BitsAndBytesConfig
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 prompt_template = """
 You are the chatbot and the face of Asian Institute of Technology (AIT). Your job is to give answers to prospective and current students about the school.
     page_title = 'aitGPT',
     page_icon = '✅')
+bitsandbyte_config = BitsAndBytesConfig(
+                                                    load_in_4bit=True,
+                                                    bnb_4bit_quant_type="nf4",
+                                                    bnb_4bit_compute_dtype=torch.float16)
 @st.cache_data
 @st.cache_resource
 def load_llm_model():
+    #this one is for running with GPU
+    llm = HuggingFacePipeline.from_model_id(model_id= 'lmsys/fastchat-t5-3b-v1.0',
+                                            task= 'text2text-generation',
+                                            model_kwargs={ "device_map": "auto",
+                                                        "max_length": 256, "temperature": 0,
+                                                        "repetition_penalty": 1.5,
+                                                         "quantization_config": bitsandbyte_config}) #add this quantization config
+    # llm = HuggingFacePipeline.from_model_id(model_id= 'lmsys/fastchat-t5-3b-v1.0',
+    #                                     task= 'text2text-generation',
+    #                                     model_kwargs={ "max_length": 256, "temperature": 0,
+    #                                                   "torch_dtype":torch.float32,
+    #                                                 "repetition_penalty": 1.3})
     return llm