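# HPP Chatbot: a retrieval-augmented generation (RAG) demo.
# A user question is embedded, matched against a FAISS index over the
# AI-4-Health/embedded-dataset corpus, and the retrieved context is passed to
# a 4-bit-quantized Meta-Llama-3-8B-Instruct to produce a cited answer.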
import os
from threading import Thread

import spaces  # Hugging Face Spaces SDK
import gradio as gr
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)
token = os.environ["HF_TOKEN"]

ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

dataset = load_dataset("AI-4-Health/embedded-dataset")
data = dataset["train"]
data = data.add_faiss_index("embeddings")  # column that holds the precomputed embeddings
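# NOTE: queries are only comparable to the index if the "embeddings" column was
# built with the same encoder used at query time. A minimal sketch of how such
# a column could be produced (assumes a raw dataset with a "text" column; the
# dataset name here is hypothetical, not part of this app):
#
#   raw = load_dataset("AI-4-Health/some-raw-dataset")["train"]
#   embedded = raw.map(lambda row: {"embeddings": ST.encode(row["text"])})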
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# use 4-bit quantization to lower GPU usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
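# In bfloat16 the 8B parameters alone take roughly 16 GB of GPU memory; NF4
# 4-bit weights bring that down to around 5-6 GB, while bnb_4bit_compute_dtype
# keeps the matmuls in bfloat16 for output quality.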
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=bnb_config,
    token=token,
)
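# Llama 3 instruct models end each assistant turn with <|eot_id|> rather than
# the plain EOS token, so both are passed to generate() as stop tokens.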
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]
SYS_PROMPT = """You are an expert biomedical researcher. For answering the Question at the end with brevity, you need to first read the Context provided. Then give your final answer briefly, by citing the Provenance information from the context. You can find Provenance from the Context statement 'Provenance of this association is <Provenance>'. Do not forget to cite the Provenance information. Note that, if Provenance is 'GWAS' report it as 'GWAS Catalog'. If Provenance is 'DISEASES' report it as 'DISEASES database - https://diseases.jensenlab.org'. Additionally, when providing drug or medication suggestions, give maximum information available and then advise the user to seek guidance from a healthcare professional as a precautionary measure."""
def search(query: str, k: int = 3):
    """Embed a new query and return the k most similar dataset entries."""
    embedded_query = ST.encode(query)  # embed the new query
    scores, retrieved_examples = data.get_nearest_examples(  # retrieve results
        "embeddings",  # compare against the dataset's embedding column
        embedded_query,
        k=k,  # get only the top-k results
    )
    return scores, retrieved_examples
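# Usage sketch (hypothetical query): `scores` holds the FAISS similarity
# scores and `retrieved_examples` maps column names to lists, best match first:
#
#   scores, docs = search("gene therapy for hypophosphatasia", k=3)
#   top_text = docs["text"][0]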
def format_prompt(prompt, retrieved_documents, k):
    """Build the user message: the question followed by the retrieved context."""
    PROMPT = f"Question:{prompt}\nContext:"
    for idx in range(k):
        PROMPT += f"{retrieved_documents['text'][idx]}\n"
    return PROMPT
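# With k=2, the resulting prompt has the form:
#   Question:<user prompt>
#   Context:<text of top document>
#   <text of second document>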
def talk(prompt):
    k = 1  # number of retrieved documents
    scores, retrieved_documents = search(prompt, k)
    filename = retrieved_documents["filename"][0]  # source file of the top hit
    print("filename is ", filename)
    formatted_prompt = format_prompt(prompt, retrieved_documents, k)
    formatted_prompt = formatted_prompt[:2000]  # truncate context to avoid GPU OOM
    messages = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": formatted_prompt},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    # Run generate() in a background thread and read tokens off the streamer;
    # the pieces are collected and returned as one string once generation ends.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        temperature=0.75,
        eos_token_id=terminators,
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    outputs = []
    for text in streamer:
        outputs.append(text)
    return "".join(outputs), filename
def update_document(filename):
    # Read the retrieved source file for display; the corpus files are assumed
    # to be iso-8859-15 encoded.
    with open("datasets/" + filename, "r", encoding="iso-8859-15") as file:
        content = file.read()
    return content
TITLE = "# RAG"
DESCRIPTION = """
HPP Chatbot
"""
with gr.Blocks() as demo:
    gr.Markdown(TITLE)
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Enter your prompt",
            value="What are the current challenges in developing effective gene therapy for hypophosphatasia?",
            lines=3,
        )
    submit_button = gr.Button("Submit")
    chat_output = gr.Textbox(label="Chat Response", lines=10)
    filename = gr.Textbox(label="File Name", lines=1)
    file_display = gr.Textbox(label="File Content", lines=10)

    submit_button.click(
        fn=talk,
        inputs=prompt_input,
        outputs=[chat_output, filename],
    )
    # Load the retrieved file's content whenever the filename updates.
    filename.change(
        fn=update_document,
        inputs=filename,
        outputs=file_display,
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=True)