"""Chainlit app for asking questions about an uploaded medical document.

On chat start the user is asked to upload a file, which is handed to
``upload_chroma``. Every subsequent chat message is passed to
``get_qa_chain_answers_llm`` and the result is sent back to the user.
"""

from io import BytesIO

import chainlit as cl
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import ddg
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from PIL import Image

from file_handle import get_qa_chain_answers_llm, upload_chroma
from utils import get_completion, model_info, model_load

model_name = "medalpaca/medalpaca-7b"

# Collection name shared by the upload step and the question-answering step.
COLLECTION_NAME = "test"

# Tuning knobs for get_sources().
_MAX_SEARCH_RESULTS = 4
_MIN_SOURCE_LENGTH = 100  # pages at or below this length are discarded
_LONG_SOURCE_THRESHOLD = 892  # first page is preferred when longer than this
_SNIPPET_LENGTH = 890  # number of characters returned to the caller

# Make sure the expected model is the one currently loaded, and report it.
model_current = model_info()
print("Current model: ", model_current)
if model_current != model_name:
    print("Loading model: ", model_name)
    model_load(model_name)
    model_current = model_info()
    print("Current model: ", model_current)


def get_sources(query):
    """Search the web for *query* and return a short text snippet.

    Each search hit is loaded with ``WebBaseLoader``; newlines are stripped
    from the first loaded document and pages whose text is longer than
    ``_MIN_SOURCE_LENGTH`` characters are kept as candidate sources.

    Args:
        query: The search string passed to ``ddg``.

    Returns:
        The first ``_SNIPPET_LENGTH`` characters of the first candidate when
        it is longer than ``_LONG_SOURCE_THRESHOLD`` characters (or when it
        is the only candidate), otherwise of the second candidate. An empty
        string when no candidate was found.
    """
    # Guard against a None/empty search result so the loop is always safe.
    results = ddg(query, max_results=_MAX_SEARCH_RESULTS) or []

    sources = []
    for link in results:
        # Load the page for *this* hit (previously the first hit was
        # re-fetched on every iteration, yielding duplicate sources).
        documents = WebBaseLoader(link["href"]).load()
        if not documents:
            continue
        text = documents[0].page_content.replace("\n", "")
        if len(text) > _MIN_SOURCE_LENGTH:
            sources.append(text)
        else:
            # Too short to be useful; print it for debugging.
            print(text)

    if not sources:
        return ""
    # Fall back to the first source when there is no second one to pick.
    if len(sources[0]) > _LONG_SOURCE_THRESHOLD or len(sources) == 1:
        return sources[0][:_SNIPPET_LENGTH]
    return sources[1][:_SNIPPET_LENGTH]


text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# NOTE(review): the template text is reproduced exactly as found (single-space
# separators, one line break after the first sentence). Confirm whether
# additional line breaks around the fenced example were intended.
system_template = (
    "Use the following pieces of context to answer the users question. \n"
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer. "
    'ALWAYS return a "SOURCES" part in your answer. '
    'The "SOURCES" part should be a reference to the source of the document '
    "from which you got your answer. "
    "Example of your response should be: "
    "``` The answer is foo SOURCES: xyz ``` "
    "Begin! ---------------- "
    "{summaries}"
)

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
chain_type_kwargs = {"prompt": prompt}


@cl.on_chat_start
async def init():
    """Ask the user for a file and hand it to ``upload_chroma``."""
    files = None

    # Keep asking until the prompt yields a result.
    while files is None:
        files = await cl.AskFileMessage(
            content="Upload your medical reports and files to begin!",
            accept={
                'image/png': ['.png', '.jpg', '.jpeg'],
                'text/html': ['.html', '.htm'],
                "application/pdf": ['.pdf'],
            },
        ).send()

    uploaded = files[0]
    stream = BytesIO(uploaded.content)

    msg = cl.Message(content=f"Processing `{uploaded.name}`...")
    await msg.send()

    upload_chroma(stream, uploaded.name, COLLECTION_NAME)

    msg.content = f"Processing `{uploaded.name}` done. You can now ask questions!"
    await msg.update()


@cl.on_message
async def main(message):
    """Answer *message* via ``get_qa_chain_answers_llm`` and send the reply."""
    res = get_qa_chain_answers_llm(message, COLLECTION_NAME)
    print(res)
    await cl.Message(
        content=f"{res}",
    ).send()