# Reference for Chainlit Python streaming: https://docs.chainlit.io/concepts/streaming/python
# Import dependencies
from operator import itemgetter

from dotenv import load_dotenv
import chainlit as cl
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Qdrant
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers import MultiQueryRetriever
# Load environment variables (expects OPENAI_API_KEY in a local .env file)
load_dotenv()

# Load the source document: Meta's 10-K filing PDF
loader = PyMuPDFLoader("./meta_pdf.pdf")
documents = loader.load()
# Chunk the document into overlapping pieces for retrieval
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=100,
)
documents = text_splitter.split_documents(documents)
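# With chunk_size=400 and chunk_overlap=100, consecutive chunks share 100
# characters of text, so a sentence split at a chunk boundary still appears
# whole in at least one chunk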
# Load OpenAI Embeddings Model
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
# Build a Qdrant vector store from the chunked documents
qdrant_vector_store = Qdrant.from_documents(
    documents,
    embeddings,
    location=":memory:",
    collection_name="meta-10-k-filings",
)
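# location=":memory:" keeps the Qdrant index in RAM: the collection is
# rebuilt from scratch on every app start and nothing is persisted to disk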
# Create Retriever
retriever = qdrant_vector_store.as_retriever()
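# as_retriever() uses plain similarity search by default; the number of
# chunks returned per query could be tuned here, e.g. (hypothetical setting):
# retriever = qdrant_vector_store.as_retriever(search_kwargs={"k": 4})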
# Prompt template: answer strictly from the retrieved context
template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
Context:
{context}
Question:
{question}
"""
prompt = ChatPromptTemplate.from_template(template)
# LLM used both to answer questions and to generate query rewrites
primary_qa_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# MultiQueryRetriever rephrases each question into several variants and
# merges the retrieved chunks, improving recall over a single query
retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=primary_qa_llm)
# LCEL chain: fetch context for the question, then answer with the prompt
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": prompt | primary_qa_llm, "context": itemgetter("context")}
)
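# A commented-out smoke test for invoking the chain directly, outside of
# Chainlit (the question string is hypothetical):
# result = retrieval_augmented_qa_chain.invoke(
#     {"question": "What was Meta's revenue in 2023?"}
# )
# print(result["response"].content)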
# Chainlit App
@cl.on_chat_start
async def start_chat():
    # Stored in the session for reference; note the chain above does not
    # read these settings
    settings = {
        "model": "gpt-3.5-turbo",
        "temperature": 0,
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    cl.user_session.set("settings", settings)
@cl.on_message
async def main(message: cl.Message):
    chainlit_question = message.content
    response = retrieval_augmented_qa_chain.invoke({"question": chainlit_question})
    chainlit_answer = response["response"].content

    msg = cl.Message(content=chainlit_answer)
    await msg.send()
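# Launch locally with the Chainlit CLI (assumes chainlit is installed):
#   chainlit run app.py -w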