|
import io
import json
import os
from io import BytesIO

import chainlit as cl
import openai
import requests
from langchain import hub
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import StructuredTool, Tool
from langchain.vectorstores import Chroma
from openai import OpenAI
|
|
|
|
|
# Hand the OpenAI API key from the environment to the legacy module-level
# client configuration. Indexing os.environ (instead of using .get) makes a
# missing OPENAI_API_KEY fail immediately at import time with a KeyError.
openai.api_key = os.environ["OPENAI_API_KEY"]
|
|
|
|
|
# PDF that is indexed for retrieval.
_RAG_PDF_PATH = "data/The Goal - A Process of Ongoing Improvement (Third Revised Edition).pdf"

# Retriever over the PDF; built on first use and reused by later calls.
_rag_retriever = None


def _get_rag_retriever():
    """Return the retriever over the PDF, building the vector index on first use.

    Loading, splitting and embedding the document does not depend on the
    query, so the work is done once and cached instead of being repeated
    (and re-billed by the embeddings API) for every question.
    """
    global _rag_retriever
    if _rag_retriever is None:
        docs = UnstructuredPDFLoader(_RAG_PDF_PATH).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = splitter.split_documents(docs)
        vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
        _rag_retriever = vectorstore.as_retriever()
    return _rag_retriever


def _extract_question(query):
    """Unwrap a ``{"prompt": "..."}`` JSON payload; return other input unchanged."""
    try:
        payload = json.loads(query)
    except (TypeError, ValueError):
        # Not JSON (or not a string at all): treat the input as the question.
        return query
    if isinstance(payload, dict) and isinstance(payload.get("prompt"), str):
        return payload["prompt"]
    return query


def rag(query):
    """Answer a question about the PDF using retrieval-augmented generation.

    Args:
        query: The question to answer. Either a plain string or a JSON
            object string of the form ``{"prompt": "<question>"}``; in the
            latter case the value of ``"prompt"`` is used as the question.

    Returns:
        The complete answer text, i.e. the streamed chunks concatenated in
        the order they were produced.
    """
    question = _extract_question(query)

    prompt = hub.pull("rlm/rag-prompt")
    llm = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0)

    def format_docs(docs):
        # Merge the retrieved chunks into one context string for the prompt.
        return "\n\n".join(doc.page_content for doc in docs)

    rag_chain = (
        {"context": _get_rag_retriever() | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    chunks = []
    for chunk in rag_chain.stream(question):
        # Echo tokens to stdout as they arrive so progress is visible.
        print(chunk, end="", flush=True)
        chunks.append(chunk)

    # Chunks are token fragments of one answer; join them verbatim so the
    # text is not broken up by separators between tokens.
    return "".join(chunks)
|
|
|
|
|
|
|
|
|
|
|
# Input format advertised to the agent in the tool description below.
rag_format = '{{"prompt": "prompt"}}'

# Agent tool wrapping `rag`. `return_direct=True` hands the tool's answer
# back as the final response instead of feeding it back to the agent.
rag_tool = Tool.from_function(
    func=rag,
    name="RAG",
    description=f"Useful for retrieving contextual information about the PDF to answer user questions. Input should be a single string strictly in the following JSON format: {rag_format}",
    return_direct=True,
)
|
|