zahraanaji committed
Commit 80f6d5f
1 Parent(s): 7477fad

Upload 2_rag_skeleton.py

Files changed (1)
  1. 2_rag_skeleton.py +67 -0
2_rag_skeleton.py ADDED
@@ -0,0 +1,67 @@
+ import os
+
+ from langchain_openai import ChatOpenAI
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores import Chroma
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain_community.chat_message_histories import ChatMessageHistory
+ from langchain.memory import ConversationBufferMemory
+ from langchain_core.prompts import PromptTemplate
+
+ # Access the OpenAI API key from the environment
+ open_ai_key = os.getenv("OPENAI_API_KEY")
+
+ llm = ChatOpenAI(api_key=open_ai_key)
+
+ template = """Use the following pieces of information to answer the user's question.
+ If you don't know the answer, just say that you don't know; don't try to make up an answer.
+
+ Context: {context}
+ Question: {question}
+
+ Only return the helpful answer below and nothing else.
+ Helpful answer:
+ """
+
+ prompt = PromptTemplate(template=template, input_variables=["context", "question"])
+
+ # Load and process the PDF (placeholder path; point this at the file to index)
+ pdf_path = "document.pdf"
+ loader = PyPDFLoader(pdf_path)
+ pdf_data = loader.load()
+
+ # Split the text into overlapping chunks for retrieval
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+ docs = text_splitter.split_documents(pdf_data)
+
+ # Embed the chunks and index them in a Chroma vector store
+ embeddings = HuggingFaceEmbeddings(model_name="embaas/sentence-transformers-multilingual-e5-base")
+ db = Chroma.from_documents(docs, embeddings)
+
+ # Initialize message history for the conversation
+ message_history = ChatMessageHistory()
+
+ # Memory that feeds prior turns back into the chain
+ memory = ConversationBufferMemory(
+     memory_key="chat_history",
+     output_key="answer",
+     chat_memory=message_history,
+     return_messages=True,
+ )
+
+ # Create a retrieval chain that stuffs the retrieved chunks into the prompt
+ chain = ConversationalRetrievalChain.from_llm(
+     llm=llm,
+     chain_type="stuff",
+     retriever=db.as_retriever(),
+     memory=memory,
+     return_source_documents=False,
+     combine_docs_chain_kwargs={"prompt": prompt},
+ )
+
+ # Process the question (placeholder string; replace with real user input)
+ question = "What is this document about?"
+ res = chain.invoke({"question": question})
+ answer = res["answer"]
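
Because the chain shares the ConversationBufferMemory defined above, a second call sees the first exchange as chat history. A minimal usage sketch, assuming the script above has run and chain is in scope; the follow-up question string is an illustrative placeholder:

# Follow-up turn: ConversationBufferMemory supplies the earlier
# question/answer pair as chat history, so the chain can resolve a
# reference like "that answer" against the previous exchange.
followup = chain.invoke({"question": "Can you summarize that answer in one sentence?"})
print(followup["answer"])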