ByteBrewer committed on
Commit
51e4f8a
·
verified ·
1 Parent(s): bdf0854

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +65 -0
  2. requirements.txt +24 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain_openai import ChatOpenAI
4
+ from langchain_openai import OpenAIEmbeddings
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.chains.combine_documents import create_stuff_documents_chain
7
+ from langchain_core.prompts import ChatPromptTemplate
8
+ from langchain.chains import create_retrieval_chain
9
+ from langchain_objectbox.vectorstores import ObjectBox
10
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
11
+
12
+ from dotenv import load_dotenv
13
load_dotenv()

# Load the OpenAI and Groq API keys.
# load_dotenv() has already copied any .env entries into os.environ, so the
# key only needs to be checked here, not re-exported. The original line wrote
# it under the misspelled name 'OPEN_API_KEY' and raised TypeError when the
# key was missing, because os.environ cannot store None.
openai_api_key = os.getenv("OPENAI_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")  # read here; not used by the code in this section

st.title("Objectbox VectorstoreDB With Llama3 Demo")

if not openai_api_key:
    # Fail with a readable message in the page instead of a traceback.
    st.error("OPENAI_API_KEY is not set. Add it to the environment or to a .env file.")
    st.stop()

llm = ChatOpenAI(model="gpt-4o")  # chat model used to answer questions

# Prompt that restricts the model to the retrieved context. The context
# section is now closed with a proper </context> tag.
prompt = ChatPromptTemplate.from_template(
    """
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions:{input}
"""
)
30
+
31
+ ## Vector Embedding and ObjectBox vector store DB
32
def vector_embedding():
    """Build the ObjectBox vector store once per Streamlit session.

    Returns immediately when ``st.session_state`` already holds a ``vectors``
    entry. Otherwise it loads the PDFs found under ``./us_census``, splits the
    first 20 loaded documents (chunk_size=1000, chunk_overlap=200), embeds the
    chunks with ``OpenAIEmbeddings`` and stores them in ObjectBox. Every
    intermediate object is kept on ``st.session_state``.
    """
    state = st.session_state
    if "vectors" in state:
        return

    state.embeddings = OpenAIEmbeddings()
    state.loader = PyPDFDirectoryLoader("./us_census")  # data ingestion
    state.docs = state.loader.load()  # document loading
    state.text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    # Only the first 20 loaded documents are chunked and indexed.
    state.final_documents = state.text_splitter.split_documents(state.docs[:20])
    # NOTE(review): embedding_dimensions is fixed at 768; confirm this matches
    # the vector size produced by the configured OpenAIEmbeddings model.
    state.vectors = ObjectBox.from_documents(
        state.final_documents,
        state.embeddings,
        embedding_dimensions=768,
    )
40
+
41
+
42
import time

# Question box; the script reruns whenever the user submits a new value.
input_prompt = st.text_input("Enter Your Question From Documents")

# Building the vector store is an explicit user action.
if st.button("Documents Embedding"):
    vector_embedding()
    st.write("ObjectBox Database is ready")

if input_prompt:
    if "vectors" not in st.session_state:
        # Without this guard, asking a question before the store exists
        # raised AttributeError on st.session_state.vectors.
        st.warning('Click "Documents Embedding" to build the vector store before asking a question.')
    else:
        document_chain = create_stuff_documents_chain(llm, prompt)
        retriever = st.session_state.vectors.as_retriever()
        retrieval_chain = create_retrieval_chain(retriever, document_chain)

        # perf_counter() measures elapsed wall-clock time. process_time()
        # counts only CPU time and so under-reports time spent waiting on
        # the remote calls.
        start = time.perf_counter()
        response = retrieval_chain.invoke({'input': input_prompt})
        print("Response time :", time.perf_counter() - start)

        st.write(response['answer'])

        # Show the retrieved chunks that were passed to the model as context.
        with st.expander("Document Similarity Search"):
            for doc in response["context"]:
                st.write(doc.page_content)
                st.write("--------------------------------")
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain_openai
2
+ langchain_core
3
+ python-dotenv
4
+ streamlit
5
+ langchain_community
6
+ langserve
7
+ fastapi
8
+ uvicorn
9
+ sse_starlette
10
+ bs4
11
+ pypdf
12
+ chromadb
13
+ faiss-cpu
14
+ groq
15
+ cassio
16
+ beautifulsoup4
17
+ langchain-groq
18
+ wikipedia
19
+ arxiv
20
+ langchainhub
21
+ sentence_transformers
22
+ PyPDF2
23
+ langchain-objectbox
24
+ langchain