import os
import time

import streamlit as st
from PIL import Image

from langchain.llms import AzureOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
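# App overview (inferred from the UI strings below): load a company's
# sustainability or annual report PDF from a URL, index it with Chroma plus
# Azure OpenAI embeddings, and answer questions about sustainability best practices.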
image = Image.open('Wipro logo.png')
st.image(image)

st.title("Wipro impact | The inquisitive sustainability leader: learn best practices in sustainability from the success stories of leading companies.")
st.header("Welcome! Which company's sustainability story is inspiring you today?")

myurl = st.text_input("Give the URL of a sustainability or annual report", "Type the URL of any company PDF document here")
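# The vector index is (re)built only on the rerun in which "Load this report"
# is clicked; on other reruns it stays None, so the query button below checks for that.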
index = None
if st.button("Load this report "):
loader1 = PyPDFLoader(myurl)
langchainembeddings = OpenAIEmbeddings(deployment="textembedding", chunk_size=1)
index = VectorstoreIndexCreator(
# split the documents into chunks
text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
# select which embeddings we want to use
embedding=langchainembeddings,
# use Chroma as the vectorestore to index and search embeddings
vectorstore_cls=Chroma
).from_loaders([loader1])
st.write("loaded")
st.write(langchainembeddings)
st.write(index)
yourquestion = st.text_input('Ask your question on best practices', "What are Wipro's plans for Biodiversity in 2024?")
st.write('Your input is ', yourquestion)

# This choice is currently informational only; just the GPT-3.5 deployment is configured below.
modelchoice = st.radio(
    "Choose an AI brain",
    ('GPT3', 'GPT3.5'), index=1)
os.environ['OPENAI_API_TYPE'] = 'azure'
os.environ['OPENAI_API_VERSION'] = '2023-03-15-preview'
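# Assumption: OPENAI_API_KEY and OPENAI_API_BASE for the Azure resource are
# supplied by the hosting environment (e.g. Space secrets); only the API type
# and version are set here, and the deployment names below are account-specific.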
#llmgpt3 = AzureOpenAI( deployment_name="testdavanci", model_name="text-davinci-003" )
llmchatgpt = AzureOpenAI( deployment_name="esujnand", model_name="gpt-35-turbo" )
if st.button("Ask Best practices "):
answer = index.query(llm=llmchatgpt, question=yourquestion, chain_type="map_reduce")
st.write(answer)
aimethod = st.radio(
    "Choose an AI brain or document comprehension method",
    ('2 minutes AI method map_reduce', '4 minutes AI method refine'), index=0)

mychain_type = "map_reduce"
if aimethod == '2 minutes AI method map_reduce':
    mychain_type = "map_reduce"
if aimethod == '4 minutes AI method refine':
    mychain_type = "refine"
loader1 = PyPDFLoader(myurl)
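# Assumed reconstruction: `chain` is used below but never defined in the
# original script; build a question-answering chain from the imported
# load_qa_chain helper, the Azure chat model above, and the chosen chain type.
chain = load_qa_chain(llmchatgpt, chain_type=mychain_type)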
def history():
    # Persist the running question count and each question/answer pair in
    # st.session_state so earlier answers survive Streamlit reruns.
    mycount = 0
    if 'count' not in st.session_state:
        st.session_state['count'] = 0
    else:
        mycount = st.session_state['count']

    st.write(mycount)
    mycount = mycount + 1
    st.session_state['count'] = mycount

    for i in range(mycount):
        mystatekeyindex = "element" + str(i)
        mystatekeyanswerindex = "elementANS" + str(i)
        if mystatekeyindex not in st.session_state:
            st.session_state[mystatekeyindex] = yourquestion
            st.session_state[mystatekeyanswerindex] = answer
        if mystatekeyindex in st.session_state:
            with st.expander(st.session_state[mystatekeyindex]):
                st.write(st.session_state[mystatekeyanswerindex])
def colorizedtext(acolor, astring):
    # Wrap a string in Streamlit's ":color[text]" markdown colour syntax.
    formattedcolor = ":" + acolor + "[" + astring + "]"
    return formattedcolor
if st.button("Ask QA "):
documents = loader1.load()
answer = ""
with st.spinner(" Finding answer for your question .... AI will get you answer in 2 more minutes... " ):
with st.expander( "Employing your choice of AI method ... " + aimethod + "..."):
st.write(str(chain)[:700])
st.write("AI is reading this [link](%s)" % url)
prgpgress = st.progress(0)
st.subheader(colorizedtext("red", yourquestion))
for i in range(100):
time.sleep(0.9)
prgpgress.progress(i+1)
answer = chain.run(input_documents=documents, question=yourquestion)
st.subheader(colorizedtext("blue", answer))
history()