# AISandbox/qa/qa.py
# fracapuano - "Add files via upload" (commit 51fe9d2), 4.56 kB
import streamlit as st
from streamlit_chat import message
from openai.error import OpenAIError
from .utils import (
parse_docx,
parse_pdf,
parse_txt,
search_docs,
embed_docs,
text_to_docs,
get_answer,
)
from uuid import uuid4
def clear_submit() -> None:
    """Reset the ``submit`` flag kept in Streamlit's session state.

    Registered as the ``on_change`` callback of the file uploader and of the
    question text area, so changing either one clears a previous submission.
    """
    st.session_state.submit = False
def set_openai_api_key(api_key: str) -> None:
    """Store *api_key* in Streamlit's session state under ``OPENAI_API_KEY``.

    The sidebar key input reads this entry back as its initial value.
    """
    session = st.session_state
    session["OPENAI_API_KEY"] = api_key
def qa_main():
    """Render the "chat with files" Streamlit page.

    Sidebar: asks for an OpenAI API key and a single uploaded file. A
    ``.pdf``, ``.docx`` or ``.txt`` file is parsed, split with
    ``text_to_docs`` and indexed with ``embed_docs``.

    "Chat With File" tab: takes a question, retrieves sources with
    ``search_docs``, asks ``get_answer`` and renders the conversation stored
    in ``st.session_state['past']`` / ``['generated']``, newest exchange
    first.

    ``OpenAIError`` raised while indexing or answering is shown with
    ``st.error`` instead of propagating.
    """
    st.markdown("<h1>This app allows to chat with files!</h1>", unsafe_allow_html=True)
    st.markdown(
        """
        Developed using LangChain and OpenAI Embeddings.</p>
        Before hitting on "Submit", please make sure you have uploaded a file and entered a question.
        You can upload files using the sidebar on the left.
        """,
        unsafe_allow_html=True
    )

    # `index` stays None until a document has been parsed and embedded.
    index = None
    doc = None

    with st.sidebar:
        user_secret = st.text_input(
            "OpenAI API Key",
            type="password",
            placeholder="Paste your OpenAI API key here (sk-...)",
            help="You can get your API key from https://platform.openai.com/account/api-keys.",
            value=st.session_state.get("OPENAI_API_KEY", ""),
        )
        if user_secret:
            set_openai_api_key(user_secret)

        uploaded_file = st.file_uploader(
            "Upload a pdf, docx, or txt file",
            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
            help="Scanned documents are not supported yet!",
            on_change=clear_submit,
            accept_multiple_files=False,
        )

        # reading the files
        if uploaded_file is not None:
            # Match the extension case-insensitively so "REPORT.PDF" works too.
            file_name = uploaded_file.name.lower()
            if file_name.endswith(".pdf"):
                doc = parse_pdf(uploaded_file)
            elif file_name.endswith(".docx"):
                doc = parse_docx(uploaded_file)
            elif file_name.endswith(".txt"):
                doc = parse_txt(uploaded_file)
            else:
                # NOTE(review): the uploader's `type` list also lets through
                # py/json/html/css/md, which all end up in this branch.
                st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt]")

            # Only index when a parser produced something. An unsupported
            # file used to fall through to tuple(None) and raise TypeError.
            if doc is not None:
                text = text_to_docs(text=tuple(doc))
                st.write(text[:1])
                try:
                    with st.spinner("Indexing document(s)... This may take some time."):
                        index = embed_docs(tuple(text))
                        st.session_state["api_key_configured"] = True
                except OpenAIError as e:
                    st.error(e._message)

    tab1, tab2 = st.tabs(["Chat With File", "About the Application"])

    with tab1:
        # Conversation history: answers and questions, kept index-aligned.
        if 'generated' not in st.session_state:
            st.session_state['generated'] = []
        if 'past' not in st.session_state:
            st.session_state['past'] = []

        # The question box is only rendered once an API key has been entered;
        # without a key there is no input and `user_input` stays None.
        user_input = None
        if user_secret:
            st.header("Ask me something about the document:")
            user_input = st.text_area("You:", on_change=clear_submit)

        button = st.button("Submit")
        if button or st.session_state.get("submit"):
            if not user_input:
                st.error("Please enter a question!")
            elif index is None:
                # No file uploaded, unsupported type, or indexing failed:
                # there is nothing to search, so do not call search_docs(None, ...).
                st.error("No indexed document available. Please upload a supported file first!")
            else:
                st.session_state["submit"] = True
                sources = search_docs(index, user_input)
                try:
                    answer = get_answer(sources, user_input)
                    st.session_state.past.append(user_input)
                    st.session_state.generated.append(answer["output_text"])
                except OpenAIError as e:
                    st.error(e._message)

                if st.session_state['past']:
                    # Newest exchange first; a fresh uuid per widget avoids
                    # key collisions between chat bubbles.
                    for i in reversed(range(len(st.session_state['past']))):
                        message(st.session_state['generated'][i], key=str(uuid4()))
                        message(st.session_state['past'][i], is_user=True, key=str(uuid4()))

    with tab2:
        st.write('See sources')
        # st.write('Chat with Files enables user to extract all the information from a file. User can obtain the transcription, the embedding of each segment and also ask questions to the file through a chat.')
        # st.write('Features include- ')
        # st.write('1. Reading any pdf, docx or plain txt (such as python programs) file')
        # st.write('2. Embedding texts segments with Langchain and OpenAI')
        # st.write('3. Chatting with the file using streamlit-chat and LangChain QA with source and the GPT4 model')