import streamlit as st
from streamlit_chat import message
from openai.error import OpenAIError
from .utils import *
from uuid import uuid4
from typing import Text, Union
# Forwarded to `st.file_uploader(accept_multiple_files=...)` in `qa_main`.
multiple_files = False
def clear_submit():
    """Reset the ``file_submitted`` session-state flag to ``False``.

    Registered as the ``on_change`` callback of the file uploader.
    """
    st.session_state.file_submitted = False
def set_openai_api_key(api_key:Text):
    """Store the given OpenAI API key in the Streamlit session state.

    Besides saving the key under ``OPENAI_API_KEY``, the
    ``api_key_configured`` flag is set to ``True``.

    Args:
        api_key (Text): OpenAI API key
    """
    state = st.session_state
    state.OPENAI_API_KEY = api_key
    state.api_key_configured = True
def file_to_doc(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]):
    """Convert an uploaded file to a document using the parser for its extension.

    The parser is chosen from the extension of ``file.name`` (compared
    case-insensitively):

    * ``.pdf``  -> ``parse_pdf``
    * ``.docx`` -> ``parse_docx``
    * ``.txt``, ``.py``, ``.json``, ``.html``, ``.css``, ``.md`` -> ``parse_txt``

    Args:
        file: Uploaded file object. Only its ``name`` attribute is inspected
            here; the object itself is handed to the selected parser.

    Returns:
        Whatever the selected parser returns, or ``None`` when the extension
        is not supported (an error is shown in the UI in that case).
    """
    plain_text_suffixes = (".txt", ".py", ".json", ".html", ".css", ".md")
    # Lower-case once so that names such as "REPORT.PDF" are recognised too.
    filename = file.name.lower()

    if filename.endswith(".pdf"):
        return parse_pdf(file)
    if filename.endswith(".docx"):
        return parse_docx(file)
    # `str.endswith` accepts a tuple of suffixes. Unlike splitting on ".",
    # this also works for names with no dot or with several dots.
    if filename.endswith(plain_text_suffixes):
        return parse_txt(file)

    st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
    return None
# Placeholder: the (not yet implemented) function below could be used to define a single document-processing pipeline.
# def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:
def qa_main():
    """Render the "chat with a file" Streamlit page.

    The page, from top to bottom:

    1. asks for an OpenAI API key and stores it in the session state;
    2. lets the user upload a file, parses it and builds an index over it;
    3. replays the chat history kept in ``st.session_state["messages"]`` and
       answers a new prompt from the most relevant parts of the indexed file.

    If no index is available (nothing uploaded, unsupported file type, or
    indexing failed), a new prompt is answered with a warning, not a lookup.
    """
    # Single-line literal: a "..." string cannot span several source lines.
    st.markdown("This app allows to chat with files!", unsafe_allow_html=True)
    st.write("Just upload something using and start chatting with a version of GPT4 that has read the file!")

    # Stays None until a document has been parsed and embedded successfully.
    index = None

    # OpenAI API Key - TODO: consider adding a key valid for everyone
    st.header("Configure OpenAI API Key")
    user_secret = st.text_input(
        "Insert your OpenAI API key here ([get your API key](https://platform.openai.com/account/api-keys)).",
        type="password",
        placeholder="Paste your OpenAI API key here (sk-...)",
        help="You can get your API key from https://platform.openai.com/account/api-keys.",
        value=st.session_state.get("OPENAI_API_KEY", ""),
    )
    if user_secret:
        set_openai_api_key(user_secret)

    # File that needs to be queried
    st.header("Upload a file")
    uploaded_file = st.file_uploader(
        "Upload a pdf, docx, or txt file (scanned documents not supported)",
        type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
        help="Scanned documents are not supported yet 🥲",
        on_change=clear_submit,
        accept_multiple_files=multiple_files,
    )

    # NOTE(review): the code below treats `uploaded_file` as one file, i.e. it
    # assumes `multiple_files` is False - confirm before enabling multi-upload.
    if uploaded_file is not None:
        # toggle internal file submission state to True
        st.session_state["file_submitted"] = True
        # parse the file using custom parsers
        doc = file_to_doc(uploaded_file)
        # file_to_doc reports unsupported types itself and returns None;
        # skip indexing in that case so that tuple(None) does not crash.
        if doc is not None:
            # converts the files into a list of documents
            text = text_to_docs(text=tuple(doc))
            try:
                with st.spinner("Indexing the document... This might take a while!"):
                    index = embed_docs(tuple(text))
                st.session_state["api_key_configured"] = True
            except OpenAIError as e:
                # st.error takes a single message body, so the details are
                # formatted into the string, not passed as a second argument.
                st.error(f"OpenAI error encountered: {e._message}")

    if "messages" not in st.session_state:
        st.session_state["messages"] = []

    # Replay the conversation so far. The loop variable is deliberately not
    # called `message`, to avoid shadowing the streamlit_chat import.
    for past_message in st.session_state.messages:
        with st.chat_message(past_message["role"]):
            st.markdown(past_message["content"])

    if prompt := st.chat_input("Ask the document something..."):
        if index is None:
            # Nothing to search: no upload, unsupported file or failed indexing.
            st.warning("Please upload a supported file and wait for it to be indexed before asking questions.")
            return

        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            # retrieving the most relevant sources
            sources = search_docs(index, prompt)
            # producing the answer
            answer = get_answer(sources, prompt)
            # displaying the answer
            message_placeholder.markdown(answer["output_text"])
        st.session_state.messages.append({"role": "assistant", "content": answer["output_text"]})