Spaces:
Runtime error
Runtime error
import streamlit as st | |
from streamlit_chat import message | |
from openai.error import OpenAIError | |
from .utils import * | |
from uuid import uuid4 | |
from typing import Text, Union | |
# Whether the upload widget accepts several files at once; forwarded to
# ``st.file_uploader(accept_multiple_files=...)`` in ``qa_main``.
multiple_files = False
def clear_submit():
    """Mark the current upload as not submitted.

    Stores ``False`` under the ``file_submitted`` key of Streamlit's session
    state.
    """
    session = st.session_state
    session["file_submitted"] = False
def set_openai_api_key(api_key: Text):
    """Store the OpenAI API key in the session and flag it as configured.

    Args:
        api_key (Text): OpenAI API key to keep under ``OPENAI_API_KEY``.
    """
    session = st.session_state
    # Save the key first, then record that a key is now available.
    session["OPENAI_API_KEY"] = api_key
    session["api_key_configured"] = True
def file_to_doc(file: Union[PDFFile, DocxFile, TxtFile, CodeFile]):
    """Convert an uploaded file to a document using the parser for its type.

    The parser is selected from the suffix of ``file.name`` (compared
    case-insensitively): ``parse_pdf`` for ``.pdf``, ``parse_docx`` for
    ``.docx`` and ``parse_txt`` for the plain-text and source-code suffixes.

    Args:
        file: Uploaded file object. Only its ``name`` attribute is read here;
            the object itself is handed to the selected parser.

    Returns:
        The value returned by the selected parser, or ``None`` when the file
        type is not supported (an error is shown in the UI in that case).
    """
    # Suffixes that are handled by the plain-text parser.
    text_suffixes = (".txt", ".py", ".json", ".html", ".css", ".md")
    # Lower-case once so that e.g. "REPORT.PDF" is treated like "report.pdf".
    filename = file.name.lower()

    if filename.endswith(".pdf"):
        return parse_pdf(file)
    if filename.endswith(".docx"):
        return parse_docx(file)
    # ``endswith`` with a tuple checks the real suffix, so names that contain
    # several dots (e.g. "notes.v2.md") are still recognised.
    if filename.endswith(text_suffixes):
        return parse_txt(file)

    st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
    return None
# this function can be used to define a single doc processing pipeline | |
# def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None: | |
def qa_main():
    """Render the "chat with a file" page.

    The page collects an OpenAI API key, lets the user upload a file, parses
    it with ``file_to_doc``, indexes it with ``embed_docs`` and then runs a
    chat loop: every question is answered by ``get_answer`` using the sources
    that ``search_docs`` retrieves from the index. The conversation is kept in
    ``st.session_state["messages"]`` and replayed on every rerun.
    """
    st.markdown("<h2>This app allows to chat with files!</h2>", unsafe_allow_html=True)
    st.write("Just upload something using and start chatting with a version of GPT4 that has read the file!")

    # Stays None until a file has been parsed and embedded successfully.
    index = None

    # OpenAI API Key - TODO: consider adding a key valid for everyone
    st.header("Configure OpenAI API Key")
    user_secret = st.text_input(
        "Insert your OpenAI API key here ([get your API key](https://platform.openai.com/account/api-keys)).",
        type="password",
        placeholder="Paste your OpenAI API key here (sk-...)",
        help="You can get your API key from https://platform.openai.com/account/api-keys.",
        value=st.session_state.get("OPENAI_API_KEY", ""),
    )
    if user_secret:
        set_openai_api_key(user_secret)

    # File that needs to be queried
    st.header("Upload a file")
    uploaded_file = st.file_uploader(
        "Upload a pdf, docx, or txt file (scanned documents not supported)",
        type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
        help="Scanned documents are not supported yet 🥲",
        on_change=clear_submit,
        accept_multiple_files=multiple_files,
    )

    # Reading the uploaded file
    if uploaded_file is not None:
        # Toggle the internal file submission state to True
        st.session_state["file_submitted"] = True
        # Parse the file using the custom parsers
        doc = file_to_doc(uploaded_file)
        # file_to_doc returns None for unsupported files; skip indexing then
        # instead of failing on tuple(None).
        if doc is not None:
            # Convert the parsed file into a list of documents
            text = text_to_docs(text=tuple(doc))
            try:
                with st.spinner("Indexing the document... This might take a while!"):
                    index = embed_docs(tuple(text))
                    st.session_state["api_key_configured"] = True
            except OpenAIError as e:
                # st.error accepts a single message body, so the details are
                # formatted into the string rather than passed separately.
                st.error(f"OpenAI error encountered: {e._message}")

    if "messages" not in st.session_state:
        st.session_state["messages"] = []

    # Replay the conversation so far. The loop variable is deliberately not
    # called ``message`` to avoid shadowing the streamlit_chat import.
    for past_message in st.session_state.messages:
        with st.chat_message(past_message["role"]):
            st.markdown(past_message["content"])

    if prompt := st.chat_input("Ask the document something..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            if index is None:
                # Nothing to search: no upload yet, an unsupported file, or
                # indexing failed above.
                reply = (
                    "There is no indexed document to query yet. Upload a supported file, "
                    "make sure it was indexed without errors, then ask again."
                )
            else:
                # Retrieve the most relevant sources
                sources = search_docs(index, prompt)
                # Produce the answer from those sources
                answer = get_answer(sources, prompt)
                reply = answer["output_text"]
            message_placeholder.markdown(reply)
        st.session_state.messages.append({"role": "assistant", "content": reply})