import streamlit as st
from openai.error import OpenAIError
from .utils import *
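# NOTE: the wildcard import from .utils is expected to provide the file parsers
# (parse_pdf, parse_docx, parse_txt), the chunking/embedding helpers (text_to_docs,
# embed_docs), the retrieval helpers (search_docs, get_answer), and the
# PDFFile/DocxFile/TxtFile/CodeFile type aliases used throughout this module.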
from typing import Text, Union
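
# controls whether the uploader accepts a single file or several at once
# (passed to st.file_uploader's accept_multiple_files argument below)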
multiple_files = False

def clear_submit():
    """
    Toggles the file_submitted internal session state variable to False.
    """
    st.session_state["file_submitted"] = False

def set_openai_api_key(api_key: Text) -> bool:
    """Sets the internal OpenAI API key to the given value.

    Args:
        api_key (Text): OpenAI API key
    """
    if not (api_key.startswith("sk-") and len(api_key) == 51):
        st.error("Invalid OpenAI API key! Please provide a valid key.")
        return False

    st.session_state["OPENAI_API_KEY"] = api_key
    st.session_state["api_key_configured"] = True
    return True

def file_to_doc(file: Union[PDFFile, DocxFile, TxtFile, CodeFile]):
    """Converts a file to a document using specialized parsers."""
    if file.name.endswith(".pdf"):
        doc = parse_pdf(file)
    elif file.name.endswith(".docx"):
        doc = parse_docx(file)
    elif file.name.split(".")[-1] in ["txt", "py", "json", "html", "css", "md"]:
        doc = parse_txt(file)
    else:
        st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
        doc = None
    return doc
# this function can be used to define a single doc processing pipeline
# def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:
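
# main page logic: OpenAI API key configuration, file upload and indexing, then the chat loop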
def qa_main():
    st.markdown("<h2>This app allows you to chat with files!</h2>", unsafe_allow_html=True)
    st.write("Just upload a file and start chatting with a version of GPT-4 that has read it!")

    index = None
    doc = None

    # OpenAI API Key - TODO: consider adding a key valid for everyone
    st.header("Configure OpenAI API Key")
    st.warning("Please enter your OpenAI API Key!", icon="⚠️")
    user_secret = st.text_input(
        "Insert your OpenAI API key here ([get your API key](https://platform.openai.com/account/api-keys)).",
        type="password",
        placeholder="Paste your OpenAI API key here (sk-...)",
        help="You can get your API key from https://platform.openai.com/account/api-keys.",
        value=st.session_state.get("OPENAI_API_KEY", ""),
    )
    if user_secret:
        if set_openai_api_key(user_secret):
            st.success("OpenAI API key successfully provided!", icon="✅")

    # File that needs to be queried
    st.header("Upload a file")
    uploaded_file = st.file_uploader(
        "Upload a pdf, docx, or txt file (scanned documents not supported)",
        type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
        help="Scanned documents are not supported yet 🥲",
        on_change=clear_submit,
        accept_multiple_files=multiple_files,
    )

    # reading the uploaded file
    if uploaded_file is not None:
        # toggle internal file submission state to True
        st.session_state["file_submitted"] = True
        # parse the file using custom parsers
        doc = file_to_doc(uploaded_file)
        # converts the files into a list of documents
        text = text_to_docs(text=tuple(doc))
        try:
            with st.spinner("Indexing the document... This might take a while!"):
                index = embed_docs(tuple(text))
                st.session_state["api_key_configured"] = True
        except OpenAIError as e:
            st.error(f"OpenAI error encountered: {e._message}")
if "messages" not in st.session_state:
st.session_state["messages"] = []
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])
if prompt := st.chat_input("Ask the document something..."):
st.session_state.messages.append({"role": "user", "content": prompt})
with st.chat_message("user"):
st.markdown(prompt)
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            # retrieving the most relevant sources
            sources = search_docs(index, prompt)
            # producing the answer, live
            full_response = ""
            for answer_bit in get_answer(sources, prompt)["output_text"]:
                full_response += answer_bit
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
            # answer = get_answer(sources, prompt)
            # message_placeholder.markdown(answer["output_text"])
            # st.session_state.messages.append({"role": "assistant", "content": answer["output_text"]})
        st.session_state.messages.append({"role": "assistant", "content": full_response})