Spaces:

fracapuano
/

AISandbox

Runtime error

App Files Files Community

AISandbox / qa /qa.py

$fracapuano's picture$

fracapuano HF Staff

Add files via upload

51fe9d2 almost 2 years ago

raw

history blame

4.56 kB

	import streamlit as st
	from streamlit_chat import message
	from openai.error import OpenAIError
	from .utils import (
	parse_docx,
	parse_pdf,
	parse_txt,
	search_docs,
	embed_docs,
	text_to_docs,
	get_answer,
	)
	from uuid import uuid4

	def clear_submit():
	    """Reset the ``"submit"`` flag in Streamlit's session state to ``False``.

	    Passed as the ``on_change`` callback of the file uploader and of the
	    question text area in ``qa_main``.
	    """
	    session = st.session_state
	    session["submit"] = False

	def set_openai_api_key(api_key: str):
	    """Store *api_key* in Streamlit's session state under ``"OPENAI_API_KEY"``.

	    ``qa_main`` reads the same key back to pre-fill the API-key input.
	    """
	    session = st.session_state
	    session["OPENAI_API_KEY"] = api_key

	def _parse_uploaded_file(uploaded_file):
	    """Parse *uploaded_file* with the parser matching its file extension.

	    Returns the selected parser's result, or ``None`` (after showing an
	    error in the UI) when the extension has no parser.
	    """
	    # Match case-insensitively so "REPORT.PDF" is routed like "report.pdf".
	    file_name = uploaded_file.name.lower()
	    if file_name.endswith(".pdf"):
	        return parse_pdf(uploaded_file)
	    if file_name.endswith(".docx"):
	        return parse_docx(uploaded_file)
	    if file_name.endswith(".txt"):
	        return parse_txt(uploaded_file)
	    st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt]")
	    return None


	def qa_main():
	    """Render the "chat with files" page.

	    The sidebar collects the OpenAI API key and one uploaded file; the file
	    is parsed, split with ``text_to_docs`` and embedded with ``embed_docs``.
	    The first tab takes a question, retrieves sources with ``search_docs``,
	    asks ``get_answer`` and shows the conversation newest-first. Questions
	    and answers are kept in ``st.session_state['past']`` and
	    ``st.session_state['generated']``.
	    """
	    st.markdown("<h1>This app allows to chat with files!</h1>", unsafe_allow_html=True)
	    st.markdown(
	        """
	Developed using LangChain and OpenAI Embeddings.</p>
	Before hitting on "Submit", please make sure you have uploaded a file and entered a question.

	You can upload files using the sidebar on the left.
	        """,
	        unsafe_allow_html=True
	    )
	    index = None
	    doc = None

	    with st.sidebar:
	        user_secret = st.text_input(
	            "OpenAI API Key",
	            type="password",
	            placeholder="Paste your OpenAI API key here (sk-...)",
	            help="You can get your API key from https://platform.openai.com/account/api-keys.",
	            value=st.session_state.get("OPENAI_API_KEY", ""),
	        )
	        if user_secret:
	            set_openai_api_key(user_secret)

	        uploaded_file = st.file_uploader(
	            "Upload a pdf, docx, or txt file",
	            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
	            help="Scanned documents are not supported yet!",
	            on_change=clear_submit,
	            accept_multiple_files=False,
	        )
	        # reading the files
	        if uploaded_file is not None:
	            doc = _parse_uploaded_file(uploaded_file)

	        # The uploader accepts extensions that have no parser, so ``doc`` can
	        # still be None here; skip indexing instead of failing on tuple(None).
	        if doc is not None:
	            text = text_to_docs(text=tuple(doc))
	            st.write(text[:1])

	            try:
	                with st.spinner("Indexing document(s)... This may take some time."):
	                    index = embed_docs(tuple(text))
	                    st.session_state["api_key_configured"] = True
	            except OpenAIError as e:
	                # ``index`` stays None; the chat tab checks for that below.
	                st.error(e._message)

	    tab1, tab2 = st.tabs(["Chat With File", "About the Application"])
	    with tab1:
	        if 'generated' not in st.session_state:
	            st.session_state['generated'] = []

	        if 'past' not in st.session_state:
	            st.session_state['past'] = []

	        # The question box is only shown once an API key was entered; without
	        # one the input is simply empty.
	        user_input = ""
	        if user_secret:
	            st.header("Ask me something about the document:")
	            user_input = st.text_area("You:", on_change=clear_submit)

	        button = st.button("Submit")
	        if button or st.session_state.get("submit"):
	            if not user_input:
	                st.error("Please enter a question!")
	            elif index is None:
	                # No file uploaded, unsupported file, or embedding failed:
	                # there is nothing to search.
	                st.error("Please upload a supported file before asking a question!")
	            else:
	                st.session_state["submit"] = True
	                sources = search_docs(index, user_input)
	                try:
	                    answer = get_answer(sources, user_input)
	                except OpenAIError as e:
	                    st.error(e._message)
	                else:
	                    st.session_state.past.append(user_input)
	                    st.session_state.generated.append(answer["output_text"])

	        # Show the conversation newest-first, each answer above its question.
	        # Fresh uuid4 keys give every chat widget a unique key on each rerun.
	        history = list(zip(st.session_state['past'], st.session_state['generated']))
	        for question, reply in reversed(history):
	            message(reply, key=str(uuid4()))
	            message(question, is_user=True, key=str(uuid4()))

	    with tab2:
	        st.write('See sources')

	# st.write('Chat with Files enables user to extract all the information from a file. User can obtain the transcription, the embedding of each segment and also ask questions to the file through a chat.')
	# st.write('Features include- ')
	# st.write('1. Reading any pdf, docx or plain txt (such as python programs) file')
	# st.write('2. Embedding texts segments with Langchain and OpenAI')
	# st.write('3. Chatting with the file using streamlit-chat and LangChain QA with source and the GPT4 model')