Spaces:

kaleidoscope-data
/

data-cleaning-llm

App Files Files Community

data-cleaning-llm / app /app.py

cmagganas's picture

Upload folder using huggingface_hub

57da852 over 1 year ago

2.57 kB

	""" this app is streamlit app for the current project hosted on HuggingFace spaces """

	import streamlit as st
	from openai_chat_completion import OpenAIChatCompletions
	from dataclean_hf import main
	from util import json_to_dict #, join_dicts

	# Page header followed by a short disclaimer about the demo's maturity.
	st.title("Kaleidoscope Data - Data Cleaning LLM App")
	st.write(
	    "This app is a demo of the LLM model for data cleaning. It is a work in progress and is not yet ready for production use."
	)

	# Two alternative inputs: free text typed into a box, or an uploaded CSV file.
	text_input = st.text_input(
	    label="Enter text",
	    value="",
	)
	csv_file = st.file_uploader(
	    label="Upload CSV",
	    type=["csv"],
	)

	# Candidate locations of the system prompt, tried in order. Both are relative
	# to the current working directory.
	# NOTE(review): presumably the second entry covers launching the app from
	# inside the app/ directory -- confirm against the deployment layout.
	_SYSTEM_PROMPT_PATHS = (
	    'prompts/gpt4-system-message2.txt',
	    '../prompts/gpt4-system-message2.txt',
	)

	MODEL = "gpt-4"  # alternative: "gpt-3.5-turbo"


	def _load_system_message(paths=_SYSTEM_PROMPT_PATHS):
	    """Return the text of the first existing system-prompt file in *paths*.

	    Args:
	        paths: Iterable of file paths to try, in order.

	    Returns:
	        The full contents of the first file that exists, decoded as UTF-8.

	    Raises:
	        FileNotFoundError: If none of the candidate files exists.
	    """
	    paths = tuple(paths)
	    last_error = None
	    for path in paths:
	        try:
	            # The context manager closes the file; no explicit close() needed.
	            with open(path, 'r', encoding='utf8') as prompt_file:
	                return prompt_file.read()
	        except FileNotFoundError as err:
	            last_error = err
	    raise FileNotFoundError(
	        "System prompt not found; tried: " + ", ".join(paths)
	    ) from last_error


	@st.cache_data
	def convert_df(df):
	    """Serialize a dataframe to UTF-8 encoded CSV bytes.

	    Decorated with ``st.cache_data`` so the conversion is not recomputed on
	    every Streamlit rerun.

	    Args:
	        df: Object exposing ``to_csv()``; expected to be a ``pd.DataFrame``.

	    Returns:
	        bytes: Output of ``df.to_csv()`` (default options) encoded as UTF-8.
	    """
	    return df.to_csv().encode('utf-8')


	# Run the data cleaning API (OpenAIChatCompletions class from
	# openai_chat_completion.py) when the button is pressed.
	if st.button("Run Data Cleaning API"):

	    if text_input:
	        # Free-text path: send the text to the chat model with the system prompt.
	        sys_mes = _load_system_message()
	        chat = OpenAIChatCompletions(model=MODEL, system_message=sys_mes)
	        response = chat.openai_chat_completion(text_input, n_shot=None)

	        # Show the content of the first choice, passed through json_to_dict.
	        # NOTE(review): presumably the model replies with a JSON string that
	        # json_to_dict parses -- confirm in util.py.
	        response_content = response['choices'][0]['message']['content']
	        st.write(json_to_dict(response_content))

	    elif csv_file:
	        # CSV path: only taken when no text was entered. Run the cleaning
	        # pipeline on the upload and offer the result as a download.
	        output_df = main(csv_file)
	        csv = convert_df(output_df)

	        st.download_button(
	            label="Download data as CSV",
	            data=csv,
	            file_name='cleaned_df.csv',
	            mime='text/csv',
	        )

	    else:
	        # Neither text nor a CSV file was provided.
	        st.write("Please enter text or upload a CSV file.")