|
""" this app is streamlit app for the current project hosted on HuggingFace spaces """ |
|
|
|
import streamlit as st |
|
from openai_chat_completion import OpenAIChatCompletions |
|
from dataclean_hf import main |
|
from util import json_to_dict |
|
|
|
# Page header followed by a short notice about the demo's maturity.
st.title("Kaleidoscope Data - Data Cleaning LLM App")
st.write(
    "This app is a demo of the LLM model for data cleaning. "
    "It is a work in progress and is not yet ready for production use."
)

# Two alternative inputs: a free-text field (empty by default) and a CSV
# upload restricted to the .csv extension. Both values are read by the
# button handler further down the script.
text_input = st.text_input(label="Enter text", value="")
csv_file = st.file_uploader(label="Upload CSV", type=["csv"])
|
|
|
|
|
# Model name handed to OpenAIChatCompletions for the free-text path.
MODEL = "gpt-4"

# Locations of the system prompt file: relative to the current working
# directory first, then one directory up as a fallback.
_SYSTEM_MESSAGE_PATH = 'prompts/gpt4-system-message2.txt'
_SYSTEM_MESSAGE_FALLBACK_PATH = '../prompts/gpt4-system-message2.txt'


def _read_system_message():
    """Return the contents of the system message prompt file.

    The primary path is tried first; if it does not exist, the fallback
    path one directory up is read instead.

    Returns:
        str: the full text of the prompt file.

    Raises:
        FileNotFoundError: if the file is missing at both locations.
    """
    try:
        # The context manager closes the file; no explicit close() is needed.
        with open(_SYSTEM_MESSAGE_PATH, 'r', encoding='utf8') as prompt_file:
            return prompt_file.read()
    except FileNotFoundError:
        with open(_SYSTEM_MESSAGE_FALLBACK_PATH, 'r', encoding='utf8') as prompt_file:
            return prompt_file.read()


@st.cache_data
def convert_df(df):
    """Serialize a dataframe to UTF-8 encoded CSV bytes.

    Args:
        df: object exposing ``to_csv()`` (presumably a pandas DataFrame
            returned by ``main`` -- confirm against dataclean_hf).

    Returns:
        bytes: the CSV text produced by ``df.to_csv()``, encoded as UTF-8.
    """
    return df.to_csv().encode('utf-8')


if st.button("Run Data Cleaning API"):
    if text_input:
        # Free-text path: send the entered text to the chat model together
        # with the system prompt, then display the parsed reply.
        sys_mes = _read_system_message()

        chat = OpenAIChatCompletions(model=MODEL, system_message=sys_mes)
        response = chat.openai_chat_completion(text_input, n_shot=None)

        # Only the first choice's message content is used.
        response_content = response['choices'][0]['message']['content']
        st.write(json_to_dict(response_content))

    elif csv_file:
        # CSV path: run the cleaning pipeline on the uploaded file and offer
        # the result as a downloadable CSV.
        output_df = main(csv_file)
        csv_bytes = convert_df(output_df)

        st.download_button(
            label="Download data as CSV",
            data=csv_bytes,
            file_name='cleaned_df.csv',
            mime='text/csv',
        )

    else:
        # The button was pressed with neither text nor a file provided.
        st.write("Please enter text or upload a CSV file.")