# Spaces: sartajbhuvaji / resonate (Sleeping)
# File size: 12,626 Bytes
# 5f773d1

# Description: This file contains the main Streamlit application for the Resonate project.
# Run command: streamlit run app.py

import os
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from streamlit import session_state as ss
from streamlit_chat import message 
from src.clustering.resonate_bert_summarizer import summarize_runner
from src.clustering.resonate_clustering import Clustering
from src.langchain.resonate_langchain_functions import LangChain
from src.utils.resonate_streamlitUtils import (
    aws_transcribe,
    convert_video_to_audio,
    pinecone_init_upsert,
    transcript_text_editor_minutes_to_hhmmss,
)

def initialize_session_state():
    """Seed Streamlit session state with every key the app reads.

    A key is only created when it is missing, so existing values survive
    Streamlit reruns. Values are built lazily through factories so that
    objects such as ``Clustering()`` are constructed at most once per session.

    The LangChain helper is created only once ``ss.api_key_set`` is true.
    """
    # Factories run in insertion order and only for keys that are absent.
    default_factories = {
        # API keys start unset; the sidebar form fills them in.
        "api_keys": lambda: {
            "openai_api_key": None,
            "pinecone_api_key": None,
            "aws_access_key": None,
            "aws_secret_access_key": None,
        },
        "api_key_set": lambda: False,
        "add_meeting": lambda: False,
        "Clustering_obj": Clustering,
        # Main screen - transcript editors
        "transcript_speaker_editor": lambda: False,
        "transcript_text_editor": lambda: False,
        "meeting_name": lambda: "",
        "df_transcript_speaker": pd.DataFrame,
        "df_transcript_text": pd.DataFrame,
        # The guard previously tested "updated_df", a key that is never
        # created, so this frame was reset to empty on every rerun. The
        # guard now tests the key that is actually assigned.
        "updated_transcript_df_to_embed": pd.DataFrame,
        "chat_view": lambda: True,
        "query": lambda: "",
        "responses": lambda: ["How can I assist you?"],
        "requests": list,
    }
    for state_key, make_default in default_factories.items():
        if state_key not in ss:
            ss[state_key] = make_default()

    # Deferred until the API keys have been saved.
    if "langchain_obj" not in ss and ss.api_key_set:
        ss.langchain_obj = LangChain()


def chat_view():
    """Render the chat screen: input box, "Clear" button and the conversation.

    A submitted message is handed over through ``ss.query`` by the text
    input's ``on_change`` callback, which also empties the widget. The message
    is consumed exactly once. Previously the keyed widget kept its text across
    reruns, so any rerun (pressing "Clear", saving API keys, ...) re-sent the
    last question and appended a duplicate answer.
    """
    st.header("Chat")
    response_container = st.container()
    textcontainer = st.container()

    def _queue_query():
        # Runs before the rerun triggered by the input change: stash the text
        # for this run and reset the widget so it is not submitted again.
        ss.query = ss.query_input
        ss.query_input = ""

    with textcontainer:
        st.text_input(
            "Chat Here",
            placeholder="Message Resonate ... ",
            key="query_input",
            on_change=_queue_query,
        )
        # Clear button
        if st.button("Clear"):
            ss.langchain_obj.conversation_bufw.memory.clear()  # Clear conversation buffer
            ss.query = ""
            ss.requests = []
            ss["responses"] = ["How can I assist you?"]
            st.rerun()

        elif ss.query:
            # Consume the pending query first so a failing backend call is
            # not retried on every subsequent rerun.
            query = ss.query
            ss.query = ""
            with st.spinner("typing..."):
                uuid_list = ss.Clustering_obj.uuid_for_query(query=query)
                print(f"Meeting Unique ID : {uuid_list}")
                response = ss.langchain_obj.chat(
                    query=query, in_filter=uuid_list, complete_db_flag=False
                )
                response = response["response"]
            ss.requests.append(query)
            ss.responses.append(response)

    with response_container:
        # responses[i] is rendered before requests[i]: the list starts with
        # the greeting, so it is always one entry ahead of the user's messages.
        for i, bot_message in enumerate(ss["responses"]):
            message(bot_message, key=str(i))
            if i < len(ss["requests"]):
                message(
                    ss["requests"][i],
                    is_user=True,
                    key=str(i) + "_user",
                )

def api_keys_input():
    """Render the API-key form and store the submitted keys.

    On submit, every entered value is written to ``ss.api_keys``. Non-empty
    values are also exported as environment variables, ``ss.api_key_set`` is
    switched on, and the app reruns.

    Secrets are never written to stdout; only which keys were provided is
    logged.
    """
    # (session-state key, widget label, environment variable)
    key_fields = (
        ("openai_api_key", "OpenAPI Key:", "OPENAI_API_KEY"),
        ("pinecone_api_key", "Pinecone Key:", "PINECONE_API_KEY"),
        ("aws_access_key", "AWS Access Key:", "AWS_ACCESS_KEY"),
        ("aws_secret_access_key", "AWS Secret Access Key:", "AWS_SECRET_ACCESS_KEY"),
    )

    with st.form("keys_input_form"):
        entered_keys = {}
        for state_key, label, _env_var in key_fields:
            entered_keys[state_key] = st.text_input(
                label,
                type="password",
                # Stored values start as None; `or ""` keeps the widget from
                # receiving None (which older Streamlit renders as "None").
                value=ss.api_keys.get(state_key) or "",
            )

        # Add a button to save the keys
        save_button = st.form_submit_button("Save API Keys")
        if save_button:
            for state_key, _label, env_var in key_fields:
                value = entered_keys[state_key]
                # Update session state with provided keys
                ss.api_keys[state_key] = value
                # Export only keys that were actually provided, so values
                # already present in the environment are not blanked out.
                if value:
                    os.environ[env_var] = value

            ss.api_key_set = True
            # Log which keys are set, never the secret values themselves.
            print(
                "API KEYS PROVIDED: ",
                {state_key: bool(value) for state_key, value in entered_keys.items()},
            )
            st.rerun()


def add_meeting():
    """Render the upload form and start transcription of a new meeting.

    On submit, with both a meeting name and a file present, the upload is
    saved under ``data/videoFiles`` (mp4, then converted to wav) or
    ``data/audioFiles`` (wav). The resulting wav file name is passed to
    ``aws_transcribe`` and the speaker editor is enabled.

    The extension is matched case-insensitively and split with
    ``os.path.splitext``. Previously ``MEETING.MP4`` / ``MEETING.WAV`` matched
    neither branch, so nothing was saved but transcription was still
    attempted.
    """
    video_dir = "data/videoFiles/"
    audio_dir = "data/audioFiles/"

    with st.form("add_meeting_form"):
        uploaded_file = st.file_uploader("Choose a file", type=["wav", "mp4"])
        # Get user input
        meeting_name = st.text_input("Enter Meeting Name:")
        save_meeting_button = st.form_submit_button("Save Meeting")
        if not save_meeting_button:
            return
        if not meeting_name:
            st.warning("Please enter Meeting Name.")
            return
        if uploaded_file is None:
            st.warning("Please upload a meeting recording.")
            return

        with st.spinner("Processing..."):
            # basename() drops any directory part of the client-supplied name
            # so the file cannot be written outside the data directories.
            file_name = os.path.basename(uploaded_file.name).replace(" ", "_")
            stem, extension = os.path.splitext(file_name)
            extension = extension.lower()
            audio_name = stem + ".wav"

            if extension == ".mp4":
                print("in video")
                os.makedirs(video_dir, exist_ok=True)
                os.makedirs(audio_dir, exist_ok=True)
                video_path = video_dir + file_name
                with open(video_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                # Convert video file to audio file
                convert_video_to_audio(video_path, audio_dir + audio_name)
            elif extension == ".wav":
                print("in audio")
                os.makedirs(audio_dir, exist_ok=True)
                with open(audio_dir + audio_name, "wb") as f:
                    f.write(uploaded_file.getbuffer())
            else:
                # Unsupported type: nothing was saved, so do not transcribe.
                st.warning("Please upload a meeting recording.")
                return

            ss.df_transcript_speaker = aws_transcribe(audio_name)
            ss.meeting_name = meeting_name
            ss.transcript_speaker_editor = True


def transcript_speaker_editor():
    """Let the user rename the speaker labels of the current transcript.

    Shows ``ss.df_transcript_speaker`` and one text input per unique value of
    its ``speaker_label`` column. On "Update Speakers" the renamed copy is
    stored in ``ss.df_transcript_text``, the speaker frame is cleared, the
    text editor is enabled and the app reruns.

    All renames are applied in a single pass against the original labels.
    Applying them one after another chained them (A->B followed by B->C
    turned A into C), which made swapping two speakers impossible.
    """
    ss.add_meeting = False
    with st.form("transcript_speaker_editor_form"):
        st.write("Transcript Speaker Editor:")
        st.dataframe(ss.df_transcript_speaker)
        # One input per unique speaker label, pre-filled with the current label
        updated_speaker_names = {}
        for speaker_label in ss.df_transcript_speaker["speaker_label"].unique():
            new_name = st.text_input(
                f"Edit speaker label '{speaker_label}'", speaker_label
            )
            # A blank entry keeps the original label instead of erasing it.
            updated_speaker_names[speaker_label] = (
                new_name if new_name.strip() else speaker_label
            )
        update_speaker_button = st.form_submit_button("Update Speakers")

    if update_speaker_button:
        df = ss.df_transcript_speaker.copy(deep=True)
        # Each original label is looked up exactly once, so renames never chain.
        df["speaker_label"] = df["speaker_label"].map(
            lambda label: updated_speaker_names.get(label, label)
        )
        ss.df_transcript_speaker = pd.DataFrame()
        ss.df_transcript_text = df
        ss.transcript_text_editor = True
        ss.transcript_speaker_editor = False
        st.rerun()


# Function to update the text column
def transcript_text_editor_update_text(row_index, new_text):
    """Write ``new_text`` into the "text" cell of row ``row_index``.

    The target is the session-state frame ``ss.updated_transcript_df_to_embed``.
    """
    frame_to_embed = ss.updated_transcript_df_to_embed
    frame_to_embed.at[row_index, "text"] = new_text


def transcript_text_editor():
    """Let the user correct transcript text row by row, then upload it.

    "Update Text" writes the new text for the chosen row. "Finish Transcript
    Editing" summarizes the edited transcript, rebuilds the clusters, upserts
    the transcript to Pinecone and returns to the chat view.

    Edits are also written back to ``ss.df_transcript_text``. The working copy
    is rebuilt from that frame on every rerun, so edits held only in the copy
    were discarded by the time "Finish Transcript Editing" was clicked, and
    the unedited transcript was uploaded.
    """
    ss.transcript_speaker_editor = False
    st.write("Transcript Text Editor:")
    st.write(ss.df_transcript_text)

    if ss.df_transcript_text.empty:
        # st.number_input would raise with max_value (-1) below min_value (0).
        st.warning("The transcript is empty; there is nothing to edit.")
        return

    df = ss.df_transcript_text.copy(deep=True)
    ss.updated_transcript_df_to_embed = df.copy(deep=True)
    # Convert start_time and end_time to HH:MM:SS format (display copy only;
    # the frame that gets embedded keeps the original values)
    df["start_time"] = df["start_time"].apply(transcript_text_editor_minutes_to_hhmmss)
    df["end_time"] = df["end_time"].apply(transcript_text_editor_minutes_to_hhmmss)
    row_index = st.number_input(
        "Enter the row index:",
        min_value=0,
        max_value=len(df) - 1,
        value=0,
        step=1,
    )
    new_text = st.text_area("Enter the new text:", df.at[row_index, "text"])
    update_text_button_inner = st.button("Update Text")
    if update_text_button_inner:
        transcript_text_editor_update_text(row_index, new_text)
        # Persist the edit in the source frame so it survives the next rerun.
        ss.df_transcript_text.at[row_index, "text"] = new_text
        st.success("Text updated successfully!")
    # Display the updated dataframe
    st.header("Updated Transcript")
    st.table(ss.updated_transcript_df_to_embed)
    update_text_button = st.button("Finish Transcript Editing")
    if update_text_button:
        with st.spinner("Uploading..."):
            ss.df_transcript_text = pd.DataFrame()
            meeting_summary, meeting_uuid = summarize_runner(
                ss.updated_transcript_df_to_embed
            )
            ss.Clustering_obj.create_Cluster()
            pinecone_init_upsert(
                ss.updated_transcript_df_to_embed,
                meeting_title=ss.meeting_name,
                meeting_summary=meeting_summary,
                meeting_uuid=meeting_uuid,
            )
            ss.meeting_name = "unnamed"
            st.success("Pinecone upsert completed successfully!")
            # Reset editor state and go back to the chat view.
            ss.transcript_text_editor = False
            ss.updated_transcript_df_to_embed = pd.DataFrame()
            ss.chat_view = True
            st.rerun()


def init_streamlit():
    """Build the page and dispatch to the view selected in session state.

    Configures the page, initializes session state, loads ``./config/.env``
    when it exists, renders the API-key form in the sidebar, and then shows
    the upload form, the transcript editors or the chat depending on the
    session-state flags.
    """
    # Page configuration must be the first Streamlit command of the run, so it
    # is issued before anything else that might touch Streamlit.
    st.set_page_config(
        initial_sidebar_state="collapsed",
        layout="wide",
    )

    initialize_session_state()
    if os.path.exists("./config/.env"):
        load_dotenv("./config/.env")
    else:
        print(".env file does not exist, API keys must be set manually.")

    st.title("RESONATE")

    # Initializing sidebar and its components
    with st.sidebar:
        api_keys_input()

    if st.button("Upload Meeting / Chat"):
        ss.add_meeting = not ss.add_meeting
        # Derive the chat flag from the upload flag so the two views stay
        # mutually exclusive. Flipping both independently showed the upload
        # form and the chat together when the button was pressed while both
        # were False (i.e. during transcript editing).
        ss.chat_view = not ss.add_meeting
        ss.transcript_speaker_editor = False
        ss.transcript_text_editor = False

    if not ss.api_key_set:
        st.header("Pre-requisites:")
        st.write("Please set the API keys to enable the chat view.")
        st.write("Please ensure that you have already run the 'pinecone_sample_dataloader.py'")

    if ss.add_meeting and ss.api_key_set:
        add_meeting()
    if ss.transcript_speaker_editor:
        transcript_speaker_editor()
    if ss.df_transcript_text is not None and ss.transcript_text_editor:
        transcript_text_editor()
    if ss.chat_view and ss.api_key_set:
        chat_view()  # Chat view



if __name__ == "__main__":
    # Prerequisite: Pinecone must already contain data before the Streamlit app is run.
    # Setup instructions: https://github.com/SartajBhuvaji/Resonate/blob/master/init_one_time_utils/PREREQUISITE.md
    init_streamlit()

# Test questions:
# What was discussed about cyberbullying?
# What is one new feature planned for GitLab's code search?
# What is the goal of defining maintainability for the new diffs architecture?