Spaces:

awacke1
/

GPT-4o-omni-text-audio-image-video

Running

App Files Files Community

awacke1 committed on Jun 9

Commit

36e0d75

•

1 Parent(s): ac37df8

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -24

app.py CHANGED Viewed

@@ -1800,18 +1800,19 @@ if AddAFileForContext:
 def main():
-    #st.markdown("### OpenAI GPT-4o Model")
     st.markdown("##### GPT-4o Omni Model: Text, Audio, Image, & Video")
     option = st.selectbox("Select an option", ("Text", "Image", "Audio", "Video"))
     if option == "Text":
         text_input = st.text_input("Enter your text:")
         if (text_input > ''):
             textResponse = process_text(text_input)
     elif option == "Image":
         text = "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."
         text_input = st.text_input(label="Enter text prompt to use with Image context.", value=text)
         image_input = st.file_uploader("Upload an image", type=["png"])
-        image_response = process_image(image_input, text_input)
     elif option == "Audio":
         text = "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."
@@ -1824,7 +1825,6 @@ def main():
                 process_audio(audio_input, text_input)
     elif option == "Audio old":
-        #text = "Transcribe and answer questions as a helpful audio music and speech assistant.  "
         text = "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."
         text_input = st.text_input(label="Enter text prompt to use with Audio context.", value=text)
@@ -1851,7 +1851,6 @@ def main():
         video_input = st.file_uploader("Upload a video file", type=["mp4"])
         process_audio_and_video(video_input)
 # Enter the GPT-4o omni model in streamlit chatbot
 current_messages=[]
 for message in st.session_state.messages:
@@ -1859,8 +1858,6 @@ for message in st.session_state.messages:
         current_messages.append(message)
         st.markdown(message["content"])
 # 🎵 Wav Audio files - Transcription History in Wav
 audio_files = glob.glob("*.wav")
 audio_files = [file for file in audio_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
@@ -1881,9 +1878,6 @@ video_files_mp3 = glob.glob("*.mp3")
 video_files_mp3 = [file for file in video_files_mp3 if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
 video_files_mp3.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
 main()
 # Delete All button for each file type
@@ -1962,10 +1956,6 @@ for file in video_files_mp3:
             os.remove(file)
             st.rerun()
 # ChatBot Entry
 if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
     st.session_state.messages.append({"role": "user", "content": prompt})
@@ -1980,10 +1970,6 @@ if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with
         response = process_text2(text_input=prompt)
     st.session_state.messages.append({"role": "assistant", "content": response})
 # Image and Video Galleries
 num_columns_images=st.slider(key="num_columns_images", label="Choose Number of Image Columns", min_value=1, max_value=15, value=3)
 display_images_and_wikipedia_summaries(num_columns_images)   # Image Jump Grid
@@ -1991,7 +1977,6 @@ display_images_and_wikipedia_summaries(num_columns_images)   # Image Jump Grid
 num_columns_video=st.slider(key="num_columns_video", label="Choose Number of Video Columns", min_value=1, max_value=15, value=3)
 display_videos_and_links(num_columns_video)   # Video Jump Grid
 # Optional UI's
 showExtendedTextInterface=False
 if showExtendedTextInterface:
@@ -1999,9 +1984,3 @@ if showExtendedTextInterface:
     num_columns_text=st.slider(key="num_columns_text", label="Choose Number of Text Columns", min_value=1, max_value=15, value=4)
     display_buttons_with_scores(num_columns_text)  # Feedback Jump Grid
     st.markdown(personality_factors)
-#if __name__ == "__main__":

 def main():
     st.markdown("##### GPT-4o Omni Model: Text, Audio, Image, & Video")
     option = st.selectbox("Select an option", ("Text", "Image", "Audio", "Video"))
     if option == "Text":
         text_input = st.text_input("Enter your text:")
         if (text_input > ''):
             textResponse = process_text(text_input)
     elif option == "Image":
         text = "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."
         text_input = st.text_input(label="Enter text prompt to use with Image context.", value=text)
         image_input = st.file_uploader("Upload an image", type=["png"])
+        if (image_input is not None):
+            image_response = process_image(image_input, text_input)
     elif option == "Audio":
         text = "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."
                 process_audio(audio_input, text_input)
     elif option == "Audio old":
         text = "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."
         text_input = st.text_input(label="Enter text prompt to use with Audio context.", value=text)
         video_input = st.file_uploader("Upload a video file", type=["mp4"])
         process_audio_and_video(video_input)
 # Enter the GPT-4o omni model in streamlit chatbot
 current_messages=[]
 for message in st.session_state.messages:
         current_messages.append(message)
         st.markdown(message["content"])
 # 🎵 Wav Audio files - Transcription History in Wav
 audio_files = glob.glob("*.wav")
 audio_files = [file for file in audio_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
 video_files_mp3 = [file for file in video_files_mp3 if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
 video_files_mp3.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
 main()
 # Delete All button for each file type
             os.remove(file)
             st.rerun()
 # ChatBot Entry
 if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
     st.session_state.messages.append({"role": "user", "content": prompt})
         response = process_text2(text_input=prompt)
     st.session_state.messages.append({"role": "assistant", "content": response})
 # Image and Video Galleries
 num_columns_images=st.slider(key="num_columns_images", label="Choose Number of Image Columns", min_value=1, max_value=15, value=3)
 display_images_and_wikipedia_summaries(num_columns_images)   # Image Jump Grid
 num_columns_video=st.slider(key="num_columns_video", label="Choose Number of Video Columns", min_value=1, max_value=15, value=3)
 display_videos_and_links(num_columns_video)   # Video Jump Grid
 # Optional UI's
 showExtendedTextInterface=False
 if showExtendedTextInterface:
     num_columns_text=st.slider(key="num_columns_text", label="Choose Number of Text Columns", min_value=1, max_value=15, value=4)
     display_buttons_with_scores(num_columns_text)  # Feedback Jump Grid
     st.markdown(personality_factors)