Spaces:

ivanfioravanti
/

phidata-multimodal-ai-agent

Running

File size: 2,622 Bytes

f0b2bc6

import streamlit as st
from phi.agent import Agent
from phi.model.google import Gemini
from phi.tools.duckduckgo import DuckDuckGo
from google.generativeai import upload_file, get_file
import time
from pathlib import Path
import tempfile

# Page-level configuration, followed by the title and header shown at the
# top of the app.
_page_settings = {
    "page_title": "Multimodal AI Agent",
    "page_icon": "🎥",
    "layout": "wide",
}
st.set_page_config(**_page_settings)

st.title("Phidata Multimodal AI Agent 🎥🎤💬")
st.header("Powered by Gemini 2.0 Flash Exp")

@st.cache_resource
def initialize_agent():
    """Build the "Multimodal Analyst" agent.

    The agent is backed by the ``gemini-2.0-flash-exp`` model, is given a
    DuckDuckGo tool, and has markdown output enabled. The function is wrapped
    in ``st.cache_resource``.

    Returns:
        The configured ``Agent`` instance.
    """
    gemini_model = Gemini(id="gemini-2.0-flash-exp")
    web_search = DuckDuckGo()
    analyst = Agent(
        name="Multimodal Analyst",
        model=gemini_model,
        tools=[web_search],
        markdown=True,
    )
    return analyst

# Main page flow: upload a video, preview it, take a question, then send the
# video plus the question to the agent and render its answer.
agent = initialize_agent()

uploaded_file = st.file_uploader("Upload a video file", type=['mp4', 'mov', 'avi'])

if uploaded_file:
    # getvalue() returns the full buffer independent of the stream's current
    # read position, unlike read().
    video_bytes = uploaded_file.getvalue()

    # Preview straight from memory. No temp file is needed just to show the
    # video, so script reruns that never reach the analysis step (typing in
    # the text area, clicking with an empty prompt) leave nothing on disk.
    st.video(video_bytes)

    user_prompt = st.text_area(
        "What are you looking for in this video?",
        placeholder="Ask anything about the video - AI Agent will analyze everything and search the web if needed",
        help="You can ask questions about the video content and get information from the web"
    )

    if st.button("Analyze"):
        if not user_prompt:
            st.warning("Please enter your question.")
        else:
            # upload_file() is given a filesystem path, so the bytes are
            # spilled to a temp file only now, for the duration of the
            # analysis. delete=False lets the path outlive the handle; the
            # `finally` below is what removes it.
            # NOTE(review): the suffix stays '.mp4' for every upload type, as
            # before, so the file name the upload call sees is unchanged.
            tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
            upload_path = tmp_file.name
            try:
                # Close (and thereby flush) the handle before the path is
                # handed to upload_file().
                with tmp_file:
                    tmp_file.write(video_bytes)

                with st.spinner("Analyzing video and searching the web..."):
                    video_file = upload_file(upload_path)
                    # Poll until the service has finished processing.
                    while video_file.state.name == "PROCESSING":
                        time.sleep(1)
                        video_file = get_file(video_file.name)

                    # The loop exits on any non-PROCESSING state; a failed
                    # upload must not be passed to the agent as if usable.
                    if video_file.state.name == "FAILED":
                        raise RuntimeError(
                            "video processing failed on the server "
                            f"(state: {video_file.state.name})"
                        )

                    prompt = f"""
                    First analyze the video and then answer this question using both 
                    the video analysis and web research, question: {user_prompt}
                    
                    Provide a comprehensive response focusing on practical, actionable information.
                    """

                    result = agent.run(prompt, videos=[video_file])

                st.subheader("Result")
                st.markdown(result.content)

            except Exception as e:
                # Top-level UI boundary: surface the failure in the page
                # rather than crashing the script run.
                st.error(f"An error occurred: {str(e)}")
            finally:
                # Always remove the local copy, whether analysis succeeded
                # or not.
                Path(upload_path).unlink(missing_ok=True)
else:
    st.info("Please upload a video to begin analysis.")

# Inline CSS that sets the height of text areas to 100px. It is passed with
# unsafe_allow_html=True so the <style> tag is emitted as raw HTML.
_text_area_css = """
    <style>
    .stTextArea textarea {
        height: 100px;
    }
    </style>
    """
st.markdown(_text_area_css, unsafe_allow_html=True)