"""Streamlit app: ask questions about an uploaded video via a Gemini-backed agent."""

import tempfile
import time
from pathlib import Path

import streamlit as st
from google.generativeai import get_file, upload_file
from phi.agent import Agent
from phi.model.google import Gemini
from phi.tools.duckduckgo import DuckDuckGo

st.set_page_config(
    page_title="Multimodal AI Agent",
    page_icon="🎥",
    layout="wide"
)

st.title("Phidata Multimodal AI Agent 🎥🎤💬")
st.header("Powered by Gemini 2.0 Flash Exp")


@st.cache_resource
def initialize_agent():
    """Build the agent once; ``st.cache_resource`` reuses it across reruns."""
    return Agent(
        name="Multimodal Analyst",
        model=Gemini(id="gemini-2.0-flash-exp"),
        tools=[DuckDuckGo()],
        markdown=True,
    )


def _save_upload_to_temp(uploaded_file):
    """Write the uploaded bytes to a temp file and return that file's path.

    The temp file keeps the extension of the uploaded file name (falling back
    to ``.mp4`` when there is none) so a ``.mov`` or ``.avi`` upload is not
    mislabelled as ``.mp4`` on disk. The caller is responsible for deleting
    the file.
    """
    suffix = Path(uploaded_file.name).suffix or ".mp4"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        tmp_file.write(uploaded_file.read())
        return tmp_file.name


def _upload_and_wait(path):
    """Upload *path* with ``upload_file`` and poll until it leaves PROCESSING.

    Returns:
        The file object returned by the last ``get_file`` / ``upload_file`` call.

    Raises:
        RuntimeError: if the file ends in the ``FAILED`` state, so a broken
            upload is reported instead of being handed to the agent.
    """
    video_file = upload_file(path)
    while video_file.state.name == "PROCESSING":
        time.sleep(1)
        video_file = get_file(video_file.name)
    if video_file.state.name == "FAILED":
        raise RuntimeError("Video processing failed; please try another file.")
    return video_file


def _build_prompt(user_prompt):
    """Return the instruction text sent to the agent for *user_prompt*."""
    return (
        " First analyze the video and then answer this question using both"
        " the video analysis and web research, question:"
        f" {user_prompt}"
        " Provide a comprehensive response focusing on practical,"
        " actionable information. "
    )


agent = initialize_agent()

uploaded_file = st.file_uploader("Upload a video file", type=['mp4', 'mov', 'avi'])

if uploaded_file:
    upload_path = _save_upload_to_temp(uploaded_file)
    # Streamlit reruns this script on every interaction, so a new temp file is
    # written each run; the finally below guarantees each one is removed even
    # when "Analyze" is never clicked or the prompt is empty.
    try:
        st.video(upload_path)

        user_prompt = st.text_area(
            "What are you looking for in this video?",
            placeholder="Ask anything about the video - AI Agent will analyze everything and search the web if needed",
            help="You can ask questions about the video content and get information from the web"
        )

        if st.button("Analyze"):
            if not user_prompt:
                st.warning("Please enter your question.")
            else:
                # Top-level UI boundary: show any failure to the user rather
                # than letting the script crash.
                try:
                    with st.spinner("Analyzing video and searching the web..."):
                        video_file = _upload_and_wait(upload_path)
                        result = agent.run(
                            _build_prompt(user_prompt), videos=[video_file]
                        )

                    st.subheader("Result")
                    st.markdown(result.content)
                except Exception as e:
                    st.error(f"An error occurred: {e}")
    finally:
        Path(upload_path).unlink(missing_ok=True)
else:
    st.info("Please upload a video to begin analysis.")

# NOTE(review): the markdown body is effectively empty; presumably custom
# HTML/CSS was meant to go here - confirm or remove.
st.markdown(""" """, unsafe_allow_html=True)