import os

from dotenv import load_dotenv
import google.generativeai as genai
import gradio as gr
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

# Load API keys and service URLs from .env
load_dotenv()

# Gemini serves as the generation model
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# Silence tokenizer fork warnings before loading the embedding models
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Embedding model for the slide/text collection
model = HuggingFaceEmbedding(
    model_name="llamaindex/vdr-2b-multi-v1",
    device="cpu",
    trust_remote_code=True,
)

# Embedding model for the audio-transcript collection
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")

# Qdrant hosts both vector collections
client = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY"),
)

TXT_COLLECTION_NAME = "llama-summaries"
AUD_COLLECTION_NAME = "sbert-audio"


def retrieve_and_generate(query, history):
    # Embed the query once per collection, since each collection was
    # indexed with a different embedding model
    text_query_vector = model.get_query_embedding(query)
    audio_query_vector = sbert_model.encode(query)

    # Top-5 nearest slide summaries
    text_results = client.query_points(
        collection_name=TXT_COLLECTION_NAME,
        query=text_query_vector,
        using="text",
        with_payload=["text"],
        limit=5,
    )

    # Top-5 nearest audio-transcript chunks
    audio_results = client.query_points(
        collection_name=AUD_COLLECTION_NAME,
        query=audio_query_vector,
        using="text",
        with_payload=["text"],
        limit=5,
    )

    # Merge both result sets into a single labeled context string
    context = []
    for idx, point in enumerate(audio_results.points):
        if text := point.payload.get("text"):
            context.append(f"[Audio Excerpt {idx + 1}]: {text}")
    for idx, point in enumerate(text_results.points):
        if text := point.payload.get("text"):
            context.append(f"[Slide Content {idx + 1}]: {text}")
    context_text = "\n\n".join(context)

    prompt = f"""
    You are a financial expert assistant. Synthesize a comprehensive answer
    using the provided context from multiple sources.
    If the information is insufficient, state that clearly.

    Question: {query}

    Context from various sources:
    {context_text}

    Provide a structured, professional response with clear sections when appropriate:
    """

    response = gemini_model.generate_content(
        [prompt],
        generation_config={"temperature": 0.5},
    )
    return response.text


demo = gr.ChatInterface(
    fn=retrieve_and_generate,
    title="Financial AI Assistant",
    description="Ask financial questions and receive AI-powered responses based on multimodal data.",
)

demo.launch(share=True)
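
# Optional smoke test (a hypothetical snippet, not part of the app itself):
# comment out demo.launch(...) above and run this instead to verify retrieval
# and generation end to end without the Gradio UI. The sample question is
# illustrative only.
#
# if __name__ == "__main__":
#     print(retrieve_and_generate("What drove revenue growth this quarter?", []))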