import os

import gradio as gr
import google.generativeai as genai
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
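
# Pull GEMINI_API_KEY, QDRANT_URL, and QDRANT_API_KEY from a local .env file.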
load_dotenv()
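
# Configure the Gemini client and select the generation model.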
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
gemini_model = genai.GenerativeModel('gemini-2.0-flash')
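
# Visual document retrieval (VDR) embedding model, used to query the slide/text collection.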
model = HuggingFaceEmbedding(
    model_name="llamaindex/vdr-2b-multi-v1",
    device="cpu",
    trust_remote_code=True,
)
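
# Silence the HuggingFace tokenizers fork warning, then load the
# sentence-transformers model used to embed queries against the audio transcripts.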
os.environ["TOKENIZERS_PARALLELISM"] = "false"
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
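
# Connect to the hosted Qdrant instance.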
client = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY"),
)
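
# Qdrant collections: slide/text summaries and audio-transcript embeddings.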
TXT_COLLECTION_NAME = "llama-summaries"
AUD_COLLECTION_NAME = "sbert-audio"


def retrieve_and_generate(query, history):
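    # Embed the incoming question twice: once with the VDR model for the
    # slide collection and once with SBERT for the audio collection.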
    find = model.get_query_embedding(query)
    audio_find = sbert_model.encode(query)
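
    # Fetch the top-5 hits from each collection. Both collections store the
    # chunk under a "text" payload key and expose a named vector called "text".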
    text_results = client.query_points(
        collection_name=TXT_COLLECTION_NAME,
        query=find,
        using="text",
        with_payload=["text"],
        limit=5,
    )

    audio_results = client.query_points(
        collection_name=AUD_COLLECTION_NAME,
        query=audio_find,
        using="text",
        with_payload=["text"],
        limit=5,
    )
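
    # Tag each hit with its source so the model can tell audio excerpts
    # apart from slide content when synthesizing the answer.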
    context = []

    for idx, point in enumerate(audio_results.points):
        if text := point.payload.get('text'):
            context.append(f"[Audio Excerpt {idx+1}]: {text}")

    for idx, point in enumerate(text_results.points):
        if text := point.payload.get('text'):
            context.append(f"[Slide Content {idx+1}]: {text}")
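
    # Merge all retrieved snippets into one context block for the prompt.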
    context_text = "\n\n".join(context)
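
    # Grounded prompt: the model must answer from the retrieved context
    # and say so explicitly when the context is insufficient.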
prompt = f""" |
|
You are a financial expert assistant. Synthesize a comprehensive answer using the |
|
provided context from multiple sources. If information is insufficient, state that clearly. |
|
|
|
Question: {query} |
|
|
|
Context from various sources: |
|
{context_text} |
|
|
|
Provide a structured, professional response with clear sections when appropriate: |
|
""" |
|
|
|
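
    # Moderate temperature (0.5) trades a little creativity for consistency.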
    response = gemini_model.generate_content([prompt], generation_config={"temperature": 0.5})
    return response.text
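

# Gradio chat UI; ChatInterface passes (message, history) to the callback.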
demo = gr.ChatInterface(
    fn=retrieve_and_generate,
    title="Financial AI Assistant",
    description="Ask financial questions and receive AI-powered responses based on multimodal data.",
)
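
# share=True serves the app locally and also creates a temporary public link.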
demo.launch(share=True)