# Page residue from the hosting site's file viewer (not part of the program):
#   sainathBelagavi's picture
#   Update app.py
#   cab4e03 verified
# app.py
import gradio as gr
import json
import re
import os
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from huggingface_hub import login
# Authenticate against the Hugging Face Hub using the token stored in the
# environment (Space secrets). A missing token or a failed login is reported
# on stdout and then re-raised, so the module does not finish importing.
try:
    hf_token = os.getenv('HUGGINGFACE_TOKEN')
    if not hf_token:
        raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
    login(token=hf_token)
except Exception as login_error:
    print(f"Error during Hugging Face login: {login_error}")
    raise
class TranscriptAnalyzer:
    """Analyze meeting transcripts with a causal language model.

    Construction loads the tokenizer and model named by ``model_name``.
    ``analyze_transcript`` is the main entry point; the ``extract_*`` helpers
    are pure regex scans that pre-identify dates and claim references so they
    can be quoted verbatim in the prompt.
    """

    # Numeric patterns are fenced with digit lookarounds so the day-first
    # pattern cannot match inside an ISO date ("2024-03-15" must not also
    # yield "24-03-15"). The month-name pattern accepts ordinal days
    # ("April 1st, 2024").
    _DATE_PATTERNS = (
        re.compile(r'(?<!\d)\d{1,2}[-/]\d{1,2}[-/]\d{2,4}(?!\d)'),
        re.compile(r'(?<!\d)\d{4}[-/]\d{1,2}[-/]\d{1,2}(?!\d)'),
        re.compile(
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*'
            r' \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b'
        ),
    )

    # One alternation instead of three overlapping patterns: a reference such
    # as "Claim #123" is consumed once and is not reported again as "#123".
    _CLAIM_PATTERN = re.compile(
        r'(?:\b(?:claim|case)\s+#?\s*|#\s*)\d+[-\w]*',
        re.IGNORECASE,
    )

    def __init__(self):
        """Load the tokenizer and model.

        Raises:
            Exception: whatever the loaders raise; the error is printed and
                then re-raised unchanged.
        """
        try:
            self.model_name = "microsoft/Phi-3.5-mini-instruct"
            # `token` replaces the deprecated `use_auth_token` keyword.
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                token=hf_token,
                trust_remote_code=True,
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                token=hf_token,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True,
            )
        except Exception as e:
            print(f"Error initializing model: {e}")
            raise

    def extract_dates(self, text: str) -> list:
        """Return the distinct date strings found in *text*.

        Recognizes day/month-first numeric dates, year-first numeric dates and
        "Month D, YYYY" dates. Results are grouped by pattern in that order,
        each group in order of appearance, with repeats dropped.
        """
        found = [
            match.group()
            for pattern in self._DATE_PATTERNS
            for match in pattern.finditer(text)
        ]
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(found))

    def extract_claim_numbers(self, text: str) -> list:
        """Return the distinct claim/case references found in *text*.

        Matches "claim 123", "case #123-AB" and bare "#123" forms
        (case-insensitive), in order of appearance, with repeats dropped.
        """
        found = [match.group() for match in self._CLAIM_PATTERN.finditer(text)]
        return list(dict.fromkeys(found))

    def generate_prompt(self, transcript: str) -> str:
        """Build the analysis instructions for *transcript*.

        The returned text is the plain user message. It carries no chat
        control tokens; ``analyze_transcript`` wraps it with the tokenizer's
        own chat template so the markers always match the loaded model.
        """
        dates = self.extract_dates(transcript)
        claims = self.extract_claim_numbers(transcript)
        lines = [
            'Please analyze this meeting transcript with extreme precision '
            'and provide a structured analysis.',
            'Remember to:',
            '1. Only include information explicitly stated',
            '2. Mark unclear information as "UNCLEAR"',
            '3. Preserve exact numbers, dates, and claims',
            '4. Focus on factual content',
            f"Identified dates: {', '.join(dates) if dates else 'None'}",
            f"Identified claims: {', '.join(claims) if claims else 'None'}",
            'Please analyze:',
            transcript,
            'Provide your analysis in this format:',
            'PARTICIPANTS:',
            '- List participants and their roles',
            'CONTEXT:',
            '- Meeting purpose',
            '- Duration (if mentioned)',
            'KEY POINTS:',
            '- Main topics',
            '- Decisions made',
            '- Important numbers/metrics',
            'ACTION ITEMS:',
            '- Tasks and assignments',
            '- Deadlines',
            '- Responsible parties',
            'FOLLOW UP:',
            '- Next meetings',
            '- Pending items',
        ]
        return "\n".join(lines)

    def analyze_transcript(self, transcript: str) -> str:
        """Run the model on *transcript* and return its structured analysis.

        Returns:
            The generated analysis text, or a string starting with
            "Error analyzing transcript:" if anything fails (errors are
            reported in-band rather than raised).
        """
        try:
            messages = [
                {"role": "user", "content": self.generate_prompt(transcript)}
            ]
            # Let the tokenizer apply the model's chat format instead of
            # hand-writing [INST] markers, which belong to a different model
            # family.
            inputs = self.tokenizer.apply_chat_template(
                messages,
                add_generation_prompt=True,
                return_tensors="pt",
                return_dict=True,
            ).to(self.model.device)
            prompt_length = inputs["input_ids"].shape[-1]

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=1000,
                    temperature=0.1,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            # Decode only the newly generated tokens. Splitting the decoded
            # text on a marker breaks when the transcript or the reply
            # happens to contain that marker.
            generated = outputs[0][prompt_length:]
            return self.tokenizer.decode(
                generated, skip_special_tokens=True
            ).strip()
        except Exception as e:
            return f"Error analyzing transcript: {e}"
# Shared analyzer instance, created on first use. Loading the model is by far
# the most expensive step, so it must not be repeated for every request.
_analyzer = None


def _get_analyzer():
    """Return the shared TranscriptAnalyzer, constructing it on first call."""
    global _analyzer
    if _analyzer is None:
        _analyzer = TranscriptAnalyzer()
    return _analyzer


def process_transcript(transcript: str):
    """Analyze *transcript* and return the result as display text.

    Args:
        transcript: Raw meeting transcript text from the UI.

    Returns:
        The analysis text, or a string starting with
        "Error processing transcript:" when the input is empty or when the
        analyzer cannot be created or run. Errors are returned rather than
        raised so the UI always has text to show.
    """
    # Reject empty input up front instead of spending a model call on it.
    if not transcript or not transcript.strip():
        return "Error processing transcript: transcript is empty"
    try:
        return _get_analyzer().analyze_transcript(transcript)
    except Exception as e:
        return f"Error processing transcript: {e}"
# Build the Gradio UI: one multi-line textbox in, one multi-line textbox out,
# wired to process_transcript.
_transcript_box = gr.Textbox(
    lines=10,
    label="Enter Meeting Transcript",
    placeholder="Paste your meeting transcript here..."
)
_result_box = gr.Textbox(
    label="Analysis Result",
    lines=20
)
# Sample transcripts offered as examples in the interface.
_sample_transcripts = [
    ["Meeting started on March 15, 2024 at 2:30 PM\nClaim #12345-ABC discussed regarding property damage\nJohn (Project Manager): Let's review the Q1 budget..."],
    ["Sarah (Team Lead): Good morning everyone. Today's meeting is about the new product launch.\nMike (Marketing): We're targeting April 1st, 2024 for the release.\nClaim #789-XYZ needs to be resolved before launch."],
]
iface = gr.Interface(
    fn=process_transcript,
    inputs=[_transcript_box],
    outputs=_result_box,
    title="Meeting Transcript Analyzer",
    description="Analyze meeting transcripts to extract key information, dates, claims, and action items.",
    examples=_sample_transcripts,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()