# app.py
import gradio as gr
import json
import re
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

class TranscriptAnalyzer:
    """Produce a structured analysis of a meeting transcript with an LLM.

    The constructor loads the tokenizer and model named by ``model_name``.
    Dates and claim/case numbers are pre-extracted with regular expressions
    and listed in the prompt, so the model sees them called out explicitly
    in addition to the raw transcript.
    """

    # Compiled once at class-definition time instead of on every call.
    # The digit look-arounds stop numeric dates from matching inside longer
    # digit runs (e.g. "24-03-15" inside "2024-03-15").
    _DATE_PATTERNS = (
        re.compile(r'(?<!\d)\d{1,2}[-/]\d{1,2}[-/]\d{2,4}(?!\d)'),
        re.compile(r'(?<!\d)\d{4}[-/]\d{1,2}[-/]\d{1,2}(?!\d)'),
        # Optional ordinal suffix so "April 1st, 2024" is recognised too.
        re.compile(
            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* '
            r'\d{1,2}(?:st|nd|rd|th)?,? \d{4}\b'
        ),
    )

    # Leading \b keeps "showcase 5" or "reclaim 3" from matching.
    _CLAIM_PATTERNS = (
        re.compile(r'\bclaim\s+#?\s*\d+[-\w]*', re.IGNORECASE),
        re.compile(r'#\s*\d+[-\w]*', re.IGNORECASE),
        re.compile(r'\bcase\s+#?\s*\d+[-\w]*', re.IGNORECASE),
    )

    def __init__(self):
        """Load the tokenizer and the model in float16 with device_map="auto"."""
        self.model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )

    @staticmethod
    def _find_unique(patterns, text: str) -> list:
        """Return the distinct matches of *patterns* in *text*.

        Matches are reported in pattern order, then text order within a
        pattern. A match is dropped when its span lies entirely inside a
        longer match (e.g. "#123" inside "Claim #123"), and a repeated
        matched string is reported only once.
        """
        found = [m for pattern in patterns for m in pattern.finditer(text)]

        unique = []
        seen = set()
        for match in found:
            start, end = match.span()
            # Quadratic in the number of matches, which is small for a
            # transcript; keeps the original pattern-order output intact.
            shadowed = any(
                other.start() <= start
                and end <= other.end()
                and (other.end() - other.start()) > (end - start)
                for other in found
            )
            value = match.group()
            if shadowed or value in seen:
                continue
            seen.add(value)
            unique.append(value)
        return unique

    def extract_dates(self, text: str) -> list:
        """Return the distinct date strings found in *text*.

        Recognises numeric dates separated by "-" or "/" (day/month first
        or year first) and English month-name dates such as
        "March 15, 2024" or "April 1st, 2024". The matched text is returned
        verbatim; nothing is parsed or validated as a calendar date.
        """
        return self._find_unique(self._DATE_PATTERNS, text)

    def extract_claim_numbers(self, text: str) -> list:
        """Return the distinct claim/case references found in *text*.

        Matches "claim <number>", "case <number>" (case-insensitive, with an
        optional "#") and bare "#<number>" references. When a bare "#..."
        match is part of a longer "claim"/"case" match, only the longer one
        is returned.
        """
        return self._find_unique(self._CLAIM_PATTERNS, text)

    def generate_prompt(self, transcript: str) -> str:
        """Build the instruction prompt for *transcript*.

        The prompt contains only the ``[INST] ... [/INST]`` markers. The
        ``<s>`` token is deliberately not written here because the tokenizer
        prepends it when encoding, and ``</s>`` must not follow ``[/INST]``:
        it would mark the sequence as finished before the model has
        answered.
        """
        dates = self.extract_dates(transcript)
        claims = self.extract_claim_numbers(transcript)
        date_list = ', '.join(dates) if dates else 'None'
        claim_list = ', '.join(claims) if claims else 'None'

        return f"""[INST] Please analyze this meeting transcript with extreme precision and provide a structured analysis.
Remember to:
1. Only include information explicitly stated
2. Mark unclear information as "UNCLEAR"
3. Preserve exact numbers, dates, and claims
4. Focus on factual content

Identified dates: {date_list}
Identified claims: {claim_list}

Please analyze:
{transcript}

Provide your analysis in this format:
PARTICIPANTS:
- List participants and their roles

CONTEXT:
- Meeting purpose
- Duration (if mentioned)

KEY POINTS:
- Main topics
- Decisions made
- Important numbers/metrics

ACTION ITEMS:
- Tasks and assignments
- Deadlines
- Responsible parties

FOLLOW UP:
- Next meetings
- Pending items [/INST]"""

    def analyze_transcript(self, transcript: str) -> str:
        """Run the model on *transcript* and return its analysis text.

        A blank transcript short-circuits with a short message instead of
        spending a full generation on an empty prompt.
        """
        if not transcript or not transcript.strip():
            return "No transcript provided."

        prompt = self.generate_prompt(transcript)

        # Move the encoded prompt to the device the model lives on.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        prompt_length = inputs["input_ids"].shape[1]

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=1000,
                temperature=0.1,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

        # generate() returns prompt + continuation for a causal LM; slice off
        # the prompt tokens rather than splitting decoded text on "[/INST]",
        # which would break if the model itself emitted that marker.
        generated_tokens = outputs[0][prompt_length:]
        response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)

        return response.strip()

# Shared analyzer, created on first use. Constructing TranscriptAnalyzer
# loads the tokenizer and model, so it must not happen once per request.
_ANALYZER = None


def _get_analyzer():
    """Return the shared TranscriptAnalyzer, creating it on first call."""
    global _ANALYZER
    if _ANALYZER is None:
        _ANALYZER = TranscriptAnalyzer()
    return _ANALYZER


def process_transcript(transcript: str):
    """Analyze *transcript* and return the analysis text.

    This is the callback wired into the Gradio interface. It reuses a single
    TranscriptAnalyzer across calls instead of building a new one each time.
    """
    return _get_analyzer().analyze_transcript(transcript)

# Input and output widgets for the interface
transcript_input = gr.Textbox(
    lines=10,
    label="Enter Meeting Transcript",
    placeholder="Paste your meeting transcript here..."
)
analysis_output = gr.Textbox(label="Analysis Result", lines=20)

# Sample transcripts shown as examples; each row holds the single input value
example_transcripts = [
    ["Meeting started on March 15, 2024 at 2:30 PM\nClaim #12345-ABC discussed regarding property damage\nJohn (Project Manager): Let's review the Q1 budget..."],
    ["Sarah (Team Lead): Good morning everyone. Today's meeting is about the new product launch.\nMike (Marketing): We're targeting April 1st, 2024 for the release.\nClaim #789-XYZ needs to be resolved before launch."],
]

# Wire the widgets to the analysis callback
iface = gr.Interface(
    fn=process_transcript,
    inputs=[transcript_input],
    outputs=analysis_output,
    title="Meeting Transcript Analyzer",
    description="Analyze meeting transcripts to extract key information, dates, claims, and action items.",
    examples=example_transcripts,
)

# Start serving the app
iface.launch()