sainathBelagavi committed on
Commit
a363c1c
·
verified ·
1 Parent(s): 79669cc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import json
4
+ import re
5
+ from datetime import datetime
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
+ import torch
8
+
9
class TranscriptAnalyzer:
    """Analyze meeting transcripts with a Mistral instruct model.

    Dates and claim/case references are first pulled out with regular
    expressions and embedded in the prompt as hints; the model is then asked
    to produce a structured summary of the transcript.
    """

    # One alternation so overlapping candidates are reported once, in order of
    # appearance. The digit look-arounds stop the day-first alternative from
    # matching the tail of an ISO date ("2024-03-15" -> "24-03-15").
    _DATE_RE = re.compile(
        r'(?<!\d)\d{4}[-/]\d{1,2}[-/]\d{1,2}(?!\d)'
        r'|(?<!\d)\d{1,2}[-/]\d{1,2}[-/]\d{2,4}(?!\d)'
        r'|\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*'
        r' \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b'
    )

    # "claim 12", "Claim #12-AB", "case #7" or a bare "#12". A single pattern
    # keeps "Claim #12" from also being reported as a separate "#12".
    _CLAIM_RE = re.compile(
        r'(?:\b(?:claim|case)\s+#?|#)\s*\d+[-\w]*',
        re.IGNORECASE,
    )

    # No literal "<s>" / "</s>": the tokenizer prepends BOS on its own, and an
    # EOS after [/INST] would mark the sequence as finished before the model
    # has produced its answer.
    _PROMPT_TEMPLATE = (
        "[INST] Please analyze this meeting transcript with extreme precision "
        "and provide a structured analysis.\n"
        "Remember to:\n"
        "1. Only include information explicitly stated\n"
        '2. Mark unclear information as "UNCLEAR"\n'
        "3. Preserve exact numbers, dates, and claims\n"
        "4. Focus on factual content\n"
        "\n"
        "Identified dates: {dates}\n"
        "Identified claims: {claims}\n"
        "\n"
        "Please analyze:\n"
        "{transcript}\n"
        "\n"
        "Provide your analysis in this format:\n"
        "PARTICIPANTS:\n"
        "- List participants and their roles\n"
        "\n"
        "CONTEXT:\n"
        "- Meeting purpose\n"
        "- Duration (if mentioned)\n"
        "\n"
        "KEY POINTS:\n"
        "- Main topics\n"
        "- Decisions made\n"
        "- Important numbers/metrics\n"
        "\n"
        "ACTION ITEMS:\n"
        "- Tasks and assignments\n"
        "- Deadlines\n"
        "- Responsible parties\n"
        "\n"
        "FOLLOW UP:\n"
        "- Next meetings\n"
        "- Pending items [/INST]"
    )

    def __init__(self):
        """Load the tokenizer and the half-precision model.

        This downloads/loads a 7B-parameter checkpoint, so instances are
        expensive to create and should be reused across requests.
        """
        self.model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map="auto",
        )

    def extract_dates(self, text: str) -> list:
        """Return the date-like substrings of *text*, in order of appearance.

        Recognized forms: ``D/M/Y``-style numeric dates, ``Y-M-D``-style
        numeric dates (``-`` or ``/`` separators), and ``Month D, YYYY`` with
        an optional ordinal suffix (``April 1st, 2024``).
        """
        return [match.group() for match in self._DATE_RE.finditer(text)]

    def extract_claim_numbers(self, text: str) -> list:
        """Return claim/case references found in *text*, in order of appearance.

        Matching is case-insensitive. Each reference is reported once, with
        its ``claim``/``case`` label when one is present.
        """
        return [match.group() for match in self._CLAIM_RE.finditer(text)]

    def generate_prompt(self, transcript: str) -> str:
        """Build the ``[INST] ... [/INST]`` prompt for *transcript*.

        The extracted dates and claim references are listed ahead of the
        transcript (or ``None`` when nothing was found).
        """
        dates = self.extract_dates(transcript)
        claims = self.extract_claim_numbers(transcript)
        # str.format only interprets braces in the template, so braces inside
        # the transcript itself are inserted verbatim.
        return self._PROMPT_TEMPLATE.format(
            dates=', '.join(dates) if dates else 'None',
            claims=', '.join(claims) if claims else 'None',
            transcript=transcript,
        )

    def analyze_transcript(self, transcript: str) -> str:
        """Run the model on *transcript* and return only the generated text."""
        prompt = self.generate_prompt(transcript)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=1000,
                temperature=0.1,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # generate() returns prompt + continuation; drop the prompt tokens
        # rather than splitting decoded text on "[/INST]", which would break
        # if that marker appears in the transcript or in the reply.
        generated_tokens = outputs[0][prompt_length:]
        return self.tokenizer.decode(
            generated_tokens, skip_special_tokens=True
        ).strip()
109
+
110
# Shared analyzer, created on first use so the model is loaded once per
# process instead of once per request.
_analyzer = None


def _get_analyzer():
    """Return the process-wide TranscriptAnalyzer, creating it if needed."""
    global _analyzer
    if _analyzer is None:
        _analyzer = TranscriptAnalyzer()
    return _analyzer


def process_transcript(transcript: str):
    """Gradio callback: return the model's analysis of *transcript*.

    Empty or whitespace-only input is answered with a short hint instead of
    being sent to the model.
    """
    if not transcript or not transcript.strip():
        return "Please enter a meeting transcript to analyze."
    return _get_analyzer().analyze_transcript(transcript)
114
+
115
# Assemble the Gradio UI: a transcript text box in, an analysis text box out.
transcript_box = gr.Textbox(
    label="Enter Meeting Transcript",
    placeholder="Paste your meeting transcript here...",
    lines=10,
)

result_box = gr.Textbox(label="Analysis Result", lines=20)

# Sample transcripts offered as one-click examples in the UI.
example_transcripts = [
    ["Meeting started on March 15, 2024 at 2:30 PM\nClaim #12345-ABC discussed regarding property damage\nJohn (Project Manager): Let's review the Q1 budget..."],
    ["Sarah (Team Lead): Good morning everyone. Today's meeting is about the new product launch.\nMike (Marketing): We're targeting April 1st, 2024 for the release.\nClaim #789-XYZ needs to be resolved before launch."],
]

iface = gr.Interface(
    fn=process_transcript,
    inputs=[transcript_box],
    outputs=result_box,
    title="Meeting Transcript Analyzer",
    description="Analyze meeting transcripts to extract key information, dates, claims, and action items.",
    examples=example_transcripts,
)

# Start serving the interface.
iface.launch()