Spaces:
Sleeping
Sleeping
beweinreich
committed on
Commit
•
cc41c90
1
Parent(s):
cd7af42
improvements to cleaning traits
Browse files
- audio_analyzer.py +7 -4
- utils.py +14 -0
- video_analyzer.py +6 -14
audio_analyzer.py
CHANGED
@@ -5,6 +5,7 @@ import moviepy.editor as mp
|
|
5 |
from tqdm import tqdm
|
6 |
from pydub import AudioSegment
|
7 |
from openai import OpenAI
|
|
|
8 |
from transformers import pipeline
|
9 |
from dotenv import load_dotenv
|
10 |
from pydub.silence import split_on_silence
|
@@ -89,7 +90,6 @@ class AudioAnalyzer:
|
|
89 |
processed_audio.export(mp3_file, format="mp3", bitrate=bitrate)
|
90 |
print("Conversion complete")
|
91 |
|
92 |
-
|
93 |
def transcribe_audio_to_text(self, audio_file):
|
94 |
with open(audio_file, "rb") as audio:
|
95 |
transcription = client.audio.transcriptions.create(
|
@@ -102,7 +102,7 @@ class AudioAnalyzer:
|
|
102 |
|
103 |
def ask_gpt(self, text):
|
104 |
prompt = (
|
105 |
-
f"I have a transcript of one side of a video call. I'd like to determine personality traits from it. Can you tell me 5 personality traits you gather from this transcript? Please responsd in JSON format with the key `traits` and the value as a list of 5 personality traits.\n\n"
|
106 |
f"The transcript is: \"{text}\""
|
107 |
)
|
108 |
|
@@ -116,13 +116,15 @@ class AudioAnalyzer:
|
|
116 |
)
|
117 |
response = completion.choices[0].message.content
|
118 |
|
119 |
-
parsed =
|
120 |
try:
|
121 |
result = json.loads(response)
|
122 |
parsed = result['traits']
|
|
|
|
|
123 |
except (json.JSONDecodeError, KeyError) as e:
|
124 |
print(f"Error parsing response: {e}")
|
125 |
-
parsed =
|
126 |
return parsed
|
127 |
|
128 |
|
@@ -135,6 +137,7 @@ class AudioAnalyzer:
|
|
135 |
transcript = self.transcribe_audio_to_text(self.media_path)
|
136 |
print("Transcription complete")
|
137 |
traits = self.ask_gpt(transcript)
|
|
|
138 |
|
139 |
# print("Running through personality pipeline...")
|
140 |
# result = self.personality_pipeline(transcript, candidate_labels=self.personality_labels)
|
|
|
5 |
from tqdm import tqdm
|
6 |
from pydub import AudioSegment
|
7 |
from openai import OpenAI
|
8 |
+
from utils import clean_trait
|
9 |
from transformers import pipeline
|
10 |
from dotenv import load_dotenv
|
11 |
from pydub.silence import split_on_silence
|
|
|
90 |
processed_audio.export(mp3_file, format="mp3", bitrate=bitrate)
|
91 |
print("Conversion complete")
|
92 |
|
|
|
93 |
def transcribe_audio_to_text(self, audio_file):
|
94 |
with open(audio_file, "rb") as audio:
|
95 |
transcription = client.audio.transcriptions.create(
|
|
|
102 |
|
103 |
def ask_gpt(self, text):
|
104 |
prompt = (
|
105 |
+
f"I have a transcript of one side of a video call. I'd like to determine personality traits from it. Can you tell me 5 personality traits you gather from this transcript? Please responsd in JSON format with the key `traits` and the value as a list of 5 personality traits.\n\nIf you can't determine it, say 'NA'.\n\n"
|
106 |
f"The transcript is: \"{text}\""
|
107 |
)
|
108 |
|
|
|
116 |
)
|
117 |
response = completion.choices[0].message.content
|
118 |
|
119 |
+
parsed = []
|
120 |
try:
|
121 |
result = json.loads(response)
|
122 |
parsed = result['traits']
|
123 |
+
if parsed == ["NA"]:
|
124 |
+
parsed = []
|
125 |
except (json.JSONDecodeError, KeyError) as e:
|
126 |
print(f"Error parsing response: {e}")
|
127 |
+
parsed = []
|
128 |
return parsed
|
129 |
|
130 |
|
|
|
137 |
transcript = self.transcribe_audio_to_text(self.media_path)
|
138 |
print("Transcription complete")
|
139 |
traits = self.ask_gpt(transcript)
|
140 |
+
traits = [clean_trait(trait) for trait in traits]
|
141 |
|
142 |
# print("Running through personality pipeline...")
|
143 |
# result = self.personality_pipeline(transcript, candidate_labels=self.personality_labels)
|
utils.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def clean_trait(trait):
    """Normalize one trait string returned by the model.

    This is a module-level helper, so it takes no ``self``: callers import it
    and invoke it as ``clean_trait(trait)``. Keeping a ``self`` parameter here
    would make every such single-argument call raise ``TypeError``.

    Args:
        trait: Raw trait text (may contain line breaks, leading dashes,
            surrounding whitespace, or a trailing period).

    Returns:
        The lower-cased, trimmed trait; an empty string if nothing is left
        after cleaning; or ``None`` when the input is not a string or has
        more than 4 words (likely a sentence rather than a trait).
    """
    # Model output is not guaranteed to be a list of strings; treat anything
    # else as "no trait" so callers can filter it out like an over-long one.
    if not isinstance(trait, str):
        return None

    # Remove line breaks, leading/trailing whitespace, and unnecessary dashes
    cleaned_trait = trait.replace('\n', '').strip()
    cleaned_trait = re.sub(r'^-+', '', cleaned_trait).strip()
    # Drop trailing periods, then any whitespace that preceded them
    # (e.g. "calm ." -> "calm") so equal traits compare equal.
    cleaned_trait = cleaned_trait.rstrip('.').rstrip()
    cleaned_trait = cleaned_trait.lower()

    # If the cleaned trait has more than 4 words, it's likely a sentence, so let's remove it
    if len(cleaned_trait.split()) > 4:
        return None

    return cleaned_trait
|
video_analyzer.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import os
|
2 |
import json
|
3 |
import base64
|
@@ -5,8 +6,8 @@ import requests
|
|
5 |
from tqdm import tqdm
|
6 |
from dotenv import load_dotenv
|
7 |
from moviepy.editor import VideoFileClip
|
|
|
8 |
import imageio
|
9 |
-
import re
|
10 |
import urllib.request
|
11 |
|
12 |
load_dotenv()
|
@@ -66,7 +67,7 @@ class VideoAnalyzer:
|
|
66 |
"content": [
|
67 |
{
|
68 |
"type": "text",
|
69 |
-
"text": "Describe the person in this image. Focus on their physical and emotional state. Exclude details about their surroundings. Provide the description as a comma-separated list."
|
70 |
},
|
71 |
{
|
72 |
"type": "image_url",
|
@@ -84,27 +85,18 @@ class VideoAnalyzer:
|
|
84 |
response_data = response.json()
|
85 |
description = response_data['choices'][0]['message']['content']
|
86 |
traits = description.split(",")
|
|
|
|
|
87 |
traits_list.extend(traits)
|
88 |
|
89 |
return traits_list
|
90 |
|
91 |
-
def clean_trait(self, trait):
    """Tidy a single trait string; return None if it looks like a sentence.

    Line breaks, surrounding whitespace, leading dashes and trailing periods
    are removed. Case is left untouched. A result with more than 4 words is
    treated as a sentence and reported as None.
    """
    text = trait.replace('\n', '').strip()
    # Strip leading dashes, re-trim, then drop trailing periods.
    text = re.sub(r'^-+', '', text).strip().rstrip('.')

    word_count = len(text.split())
    return text if word_count <= 4 else None
|
102 |
|
103 |
def retrieve_traits(self):
|
104 |
self.extract_images()
|
105 |
traits = self.analyze_images()
|
106 |
|
107 |
-
cleaned_traits = [
|
108 |
cleaned_traits = [trait for trait in cleaned_traits if trait]
|
109 |
common_traits = list(set(cleaned_traits))
|
110 |
return common_traits
|
|
|
1 |
+
import re
|
2 |
import os
|
3 |
import json
|
4 |
import base64
|
|
|
6 |
from tqdm import tqdm
|
7 |
from dotenv import load_dotenv
|
8 |
from moviepy.editor import VideoFileClip
|
9 |
+
from utils import clean_trait
|
10 |
import imageio
|
|
|
11 |
import urllib.request
|
12 |
|
13 |
load_dotenv()
|
|
|
67 |
"content": [
|
68 |
{
|
69 |
"type": "text",
|
70 |
+
"text": "Describe the person in this image. Focus on their physical and emotional state. Exclude details about their surroundings. Provide the description as a comma-separated list. If you can't determine it, say 'NA'."
|
71 |
},
|
72 |
{
|
73 |
"type": "image_url",
|
|
|
85 |
response_data = response.json()
|
86 |
description = response_data['choices'][0]['message']['content']
|
87 |
traits = description.split(",")
|
88 |
+
if 'NA' in traits:
|
89 |
+
continue
|
90 |
traits_list.extend(traits)
|
91 |
|
92 |
return traits_list
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
def retrieve_traits(self):
    """Return the distinct, cleaned traits gathered from the video frames.

    Runs image extraction, then image analysis, passes every raw trait
    through ``clean_trait``, drops falsy results (None or empty string),
    and de-duplicates what remains. The order of the returned list is
    whatever set iteration yields.
    """
    self.extract_images()
    raw_traits = self.analyze_images()

    # Clean every raw trait first, then keep only the truthy results.
    normalized = list(map(clean_trait, raw_traits))
    distinct = {item for item in normalized if item}
    return list(distinct)
|