Spaces:

Filteroff
/

video_audio_analyzer

Sleeping

App Files Files Community

beweinreich committed on Jul 24

Commit

cc41c90

•

1 Parent(s): cd7af42

improvements to cleaning traits

Browse files

Files changed (3) hide show

audio_analyzer.py +7 -4
utils.py +14 -0
video_analyzer.py +6 -14

audio_analyzer.py CHANGED Viewed

@@ -5,6 +5,7 @@ import moviepy.editor as mp
 from tqdm import tqdm
 from pydub import AudioSegment
 from openai import OpenAI
 from transformers import pipeline
 from dotenv import load_dotenv
 from pydub.silence import split_on_silence
@@ -89,7 +90,6 @@ class AudioAnalyzer:
         processed_audio.export(mp3_file, format="mp3", bitrate=bitrate)
         print("Conversion complete")
     def transcribe_audio_to_text(self, audio_file):
         with open(audio_file, "rb") as audio:
             transcription = client.audio.transcriptions.create(
@@ -102,7 +102,7 @@ class AudioAnalyzer:
     def ask_gpt(self, text):
         prompt = (
-            f"I have a transcript of one side of a video call. I'd like to determine personality traits from it. Can you tell me 5 personality traits you gather from this transcript? Please responsd in JSON format with the key `traits` and the value as a list of 5 personality traits.\n\n"
             f"The transcript is: \"{text}\""
         )
@@ -116,13 +116,15 @@ class AudioAnalyzer:
         )
         response = completion.choices[0].message.content
-        parsed = None
         try:
             result = json.loads(response)
             parsed = result['traits']
         except (json.JSONDecodeError, KeyError) as e:
             print(f"Error parsing response: {e}")
-            parsed = None
         return parsed
@@ -135,6 +137,7 @@ class AudioAnalyzer:
         transcript = self.transcribe_audio_to_text(self.media_path)
         print("Transcription complete")
         traits = self.ask_gpt(transcript)
         # print("Running through personality pipeline...")
         # result = self.personality_pipeline(transcript, candidate_labels=self.personality_labels)

 from tqdm import tqdm
 from pydub import AudioSegment
 from openai import OpenAI
+from utils import clean_trait
 from transformers import pipeline
 from dotenv import load_dotenv
 from pydub.silence import split_on_silence
         processed_audio.export(mp3_file, format="mp3", bitrate=bitrate)
         print("Conversion complete")
     def transcribe_audio_to_text(self, audio_file):
         with open(audio_file, "rb") as audio:
             transcription = client.audio.transcriptions.create(
     def ask_gpt(self, text):
         prompt = (
+            f"I have a transcript of one side of a video call. I'd like to determine personality traits from it. Can you tell me 5 personality traits you gather from this transcript? Please responsd in JSON format with the key `traits` and the value as a list of 5 personality traits.\n\nIf you can't determine it, say 'NA'.\n\n"
             f"The transcript is: \"{text}\""
         )
         )
         response = completion.choices[0].message.content
+        parsed = []
         try:
             result = json.loads(response)
             parsed = result['traits']
+            if parsed == ["NA"]:
+                parsed = []
         except (json.JSONDecodeError, KeyError) as e:
             print(f"Error parsing response: {e}")
+            parsed = []
         return parsed
         transcript = self.transcribe_audio_to_text(self.media_path)
         print("Transcription complete")
         traits = self.ask_gpt(transcript)
+        traits = [clean_trait(trait) for trait in traits]
         # print("Running through personality pipeline...")
         # result = self.personality_pipeline(transcript, candidate_labels=self.personality_labels)

utils.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import re
def clean_trait(trait):
    """Normalize a single trait string produced by the language model.

    The trait has line breaks removed, surrounding whitespace trimmed,
    leading dashes (list bullets) and trailing periods dropped, and is
    lower-cased.

    This is a module-level helper, so it takes no ``self``: callers invoke
    it as ``clean_trait(trait)``.

    Args:
        trait: Raw trait text. Non-string values (for example ``None`` or a
            number decoded from JSON) are treated as unusable.

    Returns:
        The cleaned, lower-cased trait, or ``None`` when the input is not a
        string or the cleaned text has more than 4 words.
    """
    if not isinstance(trait, str):
        # Nothing sensible to clean; callers drop falsy results.
        return None

    # Remove line breaks, leading/trailing whitespace, and unnecessary dashes
    cleaned_trait = trait.replace('\n', '').strip()
    cleaned_trait = cleaned_trait.lstrip('-').strip()
    cleaned_trait = cleaned_trait.rstrip('.')
    cleaned_trait = cleaned_trait.lower()

    # If the cleaned trait has more than 4 words, it's likely a sentence, so let's remove it
    if len(cleaned_trait.split()) > 4:
        return None

    return cleaned_trait

video_analyzer.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import json
 import base64
@@ -5,8 +6,8 @@ import requests
 from tqdm import tqdm
 from dotenv import load_dotenv
 from moviepy.editor import VideoFileClip
 import imageio
-import re
 import urllib.request
 load_dotenv()
@@ -66,7 +67,7 @@ class VideoAnalyzer:
                         "content": [
                             {
                                 "type": "text",
-                                "text": "Describe the person in this image. Focus on their physical and emotional state. Exclude details about their surroundings. Provide the description as a comma-separated list."
                             },
                             {
                                 "type": "image_url",
@@ -84,27 +85,18 @@ class VideoAnalyzer:
             response_data = response.json()
             description = response_data['choices'][0]['message']['content']
             traits = description.split(",")
             traits_list.extend(traits)
         return traits_list
-    def clean_trait(self, trait):
-        # Remove line breaks, leading/trailing whitespace, and unnecessary dashes
-        cleaned_trait = trait.replace('\n', '').strip()
-        cleaned_trait = re.sub(r'^-+', '', cleaned_trait).strip()
-        cleaned_trait = cleaned_trait.rstrip('.')
-        # If the cleaned trait has more than 4 words, it's likely a sentence, so let's remove it
-        if len(cleaned_trait.split()) > 4:
-            return None
-        return cleaned_trait
     def retrieve_traits(self):
         self.extract_images()
         traits = self.analyze_images()
-        cleaned_traits = [self.clean_trait(trait) for trait in traits]
         cleaned_traits = [trait for trait in cleaned_traits if trait]
         common_traits = list(set(cleaned_traits))
         return common_traits

+import re
 import os
 import json
 import base64
 from tqdm import tqdm
 from dotenv import load_dotenv
 from moviepy.editor import VideoFileClip
+from utils import clean_trait
 import imageio
 import urllib.request
 load_dotenv()
                         "content": [
                             {
                                 "type": "text",
+                                "text": "Describe the person in this image. Focus on their physical and emotional state. Exclude details about their surroundings. Provide the description as a comma-separated list. If you can't determine it, say 'NA'."
                             },
                             {
                                 "type": "image_url",
             response_data = response.json()
             description = response_data['choices'][0]['message']['content']
             traits = description.split(",")
+            if 'NA' in traits:
+                continue
             traits_list.extend(traits)
         return traits_list
     def retrieve_traits(self):
         self.extract_images()
         traits = self.analyze_images()
+        cleaned_traits = [clean_trait(trait) for trait in traits]
         cleaned_traits = [trait for trait in cleaned_traits if trait]
         common_traits = list(set(cleaned_traits))
         return common_traits