beweinreich committed
Commit
1727a7e
0 Parent(s):
Files changed (9)
  1. .gitignore +5 -0
  2. Dockerfile +17 -0
  3. Procfile +1 -0
  4. app.py +40 -0
  5. audio_analyzer.py +82 -0
  6. playground.py +107 -0
  7. requirements.txt +9 -0
  8. runtime.txt +1 -0
  9. video_analyzer.py +119 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+ .DS_Store
+ .env
+ raw/*
+ tmp/*
+ __pycache__/*
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ # app:app is a Flask (WSGI) app, so run uvicorn with its WSGI interface
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--interface", "wsgi"]
Procfile ADDED
@@ -0,0 +1 @@
+ web: python app.py
app.py ADDED
@@ -0,0 +1,40 @@
+ import os
+ from flask import Flask, request, jsonify
+ from audio_analyzer import AudioAnalyzer
+ from video_analyzer import VideoAnalyzer
+
+ app = Flask(__name__)
+
+ @app.route('/', methods=['GET'])
+ def hello_world():
+     return jsonify({"message": "Hello, World!"})
+
+ @app.route('/v1/analyze_audio', methods=['POST'])
+ def analyze_audio():
+     data = request.get_json()
+     audio_url = data.get('audio_url')
+
+     if not audio_url:
+         return jsonify({"error": "audio_url is required"}), 400
+
+     analyzer = AudioAnalyzer(media_url=audio_url, media_type="audio")
+     traits = analyzer.retrieve_traits()
+
+     return jsonify(traits)
+
+ @app.route('/v1/analyze_video', methods=['POST'])
+ def analyze_video():
+     data = request.get_json()
+     video_url = data.get('video_url')
+
+     if not video_url:
+         return jsonify({"error": "video_url is required"}), 400
+
+     analyzer = VideoAnalyzer(video_url=video_url)
+     traits = analyzer.retrieve_traits()
+
+     return jsonify(traits)
+
+ if __name__ == '__main__':
+     # Bind to all interfaces; default to the port the Dockerfile exposes
+     app.run(host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
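Usage note: a minimal sketch of exercising the new endpoint once the server is up. It assumes the app is listening on localhost:7860 (the port from the Dockerfile), and the audio URL is hypothetical; any URL the server can fetch will do.

import requests

# Hypothetical URL; a Twilio recording URL also works, since the analyzer
# authenticates the download with the Twilio credentials from the environment.
resp = requests.post(
    "http://localhost:7860/v1/analyze_audio",
    json={"audio_url": "https://example.com/sample.mp3"},
)
print(resp.json())  # e.g. {"Empathetic": 0.41, "Curious": 0.22, ...}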
audio_analyzer.py ADDED
@@ -0,0 +1,82 @@
+ import os
+ import urllib.request
+ import moviepy.editor as mp
+ from openai import OpenAI
+ from transformers import pipeline
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ TWILIO_ACCOUNT_SID = os.getenv("TWILIO_ACCOUNT_SID")
+ TWILIO_AUTH_TOKEN = os.getenv("TWILIO_AUTH_TOKEN")
+ client = OpenAI(api_key=OPENAI_API_KEY)
+
+ class AudioAnalyzer:
+     def __init__(self, media_path=None, media_url=None, media_type=None):
+         self.personality_labels = [
+             "Empathetic", "Resilient", "Optimistic", "Pessimistic", "Introverted", "Extroverted", "Curious", "Creative", "Analytical", "Dependable", "Impulsive", "Adaptable", "Meticulous", "Assertive", "Agreeable", "Courageous", "Cautious", "Patient", "Ambitious", "Generous"
+         ]
+         self.personality_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+         self.media_path = media_path
+         self.media_url = media_url
+         self.media_type = media_type
+
+         if media_url is not None:
+             self.download_content()
+
+         if media_type == "video":
+             self.extract_audio_from_video(self.media_path, "./tmp/audio.mp3")
+             self.media_path = "./tmp/audio.mp3"
+
+     def download_content(self):
+         download_url = self.media_url
+
+         # Set default paths if not provided
+         if self.media_type == "video" and not self.media_path:
+             self.media_path = "./tmp/video.mp4"
+         if self.media_type == "audio" and not self.media_path:
+             self.media_path = "./tmp/audio.mp3"
+
+         # tmp/ is gitignored, so make sure the directory exists before downloading
+         os.makedirs(os.path.dirname(self.media_path) or ".", exist_ok=True)
+
+         # Twilio recording URLs require HTTP basic auth with the account credentials
+         password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
+         password_mgr.add_password(None, download_url, TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN)
+         handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
+         opener = urllib.request.build_opener(handler)
+         urllib.request.install_opener(opener)
+         urllib.request.urlretrieve(download_url, self.media_path)
+
+     def extract_audio_from_video(self, video_file, audio_file):
+         clip = mp.VideoFileClip(video_file)
+         clip.audio.write_audiofile(audio_file)
+
+     def transcribe_audio_to_text(self, audio_file):
+         with open(audio_file, "rb") as audio:
+             transcription = client.audio.transcriptions.create(
+                 model="whisper-1",
+                 file=audio
+             )
+         video_text = transcription.text.strip()
+         return video_text
+
+     def retrieve_traits(self):
+         # Ensure we have an audio path
+         if not self.media_path:
+             raise ValueError("Media path is not specified.")
+
+         print("Transcribing audio to text...")
+         transcript = self.transcribe_audio_to_text(self.media_path)
+         print("Transcription complete")
+
+         print("Running through personality pipeline...")
+         result = self.personality_pipeline(transcript, candidate_labels=self.personality_labels)
+
+         # Keep the five highest-scoring labels
+         top_traits = sorted(zip(result['labels'], result['scores']), key=lambda x: x[1], reverse=True)[:5]
+         traits = {label: score for label, score in top_traits}
+
+         print(traits)
+         return traits
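Usage note: a minimal sketch of driving AudioAnalyzer directly with a local file (the path is hypothetical); passing media_url instead triggers the authenticated download path.

from audio_analyzer import AudioAnalyzer

# Hypothetical local file; with media_url set, download_content() runs first.
analyzer = AudioAnalyzer(media_path="./raw/sample.mp3", media_type="audio")
traits = analyzer.retrieve_traits()  # dict of the top 5 labels and their scores
print(traits)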
playground.py ADDED
@@ -0,0 +1,107 @@
+ import os
+ import json
+ import base64
+ import requests
+ from openai import OpenAI
+ from dotenv import load_dotenv
+ from moviepy.editor import VideoFileClip
+
+ load_dotenv()
+
+ audio_filename = "extracted_audio.wav"
+ image_filename = "extracted_image.jpg"
+
+ api_key = os.getenv("OPENAI_API_KEY")
+ client = OpenAI(api_key=api_key)
+
+ video = VideoFileClip("zach.mov")
+ audio = video.audio
+ audio.write_audiofile(audio_filename)
+
+ # Encode an image file as base64 for the vision API
+ def encode_image(image_path):
+     with open(image_path, "rb") as image_file:
+         return base64.b64encode(image_file.read()).decode('utf-8')
+
+ # Extract an image halfway through the video
+ halfway_time = video.duration / 2
+ video.save_frame(image_filename, t=halfway_time)
+
+ with open(audio_filename, "rb") as audio_file:
+     transcription = client.audio.transcriptions.create(
+         model="whisper-1",
+         file=audio_file
+     )
+ video_text = transcription.text.strip()
+
+ # Analyze sentiment with gpt-3.5-turbo in JSON mode
+ prompt = f"""Analyze the sentiment of the following text:\n\n{video_text}
+
+ You should respond in json format, as an object with key `response` and value as a string.
+ """
+ completion = client.chat.completions.create(
+     messages=[
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": prompt}
+     ],
+     model="gpt-3.5-turbo-1106",
+     response_format={"type": "json_object"},
+ )
+ response = completion.choices[0].message.content
+ result = json.loads(response)
+ parsed = result['response']
+ print(parsed)
+
+ # Analyze the speaker's personality traits with gpt-3.5-turbo in JSON mode
+ prompt = f"""Analyze the personality traits of the speaker in the following text:\n\n{video_text}
+
+ You should respond in json format, as an object with key `response` and value as an array of personality traits, like "funny", "happy", "sarcastic".
+ """
+ completion = client.chat.completions.create(
+     messages=[
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": prompt}
+     ],
+     model="gpt-3.5-turbo-1106",
+     response_format={"type": "json_object"},
+ )
+ response = completion.choices[0].message.content
+ result = json.loads(response)
+ parsed = result['response']
+ print(parsed)
+
+ # Describe the extracted frame with gpt-4o
+ base64_image = encode_image(image_filename)
+
+ headers = {
+     "Content-Type": "application/json",
+     "Authorization": f"Bearer {api_key}"
+ }
+
+ payload = {
+     "model": "gpt-4o",
+     "messages": [
+         {
+             "role": "user",
+             "content": [
+                 {
+                     "type": "text",
+                     "text": "Describe the person in this image. Be detailed."
+                 },
+                 {
+                     "type": "image_url",
+                     "image_url": {
+                         "url": f"data:image/jpeg;base64,{base64_image}"
+                     }
+                 }
+             ]
+         }
+     ],
+     "max_tokens": 300
+ }
+
+ response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
+
+ json_data = response.json()
+ parsed = json_data['choices'][0]['message']['content']
+ print(parsed)
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ flask==3.0.3
+ moviepy==1.0.3
+ transformers==4.42.4
+ openai==1.34.0
+ python-dotenv==1.0.1
+ torch==2.3.1
+ requests==2.32.3
+ tqdm==4.66.4
+ uvicorn==0.30.3
runtime.txt ADDED
@@ -0,0 +1 @@
+ python-3.12.4
video_analyzer.py ADDED
@@ -0,0 +1,119 @@
+ import os
+ import base64
+ import requests
+ from tqdm import tqdm
+ from dotenv import load_dotenv
+ from moviepy.editor import VideoFileClip
+ import re
+ import urllib.request
+
+ load_dotenv()
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ TWILIO_ACCOUNT_SID = os.getenv("TWILIO_ACCOUNT_SID")
+ TWILIO_AUTH_TOKEN = os.getenv("TWILIO_AUTH_TOKEN")
+
+ class VideoAnalyzer:
+     def __init__(self, video_path=None, video_url=None, num_images=3):
+         self.video_path = video_path
+         self.video_url = video_url
+         self.api_key = OPENAI_API_KEY
+         self.num_images = num_images
+         self.headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {self.api_key}"
+         }
+
+         if self.video_url and not self.video_path:
+             self.download_video()
+
+     def download_video(self):
+         self.video_path = "./tmp/video.mp4"
+         # tmp/ is gitignored, so make sure it exists before downloading into it
+         os.makedirs("./tmp", exist_ok=True)
+         # Twilio recording URLs require HTTP basic auth with the account credentials
+         password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
+         password_mgr.add_password(None, self.video_url, TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN)
+         handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
+         opener = urllib.request.build_opener(handler)
+         urllib.request.install_opener(opener)
+         urllib.request.urlretrieve(self.video_url, self.video_path)
+
+     def extract_images(self):
+         video = VideoFileClip(self.video_path)
+         duration = video.duration
+         # Sample num_images frames at evenly spaced interior points of the clip
+         time_intervals = [i * (duration / (self.num_images + 1)) for i in range(1, self.num_images + 1)]
+         os.makedirs("./tmp", exist_ok=True)
+         self.image_filenames = [f"./tmp/image_{i}.jpg" for i in range(self.num_images)]
+
+         for i, t in enumerate(time_intervals):
+             video.save_frame(self.image_filenames[i], t=t)
+
+     def encode_image(self, image_path):
+         with open(image_path, "rb") as image_file:
+             return base64.b64encode(image_file.read()).decode('utf-8')
+
+     def analyze_images(self):
+         traits_list = []
+
+         for image_filename in tqdm(self.image_filenames, desc="Processing images"):
+             base64_image = self.encode_image(image_filename)
+             payload = {
+                 "model": "gpt-4o",
+                 "messages": [
+                     {
+                         "role": "user",
+                         "content": [
+                             {
+                                 "type": "text",
+                                 "text": "Describe the person in this image. Be detailed as it pertains to their physical and emotional state. Please return the response in a list of traits or characteristics in a comma-separated list."
+                             },
+                             {
+                                 "type": "image_url",
+                                 "image_url": {
+                                     "url": f"data:image/jpeg;base64,{base64_image}"
+                                 }
+                             }
+                         ]
+                     }
+                 ],
+                 "max_tokens": 300
+             }
+
+             response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload)
+             response_data = response.json()
+             description = response_data['choices'][0]['message']['content']
+             traits = description.split(",")
+             traits_list.extend(traits)
+
+         return traits_list
+
+     def clean_trait(self, trait):
+         # Remove line breaks, leading/trailing whitespace, and unnecessary dashes
+         cleaned_trait = trait.replace('\n', '').strip()
+         cleaned_trait = re.sub(r'^-+', '', cleaned_trait).strip()
+         cleaned_trait = cleaned_trait.rstrip('.')
+
+         # If the cleaned trait has more than 4 words, it's likely a sentence, so drop it
+         if len(cleaned_trait.split()) > 4:
+             return None
+
+         return cleaned_trait
+
+     def retrieve_traits(self):
+         self.extract_images()
+         traits = self.analyze_images()
+
+         cleaned_traits = [self.clean_trait(trait) for trait in traits]
+         cleaned_traits = [trait for trait in cleaned_traits if trait]
+         # Deduplicate the traits collected across frames
+         unique_traits = list(set(cleaned_traits))
+         return unique_traits
+
+ if __name__ == "__main__":
+     # Use either video_path or video_url
+     # video_path = "./raw/zach.mov"
+     video_url = "https://video.twilio.com/v1/Recordings/RT2c1baf50b6343802964c98e5a6f979e3/Media"
+     analyzer = VideoAnalyzer(video_url=video_url)
+     traits = analyzer.retrieve_traits()
+     print(traits)
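Usage note: extract_images() samples interior points only, never the first or last frame. A quick check of the arithmetic with the default num_images=3 (the 60-second duration is just an example):

duration = 60.0
num_images = 3
# Same expression as in extract_images(): i * (duration / (num_images + 1))
time_intervals = [i * (duration / (num_images + 1)) for i in range(1, num_images + 1)]
print(time_intervals)  # [15.0, 30.0, 45.0]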