Spaces:
Sleeping
Sleeping
File size: 3,046 Bytes
1727a7e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import os
import json
import base64
import requests
from openai import OpenAI
from dotenv import load_dotenv
from moviepy.editor import VideoFileClip
# --- Script setup ---------------------------------------------------------
# Pull settings from a local .env file into the process environment so that
# the OPENAI_API_KEY lookup below can succeed.
load_dotenv()

# Output files derived from the input video.
audio_filename = "extracted_audio.wav"
image_filename = "extracted_image.jpg"

# The same key is used by the SDK client and by the raw HTTP request that
# is issued later in this script.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Open the source video and write its audio track to disk for transcription.
video = VideoFileClip("zach.mov")
audio = video.audio
if audio is None:
    # moviepy leaves `audio` as None when the clip has no audio track; fail
    # with a clear message rather than an AttributeError on the next line.
    raise RuntimeError("zach.mov has no audio track to extract")
audio.write_audiofile(audio_filename)
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is opened in binary mode and read in one go; the base64 bytes
    are then decoded as UTF-8 so the caller receives a plain ``str``.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
# Extract an image halfway through the video
halfway_time = video.duration / 2
video.save_frame(image_filename, t=halfway_time)

# Transcribe the extracted audio with the Whisper model. The file is opened
# in a `with` block so the handle is closed as soon as the upload finishes,
# even if the API call raises.
with open(audio_filename, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )

# Plain transcript text, without leading/trailing whitespace; this is what
# the later text-analysis prompts are built from.
video_text = transcription.text.strip()
def _ask_for_json_response(prompt):
    """Send ``prompt`` to the chat model in JSON mode and return its answer.

    The prompt is expected to instruct the model to reply with a JSON object
    that has a ``response`` key; the value stored under that key is returned.

    Raises:
        json.JSONDecodeError: if the reply is not valid JSON.
        KeyError: if the JSON object has no ``response`` key.
    """
    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        model="gpt-3.5-turbo-1106",
        # JSON mode: the model is constrained to emit a JSON object.
        response_format={"type": "json_object"},
    )
    result = json.loads(completion.choices[0].message.content)
    return result['response']


# Analyze the sentiment of the transcript (chat model: gpt-3.5-turbo-1106)
prompt = f"""Analyze the sentiment of the following text:\n\n{video_text}
You should respond in json format, as an object with key `response` and value as a string.
"""
parsed = _ask_for_json_response(prompt)
print(parsed)

# Analyze the speaker's personality traits (chat model: gpt-3.5-turbo-1106)
prompt = f"""Analyze the personality traits of the speaker in the following text:\n\n{video_text}
You should respond in json format, as an object with key `response` and value as an array of personality traits, like "funny", "happy", "sarcastic".
"""
parsed = _ask_for_json_response(prompt)
print(parsed)
# Helper that base64-encodes an image file.
# NOTE: an equivalent function of the same name is also defined earlier in
# this file; this definition rebinds that name.
def encode_image(image_path):
    """Read ``image_path`` in binary mode and return its base64 text form."""
    with open(image_path, "rb") as image_file:
        return str(base64.b64encode(image_file.read()), 'utf-8')
# Getting the base64 string of the frame saved from the video
base64_image = encode_image(image_filename)

# Raw HTTP call to the chat completions endpoint, authenticated with the
# same API key the SDK client uses.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# One user message carrying both the instruction text and the image, which
# is embedded inline as a base64 data URL.
payload = {
    "model": "gpt-4o",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe the person in this image. Be detailed.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}",
                    },
                },
            ],
        }
    ],
    "max_tokens": 300,
}

# A timeout keeps the script from hanging forever on a stalled connection
# (requests waits indefinitely when none is given).
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=60,
)
# Surface HTTP-level failures (bad key, rate limit, ...) as an HTTPError
# instead of a confusing KeyError on 'choices' below.
response.raise_for_status()

json_data = response.json()
parsed = json_data['choices'][0]['message']['content']
print(parsed)