Spaces:

Filteroff
/

video_audio_analyzer

Sleeping

File size: 3,046 Bytes

1727a7e

import os
import json
import base64
import requests
from openai import OpenAI
from dotenv import load_dotenv
from moviepy.editor import VideoFileClip

# Load variables from a local .env file into the process environment so the
# OPENAI_API_KEY lookup below can find the key.
load_dotenv()

# Intermediate artifacts: the audio track is written here and re-opened for
# transcription; a still frame is written to the image path and later
# base64-encoded for the vision request.
audio_filename = "extracted_audio.wav"
image_filename = "extracted_image.jpg"

api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

video = VideoFileClip("zach.mov")
audio = video.audio
# A clip without an audio track exposes ``audio`` as None; fail with an
# explicit message rather than an AttributeError on the write call.
if audio is None:
    raise ValueError("zach.mov has no audio track to transcribe")
audio.write_audiofile(audio_filename)

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode in one go; the base64 bytes are decoded
    as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Extract an image halfway through the video
halfway_time = video.duration / 2
video.save_frame(image_filename, t=halfway_time)
# The clip is not read again after this point, so release the resources it
# holds instead of keeping them open for the rest of the script.
video.close()

# Transcribe the extracted audio with Whisper. The context manager closes the
# file handle even if the API call raises (the original left it open).
with open(audio_filename, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )
# Leading/trailing whitespace is stripped before the text goes into prompts.
video_text = transcription.text.strip()


def _request_json_response(user_prompt):
    """Send ``user_prompt`` to the chat model and return its ``response`` value.

    The request uses ``gpt-3.5-turbo-1106`` with the JSON-object response
    format, so the reply body is parsed with ``json.loads`` and the value
    stored under the ``response`` key is returned.

    Args:
        user_prompt: Text sent as the single user message. It should instruct
            the model to answer as a JSON object with a ``response`` key.

    Returns:
        Whatever JSON value the model placed under ``response``.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the parsed object has no ``response`` key.
    """
    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_prompt},
        ],
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
    )
    result = json.loads(completion.choices[0].message.content)
    return result['response']


# Analyze the sentiment of the transcript (gpt-3.5-turbo-1106, JSON mode)
prompt = f"""Analyze the sentiment of the following text:\n\n{video_text}

You should respond in json format, as an object with key `response` and value as a string.
"""
parsed = _request_json_response(prompt)
print(parsed)


# Analyze the speaker's personality traits from the same transcript
prompt = f"""Analyze the personality traits of the speaker in the following text:\n\n{video_text}

You should respond in json format, as an object with key `response` and value as an array of personality traits, like "funny", "happy", "sarcastic".
"""
parsed = _request_json_response(prompt)
print(parsed)




# NOTE: this rebinds the identical ``encode_image`` defined earlier in the file.
def encode_image(image_path):
    """Read ``image_path`` as raw bytes and return them base64-encoded as str."""
    with open(image_path, mode="rb") as stream:
        content = stream.read()
    return str(base64.b64encode(content), 'utf-8')


# Getting the base64 string
base64_image = encode_image(image_filename)

# The vision request is sent as a raw HTTP call, so the bearer token is
# attached by hand rather than through the OpenAI client object.
headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {api_key}"
}

# One user message carrying both the instruction text and the extracted frame,
# embedded inline as a base64 JPEG data URL.
payload = {
  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "Describe the person in this image. Be detailed."
        },
        {
          "type": "image_url",
          "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}"
          }
        }
      ]
    }
  ],
  "max_tokens": 300
}

# requests has no default timeout; without one a stalled connection would
# block the script forever.
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=60,
)
# Surface HTTP failures (bad key, rate limit, ...) as an HTTPError here rather
# than as a confusing KeyError on 'choices' below.
response.raise_for_status()

json_data = response.json()
parsed = json_data['choices'][0]['message']['content']
print(parsed)