# Spaces: Running on T4 — non-code banner text left over from the notebook/Spaces export.
# Standard library
import os
import json
import base64

# Third-party
import requests
from openai import OpenAI
from dotenv import load_dotenv
from moviepy.editor import VideoFileClip

# Load OPENAI_API_KEY from a local .env file and build the API client.
load_dotenv()

# Output artifact paths for the extracted audio track and video frame.
audio_filename = "extracted_audio.wav"
image_filename = "extracted_image.jpg"

api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Pull the audio track out of the source video so it can be transcribed.
video = VideoFileClip("zach.mov")
audio = video.audio
audio.write_audiofile(audio_filename)
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode, base64-encoded, and decoded to a
    UTF-8 ``str`` suitable for embedding in a data URL.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# Extract a representative frame from the midpoint of the video.
halfway_time = video.duration / 2
video.save_frame(image_filename, t=halfway_time)

# Transcribe the extracted audio with Whisper. A context manager ensures
# the audio file handle is closed (the original left it open).
with open(audio_filename, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )
video_text = transcription.text.strip()
# Analyze sentiment of the transcript using a JSON-mode chat completion
# (model is gpt-3.5-turbo-1106, not GPT-4 as the old comment claimed).
prompt = f"""Analyze the sentiment of the following text:\n\n{video_text}
You should respond in json format, as an object with key `response` and value as a string.
"""
completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ],
    model="gpt-3.5-turbo-1106",
    # JSON mode guarantees the reply parses as a JSON object.
    response_format={"type": "json_object"},
)
response = completion.choices[0].message.content
result = json.loads(response)
parsed = result['response']
print(parsed)
# Analyze the speaker's personality traits from the transcript (the old
# comment said "sentiment" — a copy-paste error) via a JSON-mode completion.
prompt = f"""Analyze the personality traits of the speaker in the following text:\n\n{video_text}
You should respond in json format, as an object with key `response` and value as an array of personality traits, like "funny", "happy", "sarcastic".
"""
completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ],
    model="gpt-3.5-turbo-1106",
    # JSON mode guarantees the reply parses as a JSON object.
    response_format={"type": "json_object"},
)
response = completion.choices[0].message.content
result = json.loads(response)
parsed = result['response']
print(parsed)
# NOTE(review): this is an exact duplicate of encode_image defined earlier
# in this file — one of the two copies should eventually be removed.
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode, base64-encoded, and decoded to a
    UTF-8 ``str`` suitable for embedding in a data URL.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# Describe the person in the extracted frame using GPT-4o vision via the
# raw HTTP chat-completions endpoint.
base64_image = encode_image(image_filename)
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}
payload = {
    "model": "gpt-4o",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe the person in this image. Be detailed.",
                },
                {
                    "type": "image_url",
                    # Inline the frame as a base64 data URL.
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ],
    "max_tokens": 300,
}
response = requests.post(
    "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
)
# Fail loudly on HTTP errors instead of hitting a confusing KeyError
# on 'choices' when the API returns an error body.
response.raise_for_status()
json_data = response.json()
parsed = json_data['choices'][0]['message']['content']
print(parsed)