ai_agents / utils /audit /audit_audio.py
Ilyas KHIAT
first push
56a3465
raw
history blame
1.78 kB
import numpy as np
import scipy.io.wavfile as wavfile
from pydub import AudioSegment
import io
import tiktoken
from openai import OpenAI
def transcript_audio_func(audio_file):
    """Transcribe ``audio_file`` with OpenAI's ``whisper-1`` model.

    A fresh ``OpenAI`` client is built with default arguments on every call,
    so credentials are whatever that client resolves on its own
    (presumably the ``OPENAI_API_KEY`` environment variable -- confirm
    against the deployment).

    Args:
        audio_file: The audio payload, forwarded unchanged as the ``file``
            argument of the transcription request.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    whisper_request = {"model": "whisper-1", "file": audio_file}
    response = OpenAI().audio.transcriptions.create(**whisper_request)
    return response.text
def count_tokens(input_string: str) -> int:
    """Return how many ``cl100k_base`` tokens ``input_string`` encodes to.

    Args:
        input_string: Text to tokenize with tiktoken's ``cl100k_base``
            encoding.

    Returns:
        The length of the encoded token sequence.
    """
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(input_string))
# Function to calculate SNR
def calculate_snr(audio_data):
    """Return a signal-to-noise style ratio of ``audio_data`` in decibels.

    The "signal" is the raw sample array and the "noise" is the same array
    with its mean removed, so the result is
    ``10 * log10(mean(x ** 2) / var(x))``.
    NOTE(review): this measures the DC offset relative to the sample
    variance rather than a true noise floor -- confirm it is the metric
    the audit is meant to report.

    Args:
        audio_data: Array-like of audio samples. Integer PCM input is
            accepted; it is converted to ``float64`` before squaring.

    Returns:
        The ratio in dB. ``inf`` when the samples are constant and non-zero
        (no "noise" power), ``nan`` when the input is empty or all zeros
        (the ratio is undefined).
    """
    # Square in float64: squaring int16 samples in their own dtype wraps
    # around and yields meaningless (even negative) power values.
    samples = np.asarray(audio_data, dtype=np.float64)
    if samples.size == 0:
        return float("nan")

    noise = samples - samples.mean()
    signal_power = np.mean(samples ** 2)
    noise_power = np.mean(noise ** 2)

    if noise_power == 0:
        # Same values plain division would give, without the runtime
        # warnings from dividing by zero.
        return float("inf") if signal_power > 0 else float("nan")

    return 10 * np.log10(signal_power / noise_power)
def _rewind_if_possible(stream) -> None:
    """Move ``stream`` back to its start when it supports seeking."""
    try:
        stream.seek(0)
    except (AttributeError, OSError, ValueError):
        # Objects without ``seek`` (e.g. a path) and non-seekable or closed
        # streams are left untouched; this is best-effort only.
        pass


# Function to evaluate audio quality
def evaluate_audio_quality(file) -> dict:
    """Decode an audio file, measure it, and transcribe it.

    Args:
        file: Whatever ``AudioSegment.from_file`` accepts. If direct
            decoding fails, the object must provide ``read()`` so its bytes
            can be retried through an in-memory buffer. The same object is
            then handed to ``transcript_audio_func``.

    Returns:
        A dict with two entries:

        * ``"audit"``: ``volume`` (``audio.dBFS``), ``SNR`` (from
          ``calculate_snr``), ``duration`` in minutes, ``number_of_tokens``
          and ``number_of_words`` of the transcription.
        * ``"content"``: the ``transcription`` text, the decoded sample
          array ``audio_data`` and the ``frame_rate``.
    """
    try:
        audio = AudioSegment.from_file(file)
    except Exception:
        # The failed attempt may already have consumed the stream; rewind so
        # the in-memory retry sees the whole payload.
        _rewind_if_possible(file)
        audio = AudioSegment.from_file(io.BytesIO(file.read()))

    audio_data = np.array(audio.get_array_of_samples())

    # Number of minutes. The sample array interleaves all channels, so one
    # second of audio holds frame_rate * channels samples.
    duration = len(audio_data) / (audio.frame_rate * audio.channels) / 60

    # Calculate volume
    volume = audio.dBFS

    # Calculate SNR
    snr = calculate_snr(audio_data)

    # Get the transcription of the audio. Decoding above left the stream at
    # its end, so rewind before sending it to the transcription service.
    _rewind_if_possible(file)
    transcription = transcript_audio_func(file)

    audit = {
        "volume": volume,
        "SNR": snr,
        "duration": duration,
        "number_of_tokens": count_tokens(transcription),
        "number_of_words": len(transcription.split()),
    }
    content = {
        "transcription": transcription,
        "audio_data": audio_data,
        "frame_rate": audio.frame_rate,
    }
    return {
        "audit": audit,
        "content": content,
    }