Spaces:
Running
Running
File size: 4,427 Bytes
8554b58 e1fe3b9 73b5748 8554b58 73b5748 8554b58 5716a3c 8554b58 e1fe3b9 8554b58 1c35da3 45676fb 1c35da3 8554b58 e1fe3b9 8554b58 e1fe3b9 8554b58 e1fe3b9 8554b58 e1fe3b9 8554b58 e1fe3b9 8554b58 e1fe3b9 8554b58 e1fe3b9 8554b58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# This script transcribes a downloaded YouTube video using Deepgram.
# The audio should be cleaned with UVR5 first, so the input file is FLAC.
# It uploads the full-length interview or podcast to Deepgram,
# which returns the speaker IDs. The user must manually listen to the audio clips to find out which speaker is wanted.
# Remaining speakers and short-length audio are discarded.
#
import os
from dotenv import load_dotenv
from pydub import AudioSegment
import math
from os.path import join
import shutil
from deepgram import (
DeepgramClient,
PrerecordedOptions,
FileSource,
)
def write_csv_file(csv_file, csv_data):
    """Write rows to a pipe-delimited text file.

    Args:
        csv_file: Path of the file to create (an existing file is overwritten).
        csv_data: Iterable of rows; each row is an iterable of fields. Every
            field is converted with ``str()`` and the fields of a row are
            joined with ``'|'``. Fields are not escaped or quoted.
    """
    # Write UTF-8 explicitly: transcript text may contain non-ASCII characters
    # and the platform default encoding is not guaranteed to represent them.
    with open(csv_file, 'w', encoding='utf-8') as file:
        for row in csv_data:
            # One line per row, fields separated by '|'
            row_string = '|'.join(str(item) for item in row)
            file.write(row_string + '\n')

    print(f"Data written to {csv_file}")
def process(audio_file, tag, progress):
    """Transcribe an audio file with Deepgram and export per-speaker clips.

    The whole file is sent to Deepgram with diarization enabled. Every
    sentence of the returned paragraphs that lasts at least two seconds is
    cut out of the audio and exported as a WAV clip into
    ``output/Speaker_<id>/``. For each speaker a pipe-delimited index file
    listing the clips is written next to them, and the ``output`` folder is
    finally zipped into ``output.zip``.

    Any existing ``output`` folder and ``output.zip`` in the current working
    directory are deleted first.

    Args:
        audio_file: Path of the audio file to transcribe and slice.
        tag: Label of the source (e.g. the YouTube channel), used in the
            clip and index file names.
        progress: Progress reporter. It is called with a float fraction and
            must provide ``tqdm(iterable, desc=...)`` (presumably a
            ``gradio.Progress`` instance -- confirm against the caller).

    Returns:
        The string ``"output.zip"``, the archive created in the current
        working directory.

    Raises:
        Exception: Whatever the Deepgram request raises is printed and then
            re-raised, since nothing can be generated without a transcript.
    """
    load_dotenv("myenv-variable.env")
    api_key = os.getenv('API_DEEPGRAM')

    min_duration_s = 2   # sentences shorter than this are skipped
    padding_ms = 5       # margin added on both sides of every clip
    # Hard-coded display name for one diarized speaker, kept from the
    # original script.
    speaker_aliases = {10: "Tayr"}

    # Start from a clean output folder / archive.
    output_folder = join(os.getcwd(), "output")
    if os.path.isdir(output_folder):
        shutil.rmtree(output_folder)
    if os.path.exists("output.zip"):
        os.remove("output.zip")
    os.mkdir(output_folder)

    deepgram = DeepgramClient(api_key)

    with open(audio_file, "rb") as file:
        buffer_data = file.read()

    payload: FileSource = {
        "buffer": buffer_data,
    }

    # Configure Deepgram options for audio analysis
    options = PrerecordedOptions(
        model="nova-2",
        smart_format=True,
        filler_words=True,
        diarize=True
    )
    progress(0.20)

    try:
        response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)
    except Exception as e:
        # Log and propagate: continuing would only fail later with a
        # NameError on the missing response.
        print(e)
        raise
    progress(0.30)

    audio = AudioSegment.from_file(audio_file)
    paragraphs = response['results']['channels'][0]['alternatives'][0]['paragraphs']['paragraphs']

    header = ["filename", "speaker", "text", "start_time", "end_time", "duration"]
    csv_data_dict = {}
    clip_index = 1
    progress(0.40)

    for paragraph in progress.tqdm(paragraphs, desc="Generating..."):
        # Resolve the alias before the id is used anywhere, so the folder,
        # the dictionary key and the file names all agree.
        raw_speaker = paragraph['speaker']
        speaker_id = speaker_aliases.get(raw_speaker, raw_speaker)
        speaker_key = str(speaker_id)
        folder_path = join(output_folder, "Speaker_" + speaker_key)

        for text in paragraph['sentences']:
            duration_s = round(text['end'] - text['start'], 3)
            if duration_s < min_duration_s:
                continue

            # Convert the sentence bounds from seconds to ms and pad them.
            # Clamp at 0: a negative slice start would be taken relative to
            # the end of the audio rather than the beginning.
            start_time_ms = max(0, math.floor(text['start'] * 1000) - padding_ms)
            end_time_ms = math.ceil(text['end'] * 1000) + padding_ms
            duration_ms = str(end_time_ms - start_time_ms).zfill(6)

            # First clip of this speaker: create the folder and the index
            # with its header row.
            if speaker_key not in csv_data_dict:
                os.makedirs(folder_path, exist_ok=True)
                csv_data_dict[speaker_key] = [list(header)]

            clip_name = f"{tag}_Speaker_{speaker_id}_i{str(clip_index).zfill(3)}_d{duration_ms}.wav"
            # Name recorded in the index file (relative "wavs" folder)
            file_name = join("wavs", clip_name)

            # Slice the audio and export it into the speaker folder
            segment = audio[start_time_ms:end_time_ms]
            segment.export(join(folder_path, clip_name), format="wav")

            csv_data_dict[speaker_key].append(
                [file_name, speaker_id, text['text'], start_time_ms, end_time_ms, duration_s]
            )
            clip_index += 1
    progress(0.80)

    # Write one index file per speaker into that speaker's folder
    for key, value in csv_data_dict.items():
        speaker_folder = join(output_folder, f"Speaker_{key}")
        csv_filename = join(speaker_folder, f"Speaker_{key}_{tag}_output.txt")
        write_csv_file(csv_filename, value)
    progress(0.90)

    shutil.make_archive("output", 'zip', output_folder)
    progress(1.00)
    return "output.zip"
|