hivecorp committed on
Commit
7697af6
·
verified ·
1 Parent(s): bccb8c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -28
app.py CHANGED
@@ -23,17 +23,21 @@ def format_time(seconds):
23
  # Function to split text into segments by punctuation or limit to 7-8 words
24
  def split_text_into_segments(text):
25
  segments = []
 
26
  raw_segments = re.split(r'([.!?])', text)
27
  for i in range(0, len(raw_segments) - 1, 2):
 
28
  sentence = raw_segments[i].strip() + raw_segments[i + 1]
29
  words = sentence.split()
30
 
 
31
  if len(words) > 8:
32
  for j in range(0, len(words), 8):
33
  segments.append(" ".join(words[j:j+8]))
34
  else:
35
  segments.append(sentence.strip())
36
 
 
37
  if len(raw_segments) % 2 == 1:
38
  remaining_text = raw_segments[-1].strip()
39
  words = remaining_text.split()
@@ -43,22 +47,27 @@ def split_text_into_segments(text):
43
  return segments
44
 
45
  # Function to generate SRT with accurate timing per batch and cross-check timing
46
- async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voice, rate):
47
  audio_file = f"batch_{batch_num}_audio.wav"
48
 
49
  # Generate the audio using edge-tts with pitch and rate adjustment
50
  tts = edge_tts.Communicate(batch_text, voice, rate=f"{rate}%", pitch=f"{pitch}Hz")
51
  await tts.save(audio_file)
52
 
 
53
  actual_length = get_audio_length(audio_file)
 
 
54
  segments = split_text_into_segments(batch_text)
55
- segment_duration = actual_length / len(segments)
56
  start_time = start_offset
57
 
 
58
  srt_content = ""
59
  for index, segment in enumerate(segments):
60
  end_time = start_time + segment_duration
61
 
 
62
  if end_time > start_offset + actual_length:
63
  end_time = start_offset + actual_length
64
 
@@ -66,28 +75,35 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voic
66
  srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
67
  srt_content += segment + "\n\n"
68
 
 
69
  start_time = end_time
70
 
71
- return srt_content, audio_file, start_time
72
 
73
  # Batch processing function with cumulative timing, progress indicator, and final SRT validation
74
- async def batch_process_srt_and_audio(script_text, pitch, voice, rate, progress=gr.Progress()):
75
  batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
76
  all_srt_content = ""
77
  combined_audio = AudioSegment.empty()
78
- start_offset = 0.0
79
 
 
80
  for batch_num, batch_text in enumerate(batches):
81
- srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voice, rate)
82
  all_srt_content += srt_content
83
 
 
84
  batch_audio = AudioSegment.from_file(audio_file)
85
  combined_audio += batch_audio
86
- start_offset = end_offset
87
 
 
88
  os.remove(audio_file)
 
 
89
  progress((batch_num + 1) / len(batches))
90
 
 
91
  total_audio_length = combined_audio.duration_seconds
92
  validated_srt_content = ""
93
  for line in all_srt_content.strip().splitlines():
@@ -100,50 +116,42 @@ async def batch_process_srt_and_audio(script_text, pitch, voice, rate, progress=
100
  line = f"{format_time(start_time)} --> {format_time(end_time)}"
101
  validated_srt_content += line + "\n"
102
 
 
103
  unique_id = uuid.uuid4()
104
- final_audio_path = f"final_audio_{unique_id}.mp3"
105
  final_srt_path = f"final_subtitles_{unique_id}.srt"
106
 
 
107
  combined_audio.export(final_audio_path, format="mp3", bitrate="320k")
108
 
 
109
  with open(final_srt_path, "w") as srt_file:
110
  srt_file.write(validated_srt_content)
111
 
112
  return final_srt_path, final_audio_path
113
 
114
  # Gradio interface function
115
- async def process_script(script_text, pitch, voice, rate):
116
- srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch, voice, rate)
117
  return srt_path, audio_path, audio_path
118
 
119
- # List of available voices
120
- voices = {
121
- "Jenny": "en-US-JennyNeural",
122
- "Guy": "en-US-GuyNeural",
123
- "Ana": "en-US-AnaNeural",
124
- "Aria": "en-US-AriaNeural",
125
- "Brian": "en-US-BrianNeural",
126
- "Christopher": "en-US-ChristopherNeural",
127
- "Eric": "en-US-EricNeural",
128
- "Michelle": "en-US-MichelleNeural",
129
- "Roger": "en-US-RogerNeural",
130
- }
131
-
132
- # Gradio interface setup with voice selection and speech rate adjustment
133
  app = gr.Interface(
134
  fn=process_script,
135
  inputs=[
136
  gr.Textbox(label="Enter Script Text", lines=10),
137
- gr.Dropdown(label="Select Voice", choices=list(voices.keys()), value="Jenny"),
138
- gr.Slider(label="Speech Rate Adjustment (%)", minimum=0, maximum=2, step=0.1, value=1),
139
- gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1)
140
  ],
141
  outputs=[
142
  gr.File(label="Download SRT File"),
143
  gr.File(label="Download Audio File"),
144
  gr.Audio(label="Play Audio")
145
  ],
146
- description="HIVEcorp TTS Generator with customizable voice, speech rate, and pitch adjustments."
147
  )
148
 
149
  app.launch()
 
23
  # Function to split text into segments by punctuation or limit to 7-8 words
24
  def split_text_into_segments(text):
25
  segments = []
26
+ # Split by punctuation (., !, ?)
27
  raw_segments = re.split(r'([.!?])', text)
28
  for i in range(0, len(raw_segments) - 1, 2):
29
+ # Combine segment with following punctuation
30
  sentence = raw_segments[i].strip() + raw_segments[i + 1]
31
  words = sentence.split()
32
 
33
+ # If segment is longer than 8 words, split into 7-8 word chunks
34
  if len(words) > 8:
35
  for j in range(0, len(words), 8):
36
  segments.append(" ".join(words[j:j+8]))
37
  else:
38
  segments.append(sentence.strip())
39
 
40
+ # Handle remaining text after the last punctuation
41
  if len(raw_segments) % 2 == 1:
42
  remaining_text = raw_segments[-1].strip()
43
  words = remaining_text.split()
 
47
  return segments
48
 
49
  # Function to generate SRT with accurate timing per batch and cross-check timing
50
+ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
51
  audio_file = f"batch_{batch_num}_audio.wav"
52
 
53
  # Generate the audio using edge-tts with pitch and rate adjustment
54
  tts = edge_tts.Communicate(batch_text, voice, rate=f"{rate}%", pitch=f"{pitch}Hz")
55
  await tts.save(audio_file)
56
 
57
+ # Get the actual length of the audio file
58
  actual_length = get_audio_length(audio_file)
59
+
60
+ # Split the text into segments based on punctuation and word count
61
  segments = split_text_into_segments(batch_text)
62
+ segment_duration = actual_length / len(segments) # Duration per segment
63
  start_time = start_offset
64
 
65
+ # Initialize SRT content
66
  srt_content = ""
67
  for index, segment in enumerate(segments):
68
  end_time = start_time + segment_duration
69
 
70
+ # If end_time exceeds actual audio length of the batch, adjust it
71
  if end_time > start_offset + actual_length:
72
  end_time = start_offset + actual_length
73
 
 
75
  srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
76
  srt_content += segment + "\n\n"
77
 
78
+ # Update start time for next segment
79
  start_time = end_time
80
 
81
+ return srt_content, audio_file, start_time # Return updated start time for cumulative tracking
82
 
83
  # Batch processing function with cumulative timing, progress indicator, and final SRT validation
84
+ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
85
  batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
86
  all_srt_content = ""
87
  combined_audio = AudioSegment.empty()
88
+ start_offset = 0.0 # Track cumulative time offset for SRT timing
89
 
90
+ # Process each batch sequentially to ensure proper timing and cumulative offset tracking
91
  for batch_num, batch_text in enumerate(batches):
92
+ srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice)
93
  all_srt_content += srt_content
94
 
95
+ # Append the audio of each batch to the combined audio
96
  batch_audio = AudioSegment.from_file(audio_file)
97
  combined_audio += batch_audio
98
+ start_offset = end_offset # Update the start offset for the next batch
99
 
100
+ # Clean up the individual batch audio file
101
  os.remove(audio_file)
102
+
103
+ # Update progress
104
  progress((batch_num + 1) / len(batches))
105
 
106
+ # Final cross-check: Adjust any subtitle that exceeds the total audio length
107
  total_audio_length = combined_audio.duration_seconds
108
  validated_srt_content = ""
109
  for line in all_srt_content.strip().splitlines():
 
116
  line = f"{format_time(start_time)} --> {format_time(end_time)}"
117
  validated_srt_content += line + "\n"
118
 
119
+ # Generate unique names for the final files
120
  unique_id = uuid.uuid4()
121
+ final_audio_path = f"final_audio_{unique_id}.mp3" # Set to MP3
122
  final_srt_path = f"final_subtitles_{unique_id}.srt"
123
 
124
+ # Export combined audio directly as MP3 with 320 kbps bitrate
125
  combined_audio.export(final_audio_path, format="mp3", bitrate="320k")
126
 
127
+ # Export validated SRT with unique names
128
  with open(final_srt_path, "w") as srt_file:
129
  srt_file.write(validated_srt_content)
130
 
131
  return final_srt_path, final_audio_path
132
 
133
  # Gradio interface function
134
+ async def process_script(script_text, pitch, rate, voice):
135
+ srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch, rate, voice)
136
  return srt_path, audio_path, audio_path
137
 
138
+ # Gradio interface setup with pitch adjustment slider, rate adjustment slider, and voice selection
139
+ voice_options = ["en-US-AndrewNeural", "en-US-JennyNeural", "en-US-GuyNeural"] # Example voice options
140
+
 
 
 
 
 
 
 
 
 
 
 
141
  app = gr.Interface(
142
  fn=process_script,
143
  inputs=[
144
  gr.Textbox(label="Enter Script Text", lines=10),
145
+ gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=0),
146
+ gr.Slider(label="Rate Adjustment (%)", minimum=-100, maximum=100, step=1, value=0),
147
+ gr.Dropdown(label="Select Speaker", choices=voice_options, value=voice_options[0]) # Dropdown for voice selection
148
  ],
149
  outputs=[
150
  gr.File(label="Download SRT File"),
151
  gr.File(label="Download Audio File"),
152
  gr.Audio(label="Play Audio")
153
  ],
154
+ description="HIVEcorp TTS Generator with adjustable pitch, rate, and speaker selection."
155
  )
156
 
157
  app.launch()