hivecorp committed on
Commit
ced46ea
·
verified ·
1 Parent(s): 3000f97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -90
app.py CHANGED
@@ -1,109 +1,89 @@
1
  import gradio as gr
 
2
  import edge_tts
3
  import os
4
  import asyncio
5
- import re
6
- from datetime import timedelta
7
- from pydub import AudioSegment # Requires `pydub`
8
-
9
def split_text(text, max_words=500):
    """Break *text* into consecutive batches of at most *max_words* words.

    Words are whitespace-delimited tokens. Each batch is re-joined with single
    spaces, so the original spacing between words is not preserved.
    """
    tokens = text.split()
    batches = []
    for offset in range(0, len(tokens), max_words):
        batches.append(' '.join(tokens[offset:offset + max_words]))
    return batches
13
-
14
def generate_srt_sections(text, words_per_segment=8):
    """Split *text* into short subtitle sections.

    A section is closed as soon as it contains ``words_per_segment`` words or
    its latest word ends with sentence punctuation (``.``, ``!`` or ``?``).
    Whitespace between the words of a section is kept as written; leading and
    trailing whitespace of each section is stripped.

    Args:
        text: The script text to split.
        words_per_segment: Maximum number of words in one section.

    Returns:
        A list of non-empty section strings, in reading order.
    """
    srt_sections = []
    pending = []      # tokens (words and the whitespace between them) of the open section
    word_count = 0    # words in ``pending``; whitespace tokens are not counted

    # The capturing group keeps the whitespace runs as separate tokens so the
    # original spacing can be reproduced inside each section.
    for token in re.split(r'(\s+)', text):
        if not token:
            # re.split yields '' at the edges when text starts/ends with whitespace.
            continue
        pending.append(token)
        if token.isspace():
            continue

        word_count += 1
        if word_count >= words_per_segment or token.endswith(('.', '!', '?')):
            section = ''.join(pending).strip()
            if section:
                srt_sections.append(section)
            pending = []
            word_count = 0

    leftover = ''.join(pending).strip()
    if leftover:
        srt_sections.append(leftover)

    return srt_sections
30
-
31
async def generate_audio_for_section(text, filename):
    """Synthesize *text* into *filename* and return the clip length in seconds."""
    await edge_tts.Communicate(text, "en-US-GuyNeural").save(filename)
    clip = AudioSegment.from_file(filename)
    duration_ms = len(clip)
    return duration_ms / 1000  # Duration in seconds
37
-
38
def generate_accurate_srt(batch_text, estimated_rate=0.5):
    """Build SRT text for one batch, timing each cue from its own synthesized audio.

    The batch is split into sections, each section is synthesized on its own,
    and the measured clip length becomes the cue duration. Cues are laid end
    to end starting at 00:00:00,000 and numbered from 1.

    Args:
        batch_text: Script text of one batch.
        estimated_rate: Not used by the timing logic; kept so existing callers
            that pass it keep working.

    Returns:
        All cues of the batch as one SRT-formatted string.
    """

    def srt_timestamp(seconds):
        # SRT expects HH:MM:SS,mmm; round to whole milliseconds before
        # splitting so the fields can never disagree with each other.
        total_ms = max(0, round(seconds * 1000))
        hours, rest = divmod(total_ms, 3_600_000)
        minutes, rest = divmod(rest, 60_000)
        secs, millis = divmod(rest, 1000)
        return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

    cues = []
    start_time = 0.0

    for index, section in enumerate(generate_srt_sections(batch_text), start=1):
        audio_file = f"temp_audio_{index}.mp3"
        try:
            # The coroutine already returns the clip duration in seconds, so
            # there is no need to measure the file a second time.
            actual_length = asyncio.run(generate_audio_for_section(section, audio_file))
        finally:
            # The per-section clip is only needed for its duration.
            if os.path.exists(audio_file):
                os.remove(audio_file)

        end_time = start_time + actual_length
        cues.append(
            f"{index}\n{srt_timestamp(start_time)} --> {srt_timestamp(end_time)}\n{section}\n\n"
        )
        start_time = end_time

    return "".join(cues)
64
 
65
def batch_process_srt_and_audio(script):
    """Generate an SRT file and a combined audio file for *script*.

    The script is split into 500-word batches. For every batch the SRT cues
    are generated and the batch audio is synthesized to its own file; finally
    the SRT text is written to ``final_output.srt`` and the batch audio files
    are merged into ``final_combined_audio.mp3``.

    Args:
        script: The full script text.

    Returns:
        A ``(srt_path, audio_path)`` tuple of the two output file names.
    """
    srt_parts = []
    audio_files = []

    for batch_index, batch_text in enumerate(split_text(script, max_words=500)):
        # NOTE(review): generate_accurate_srt appears to number cues from 1 and
        # time them from zero on every call, so cues of later batches would not
        # line up with the combined audio -- confirm and offset if needed.
        srt_parts.append(generate_accurate_srt(batch_text))

        # Synthesize the whole batch in one pass for the final audio track.
        batch_audio_file = f"batch_audio_{batch_index}.mp3"
        asyncio.run(generate_audio_for_section(batch_text, batch_audio_file))
        audio_files.append(batch_audio_file)

    # Write the SRT as UTF-8 explicitly so non-ASCII script text does not
    # depend on the platform's default encoding.
    final_srt_path = "final_output.srt"
    with open(final_srt_path, "w", encoding="utf-8") as f:
        f.write("".join(srt_parts))

    final_audio_path = "final_combined_audio.mp3"
    combine_audio_files(audio_files, final_audio_path)

    return final_srt_path, final_audio_path
91
 
92
def combine_audio_files(audio_files, output_file):
    """Concatenate the given audio files in order and export the result as MP3."""
    merged = AudioSegment.empty()
    for path in audio_files:
        merged = merged + AudioSegment.from_file(path)
    merged.export(output_file, format="mp3")
98
 
99
# Gradio UI: one script textbox in, SRT and audio download files out.
_script_input = gr.Textbox(lines=10, label="Input Script")
_download_outputs = [
    gr.File(label="Download SRT"),
    gr.File(label="Download Audio"),
]

app = gr.Interface(
    fn=batch_process_srt_and_audio,
    inputs=_script_input,
    outputs=_download_outputs,
    title="Accurate Batch SRT & Audio Generator with Cross-Check",
    description="Enter a script to generate synchronized SRT and audio files with section-wise accuracy."
)

if __name__ == "__main__":
    app.launch()
 
1
  import gradio as gr
2
+ from pydub import AudioSegment
3
  import edge_tts
4
  import os
5
  import asyncio
6
+
7
def get_audio_length(audio_file):
    """Return the duration of *audio_file* in seconds, as reported by pydub."""
    return AudioSegment.from_file(audio_file).duration_seconds
11
+
12
def format_time(seconds):
    """Render a duration in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to the nearest millisecond before it is split into
    fields. Truncating the fractional part instead loses a millisecond for
    values such as 2.3 (whose float remainder is 0.2999...), and rounding the
    fields independently could not carry into the seconds field.

    Args:
        seconds: Duration in seconds (int or float). Negative values are
            clamped to zero because SRT has no negative timestamps.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for 3661.5.
    """
    total_millis = max(0, round(seconds * 1000))
    hrs, remainder = divmod(total_millis, 3_600_000)
    mins, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
20
+
21
_WORDS_PER_CUE = 10      # words shown in a single subtitle cue
_MAX_BATCH_CHARS = 500   # upper bound on the characters synthesized per request


def _split_into_batches(text, max_chars=_MAX_BATCH_CHARS):
    """Split *text* into batches of at most *max_chars* characters without cutting words.

    A single word longer than *max_chars* becomes a batch of its own.
    """
    batches = []
    current = []
    current_len = 0
    for word in text.split():
        needed = len(word) + (1 if current else 0)  # +1 for the joining space
        if current and current_len + needed > max_chars:
            batches.append(" ".join(current))
            current = []
            current_len = 0
            needed = len(word)
        current.append(word)
        current_len += needed
    if current:
        batches.append(" ".join(current))
    return batches


def _build_cues(words, duration, start_index, time_offset):
    """Return SRT text for *words*, sharing *duration* seconds in proportion to cue word counts."""
    total = len(words)
    cues = []
    for cue_number, first in enumerate(range(0, total, _WORDS_PER_CUE), start=start_index):
        last = min(first + _WORDS_PER_CUE, total)
        # Derive both ends from the word position instead of accumulating a
        # fixed step, so a short final cue ends exactly when the audio ends.
        cue_start = time_offset + duration * first / total
        cue_end = time_offset + duration * last / total
        cue_text = " ".join(words[first:last])
        cues.append(
            f"{cue_number}\n{format_time(cue_start)} --> {format_time(cue_end)}\n{cue_text}\n\n"
        )
    return "".join(cues)


async def generate_accurate_srt(batch_text, batch_num, start_index=1, time_offset=0.0):
    """Synthesize one batch and build SRT cues timed against its audio.

    Args:
        batch_text: Text of the batch.
        batch_num: Batch number, used only to name the audio file.
        start_index: Number given to the first cue of this batch.
        time_offset: Seconds of audio that precede this batch in the final
            track; added to every timestamp of the batch.

    Returns:
        ``(srt_content, audio_file)``: the SRT text for the batch and the path
        of the audio file that was written. The caller owns the file.
    """
    audio_file = f"batch_{batch_num}_audio.wav"

    # Generate the audio using edge-tts
    tts = edge_tts.Communicate(batch_text, "en-US-JennyNeural")
    await tts.save(audio_file)

    # The measured length of the synthesized clip drives the cue timing.
    actual_length = get_audio_length(audio_file)

    srt_content = _build_cues(batch_text.split(), actual_length, start_index, time_offset)
    return srt_content, audio_file


async def batch_process_srt_and_audio(script_text):
    """Generate ``final_subtitles.srt`` and ``final_audio.wav`` for *script_text*.

    The script is split on word boundaries into batches of roughly 500
    characters. Each batch is synthesized separately; its cues are numbered
    and time-shifted so they continue from the previous batch, and its audio
    is appended to the combined track.

    Args:
        script_text: The full script text.

    Returns:
        ``("final_subtitles.srt", "final_audio.wav")``.
    """
    srt_parts = []
    combined_audio = AudioSegment.empty()
    next_index = 1      # number of the next SRT cue across all batches
    time_offset = 0.0   # seconds of audio already in the combined track

    for batch_num, batch_text in enumerate(_split_into_batches(script_text)):
        srt_content, audio_file = await generate_accurate_srt(
            batch_text, batch_num, start_index=next_index, time_offset=time_offset
        )
        try:
            batch_audio = AudioSegment.from_file(audio_file)
        finally:
            # Clean up the individual batch audio file even if decoding fails.
            os.remove(audio_file)

        combined_audio += batch_audio
        time_offset += batch_audio.duration_seconds
        srt_parts.append(srt_content)
        # Ceiling division: a partial final cue still consumes a cue number.
        next_index += -(-len(batch_text.split()) // _WORDS_PER_CUE)

    # Export combined audio and SRT
    combined_audio.export("final_audio.wav", format="wav")
    with open("final_subtitles.srt", "w", encoding="utf-8") as srt_file:
        srt_file.write("".join(srt_parts))

    return "final_subtitles.srt", "final_audio.wav"
72
 
73
async def process_script(script_text):
    """Gradio callback: run the batch pipeline and return the SRT and audio paths."""
    outputs = await batch_process_srt_and_audio(script_text)
    subtitle_file, audio_file = outputs  # exactly two paths are expected
    return subtitle_file, audio_file
 
 
77
 
78
# Gradio UI: a multi-line script textbox in, SRT and audio download files out.
_script_box = gr.Textbox(label="Enter Script Text", lines=10)
_result_files = [
    gr.File(label="Download SRT File"),
    gr.File(label="Download Audio File"),
]

app = gr.Interface(
    fn=process_script,
    inputs=_script_box,
    outputs=_result_files,
    description="Upload your script text, and the app will generate audio and an accurate SRT file for download."
)

app.launch()