hivecorp committed on
Commit
27bfe3b
·
verified ·
1 Parent(s): 22a64e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -19
app.py CHANGED
@@ -33,22 +33,23 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset):
33
  # Initialize SRT content
34
  srt_content = ""
35
  words = batch_text.split()
36
- num_segments = max(1, len(words) // 10) # Calculate number of segments based on ~10 words per segment
37
  segment_duration = actual_length / num_segments # Duration for each segment
38
  start_time = start_offset
 
39
 
40
  # Build SRT content with accurate timing
41
- for i in range(0, len(words), 10):
42
- segment_words = words[i:i+10]
43
- end_time = start_time + segment_duration
44
- srt_content += f"{i // 10 + 1 + (batch_num * 100)}\n"
45
  srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
46
  srt_content += " ".join(segment_words) + "\n\n"
47
  start_time = end_time
48
 
49
- return srt_content, audio_file, start_time
50
 
51
- # Batch processing function with concurrent processing and progress indicator
52
  async def batch_process_srt_and_audio(script_text, progress=gr.Progress()):
53
  batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
54
  all_srt_content = ""
@@ -56,16 +57,8 @@ async def batch_process_srt_and_audio(script_text, progress=gr.Progress()):
56
  start_offset = 0.0 # Track cumulative time offset for SRT timing
57
 
58
  # Prepare tasks for concurrent batch processing
59
- tasks = [
60
- generate_accurate_srt(batch_text, batch_num, start_offset)
61
- for batch_num, batch_text in enumerate(batches)
62
- ]
63
-
64
- # Execute tasks concurrently and update progress after each completion
65
- completed_tasks = 0
66
- total_tasks = len(tasks)
67
- for task in asyncio.as_completed(tasks):
68
- srt_content, audio_file, end_offset = await task
69
  all_srt_content += srt_content
70
 
71
  # Append the audio of each batch to the combined audio
@@ -77,8 +70,7 @@ async def batch_process_srt_and_audio(script_text, progress=gr.Progress()):
77
  os.remove(audio_file)
78
 
79
  # Update progress
80
- completed_tasks += 1
81
- progress(completed_tasks / total_tasks)
82
 
83
  # Generate unique names for the final files
84
  unique_id = uuid.uuid4()
 
33
  # Initialize SRT content
34
  srt_content = ""
35
  words = batch_text.split()
36
+ num_segments = max(1, len(words) // 15) # Group words into segments of ~15 words each
37
  segment_duration = actual_length / num_segments # Duration for each segment
38
  start_time = start_offset
39
+ min_display_duration = 1.5 # Set a minimum display time of 1.5 seconds per subtitle
40
 
41
  # Build SRT content with accurate timing
42
+ for i in range(0, len(words), 15): # Process ~15 words per subtitle
43
+ segment_words = words[i:i+15]
44
+ end_time = start_time + max(segment_duration, min_display_duration) # Ensure each subtitle shows at least min_display_duration
45
+ srt_content += f"{i // 15 + 1 + (batch_num * 100)}\n"
46
  srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
47
  srt_content += " ".join(segment_words) + "\n\n"
48
  start_time = end_time
49
 
50
+ return srt_content, audio_file, start_time # Return updated start time for cumulative tracking
51
 
52
+ # Batch processing function with cumulative timing and progress indicator
53
  async def batch_process_srt_and_audio(script_text, progress=gr.Progress()):
54
  batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
55
  all_srt_content = ""
 
57
  start_offset = 0.0 # Track cumulative time offset for SRT timing
58
 
59
  # Prepare tasks for concurrent batch processing
60
+ for batch_num, batch_text in enumerate(batches):
61
+ srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset)
 
 
 
 
 
 
 
 
62
  all_srt_content += srt_content
63
 
64
  # Append the audio of each batch to the combined audio
 
70
  os.remove(audio_file)
71
 
72
  # Update progress
73
+ progress((batch_num + 1) / len(batches))
 
74
 
75
  # Generate unique names for the final files
76
  unique_id = uuid.uuid4()