Diarization now supports version selection, with the default set to speaker-diarization-3.1.
Browse files- app.py +13 -7
- requirements-fasterWhisper.txt +1 -1
- requirements-whisper.txt +1 -1
- requirements.txt +1 -1
- src/config.py +3 -0
- src/diarization/diarization.py +11 -10
- src/diarization/diarizationContainer.py +10 -7
- src/utils.py +14 -8
app.py
CHANGED
@@ -19,7 +19,6 @@ from src.diarization.diarization import Diarization
|
|
19 |
from src.diarization.diarizationContainer import DiarizationContainer
|
20 |
from src.hooks.progressListener import ProgressListener
|
21 |
from src.hooks.subTaskProgressListener import SubTaskProgressListener
|
22 |
-
from src.hooks.whisperProgressHook import create_progress_listener_handle
|
23 |
from src.modelCache import ModelCache
|
24 |
from src.prompts.jsonPromptStrategy import JsonPromptStrategy
|
25 |
from src.prompts.prependPromptStrategy import PrependPromptStrategy
|
@@ -32,7 +31,7 @@ import ffmpeg
|
|
32 |
# UI
|
33 |
import gradio as gr
|
34 |
|
35 |
-
from src.download import ExceededMaximumDuration
|
36 |
from src.utils import optional_int, slugify, str2bool, write_srt, write_srt_original, write_vtt
|
37 |
from src.vad import AbstractTranscription, NonSpeechStrategy, PeriodicTranscriptionConfig, TranscriptionConfig, VadPeriodicTranscription, VadSileroTranscription
|
38 |
from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
|
@@ -100,11 +99,15 @@ class WhisperTranscriber:
|
|
100 |
self.vad_cpu_cores = min(os.cpu_count(), MAX_AUTO_CPU_CORES)
|
101 |
print("[Auto parallel] Using GPU devices " + str(self.parallel_device_list) + " and " + str(self.vad_cpu_cores) + " CPU cores for VAD/transcription.")
|
102 |
|
103 |
-
def set_diarization(self, auth_token: str, enable_daemon_process: bool = True, **kwargs):
|
|
|
|
|
104 |
if self.diarization is None:
|
105 |
self.diarization = DiarizationContainer(auth_token=auth_token, enable_daemon_process=enable_daemon_process,
|
106 |
auto_cleanup_timeout_seconds=self.app_config.diarization_process_timeout,
|
107 |
-
cache=self.model_cache)
|
|
|
|
|
108 |
# Set parameters
|
109 |
self.diarization_kwargs = kwargs
|
110 |
|
@@ -257,6 +260,7 @@ class WhisperTranscriber:
|
|
257 |
diarization_speakers: int = decodeOptions.pop("diarization_speakers", 2)
|
258 |
diarization_min_speakers: int = decodeOptions.pop("diarization_min_speakers", 1)
|
259 |
diarization_max_speakers: int = decodeOptions.pop("diarization_max_speakers", 8)
|
|
|
260 |
highlight_words: bool = decodeOptions.pop("highlight_words", False)
|
261 |
|
262 |
temperature: float = decodeOptions.pop("temperature", None)
|
@@ -290,9 +294,9 @@ class WhisperTranscriber:
|
|
290 |
|
291 |
if diarization:
|
292 |
if diarization_speakers is not None and diarization_speakers < 1:
|
293 |
-
self.set_diarization(auth_token=self.app_config.auth_token, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
|
294 |
else:
|
295 |
-
self.set_diarization(auth_token=self.app_config.auth_token, num_speakers=diarization_speakers, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers)
|
296 |
else:
|
297 |
self.unset_diarization()
|
298 |
|
@@ -1137,7 +1141,8 @@ def create_ui(app_config: ApplicationConfig):
|
|
1137 |
gr.Checkbox(label="Diarization", value=app_config.diarization, interactive=has_diarization_libs, elem_id="diarization", info="Whether to perform speaker diarization"),
|
1138 |
gr.Number(label="Diarization - Speakers", precision=0, value=app_config.diarization_speakers, interactive=has_diarization_libs, elem_id="diarization_speakers", info="The number of speakers to detect"),
|
1139 |
gr.Number(label="Diarization - Min Speakers", precision=0, value=app_config.diarization_min_speakers, interactive=has_diarization_libs, elem_id="diarization_min_speakers", info="The minimum number of speakers to detect"),
|
1140 |
-
gr.Number(label="Diarization - Max Speakers", precision=0, value=app_config.diarization_max_speakers, interactive=has_diarization_libs, elem_id="diarization_max_speakers", info="The maximum number of speakers to detect")
|
|
|
1141 |
}
|
1142 |
|
1143 |
common_output = lambda : [
|
@@ -1439,6 +1444,7 @@ if __name__ == '__main__':
|
|
1439 |
parser.add_argument("--diarization_max_speakers", type=int, default=default_app_config.diarization_max_speakers, help="Maximum number of speakers")
|
1440 |
parser.add_argument("--diarization_process_timeout", type=int, default=default_app_config.diarization_process_timeout, \
|
1441 |
help="Number of seconds before inactivate diarization processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
|
|
|
1442 |
|
1443 |
args = parser.parse_args().__dict__
|
1444 |
|
|
|
19 |
from src.diarization.diarizationContainer import DiarizationContainer
|
20 |
from src.hooks.progressListener import ProgressListener
|
21 |
from src.hooks.subTaskProgressListener import SubTaskProgressListener
|
|
|
22 |
from src.modelCache import ModelCache
|
23 |
from src.prompts.jsonPromptStrategy import JsonPromptStrategy
|
24 |
from src.prompts.prependPromptStrategy import PrependPromptStrategy
|
|
|
31 |
# UI
|
32 |
import gradio as gr
|
33 |
|
34 |
+
from src.download import ExceededMaximumDuration
|
35 |
from src.utils import optional_int, slugify, str2bool, write_srt, write_srt_original, write_vtt
|
36 |
from src.vad import AbstractTranscription, NonSpeechStrategy, PeriodicTranscriptionConfig, TranscriptionConfig, VadPeriodicTranscription, VadSileroTranscription
|
37 |
from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
|
|
|
99 |
self.vad_cpu_cores = min(os.cpu_count(), MAX_AUTO_CPU_CORES)
|
100 |
print("[Auto parallel] Using GPU devices " + str(self.parallel_device_list) + " and " + str(self.vad_cpu_cores) + " CPU cores for VAD/transcription.")
|
101 |
|
102 |
+
def set_diarization(self, auth_token: str, enable_daemon_process: bool = True, diarization_version: str = None, **kwargs):
|
103 |
+
if diarization_version == None:
|
104 |
+
diarization_version = self.app_config.diarization_version
|
105 |
if self.diarization is None:
|
106 |
self.diarization = DiarizationContainer(auth_token=auth_token, enable_daemon_process=enable_daemon_process,
|
107 |
auto_cleanup_timeout_seconds=self.app_config.diarization_process_timeout,
|
108 |
+
cache=self.model_cache, diarization_version=diarization_version)
|
109 |
+
else:
|
110 |
+
self.diarization.diarization_version=diarization_version
|
111 |
# Set parameters
|
112 |
self.diarization_kwargs = kwargs
|
113 |
|
|
|
260 |
diarization_speakers: int = decodeOptions.pop("diarization_speakers", 2)
|
261 |
diarization_min_speakers: int = decodeOptions.pop("diarization_min_speakers", 1)
|
262 |
diarization_max_speakers: int = decodeOptions.pop("diarization_max_speakers", 8)
|
263 |
+
diarization_version: str = decodeOptions.pop("diarization_version", "speaker-diarization-3.1")
|
264 |
highlight_words: bool = decodeOptions.pop("highlight_words", False)
|
265 |
|
266 |
temperature: float = decodeOptions.pop("temperature", None)
|
|
|
294 |
|
295 |
if diarization:
|
296 |
if diarization_speakers is not None and diarization_speakers < 1:
|
297 |
+
self.set_diarization(auth_token=self.app_config.auth_token, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers, diarization_version=diarization_version)
|
298 |
else:
|
299 |
+
self.set_diarization(auth_token=self.app_config.auth_token, num_speakers=diarization_speakers, min_speakers=diarization_min_speakers, max_speakers=diarization_max_speakers, diarization_version=diarization_version)
|
300 |
else:
|
301 |
self.unset_diarization()
|
302 |
|
|
|
1141 |
gr.Checkbox(label="Diarization", value=app_config.diarization, interactive=has_diarization_libs, elem_id="diarization", info="Whether to perform speaker diarization"),
|
1142 |
gr.Number(label="Diarization - Speakers", precision=0, value=app_config.diarization_speakers, interactive=has_diarization_libs, elem_id="diarization_speakers", info="The number of speakers to detect"),
|
1143 |
gr.Number(label="Diarization - Min Speakers", precision=0, value=app_config.diarization_min_speakers, interactive=has_diarization_libs, elem_id="diarization_min_speakers", info="The minimum number of speakers to detect"),
|
1144 |
+
gr.Number(label="Diarization - Max Speakers", precision=0, value=app_config.diarization_max_speakers, interactive=has_diarization_libs, elem_id="diarization_max_speakers", info="The maximum number of speakers to detect"),
|
1145 |
+
gr.Dropdown(label="Diarization Version", choices=["speaker-diarization-3.1", "speaker-diarization-3.0", "[email protected]"], value=app_config.diarization_version, elem_id="diarization_version", info="pyannote.audio speaker diarization pipeline v3.1 is expected to be much better (and faster) than v2.x. [Benchmark](https://github.com/pyannote/pyannote-audio?tab=readme-ov-file#benchmark)"),
|
1146 |
}
|
1147 |
|
1148 |
common_output = lambda : [
|
|
|
1444 |
parser.add_argument("--diarization_max_speakers", type=int, default=default_app_config.diarization_max_speakers, help="Maximum number of speakers")
|
1445 |
parser.add_argument("--diarization_process_timeout", type=int, default=default_app_config.diarization_process_timeout, \
|
1446 |
help="Number of seconds before inactivate diarization processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
|
1447 |
+
parser.add_argument('--diarization_version', type=str, default=default_app_config.diarization_version, help='Specify the diarization version, defaulting to speaker-diarization-3.1')
|
1448 |
|
1449 |
args = parser.parse_args().__dict__
|
1450 |
|
requirements-fasterWhisper.txt
CHANGED
@@ -20,7 +20,7 @@ sentencepiece
|
|
20 |
# Needed by diarization
|
21 |
intervaltree
|
22 |
srt
|
23 |
-
|
24 |
|
25 |
# Needed by ALMA-GPTQ
|
26 |
accelerate
|
|
|
20 |
# Needed by diarization
|
21 |
intervaltree
|
22 |
srt
|
23 |
+
pyannote.audio
|
24 |
|
25 |
# Needed by ALMA-GPTQ
|
26 |
accelerate
|
requirements-whisper.txt
CHANGED
@@ -20,7 +20,7 @@ sentencepiece
|
|
20 |
# Needed by diarization
|
21 |
intervaltree
|
22 |
srt
|
23 |
-
|
24 |
|
25 |
# Needed by ALMA-GPTQ
|
26 |
accelerate
|
|
|
20 |
# Needed by diarization
|
21 |
intervaltree
|
22 |
srt
|
23 |
+
pyannote.audio
|
24 |
|
25 |
# Needed by ALMA-GPTQ
|
26 |
accelerate
|
requirements.txt
CHANGED
@@ -20,7 +20,7 @@ sentencepiece
|
|
20 |
# Needed by diarization
|
21 |
intervaltree
|
22 |
srt
|
23 |
-
|
24 |
|
25 |
# Needed by ALMA-GPTQ
|
26 |
accelerate
|
|
|
20 |
# Needed by diarization
|
21 |
intervaltree
|
22 |
srt
|
23 |
+
pyannote.audio
|
24 |
|
25 |
# Needed by ALMA-GPTQ
|
26 |
accelerate
|
src/config.py
CHANGED
@@ -78,6 +78,7 @@ class ApplicationConfig:
|
|
78 |
auth_token: str = None, diarization: bool = False, diarization_speakers: int = 2,
|
79 |
diarization_min_speakers: int = 1, diarization_max_speakers: int = 5,
|
80 |
diarization_process_timeout: int = 60,
|
|
|
81 |
# Translation
|
82 |
translation_batch_size: int = 2,
|
83 |
translation_no_repeat_ngram_size: int = 4,
|
@@ -148,6 +149,8 @@ class ApplicationConfig:
|
|
148 |
self.diarization_min_speakers = diarization_min_speakers
|
149 |
self.diarization_max_speakers = diarization_max_speakers
|
150 |
self.diarization_process_timeout = diarization_process_timeout
|
|
|
|
|
151 |
# Translation
|
152 |
self.translation_batch_size = translation_batch_size
|
153 |
self.translation_no_repeat_ngram_size = translation_no_repeat_ngram_size
|
|
|
78 |
auth_token: str = None, diarization: bool = False, diarization_speakers: int = 2,
|
79 |
diarization_min_speakers: int = 1, diarization_max_speakers: int = 5,
|
80 |
diarization_process_timeout: int = 60,
|
81 |
+
diarization_version: str = "speaker-diarization-3.1",
|
82 |
# Translation
|
83 |
translation_batch_size: int = 2,
|
84 |
translation_no_repeat_ngram_size: int = 4,
|
|
|
149 |
self.diarization_min_speakers = diarization_min_speakers
|
150 |
self.diarization_max_speakers = diarization_max_speakers
|
151 |
self.diarization_process_timeout = diarization_process_timeout
|
152 |
+
self.diarization_version = diarization_version
|
153 |
+
|
154 |
# Translation
|
155 |
self.translation_batch_size = translation_batch_size
|
156 |
self.translation_no_repeat_ngram_size = translation_no_repeat_ngram_size
|
src/diarization/diarization.py
CHANGED
@@ -26,15 +26,16 @@ class DiarizationEntry:
|
|
26 |
}
|
27 |
|
28 |
class Diarization:
|
29 |
-
def __init__(self, auth_token=None):
|
30 |
if auth_token is None:
|
31 |
auth_token = os.environ.get("HF_ACCESS_TOKEN")
|
32 |
if auth_token is None:
|
33 |
raise ValueError("No HuggingFace API Token provided - please use the --auth_token argument or set the HF_ACCESS_TOKEN environment variable")
|
34 |
|
35 |
-
self.auth_token
|
36 |
-
self.initialized
|
37 |
-
self.pipeline
|
|
|
38 |
|
39 |
@staticmethod
|
40 |
def has_libraries():
|
@@ -47,17 +48,17 @@ class Diarization:
|
|
47 |
|
48 |
def initialize(self):
|
49 |
"""
|
50 |
-
1.Install pyannote.audio 3.
|
51 |
2.Accept pyannote/segmentation-3.0 user conditions
|
52 |
-
3.Accept pyannote/speaker-diarization-3.
|
53 |
4.Create access token at hf.co/settings/tokens.
|
54 |
-
https://huggingface.co/pyannote/speaker-diarization-3.
|
55 |
"""
|
56 |
if self.initialized:
|
57 |
return
|
58 |
from pyannote.audio import Pipeline
|
59 |
-
|
60 |
-
self.pipeline = Pipeline.from_pretrained("pyannote/
|
61 |
self.initialized = True
|
62 |
|
63 |
# Load GPU mode if available
|
@@ -174,7 +175,7 @@ def main():
|
|
174 |
# Read whisper JSON or SRT file
|
175 |
whisper_result = load_transcript(args.whisper_file)
|
176 |
|
177 |
-
diarization = Diarization(auth_token=args.auth_token)
|
178 |
diarization_result = list(diarization.run(args.audio_file, num_speakers=args.num_speakers, min_speakers=args.min_speakers, max_speakers=args.max_speakers))
|
179 |
|
180 |
# Print result
|
|
|
26 |
}
|
27 |
|
28 |
class Diarization:
|
29 |
+
def __init__(self, auth_token=None, diarization_version=None):
|
30 |
if auth_token is None:
|
31 |
auth_token = os.environ.get("HF_ACCESS_TOKEN")
|
32 |
if auth_token is None:
|
33 |
raise ValueError("No HuggingFace API Token provided - please use the --auth_token argument or set the HF_ACCESS_TOKEN environment variable")
|
34 |
|
35 |
+
self.auth_token = auth_token
|
36 |
+
self.initialized = False
|
37 |
+
self.pipeline = None
|
38 |
+
self.diarization_version = diarization_version
|
39 |
|
40 |
@staticmethod
|
41 |
def has_libraries():
|
|
|
48 |
|
49 |
def initialize(self):
|
50 |
"""
|
51 |
+
1.Install pyannote.audio 3.1 with pip install pyannote.audio
|
52 |
2.Accept pyannote/segmentation-3.0 user conditions
|
53 |
+
3.Accept pyannote/speaker-diarization-3.1 user conditions
|
54 |
4.Create access token at hf.co/settings/tokens.
|
55 |
+
https://huggingface.co/pyannote/speaker-diarization-3.1
|
56 |
"""
|
57 |
if self.initialized:
|
58 |
return
|
59 |
from pyannote.audio import Pipeline
|
60 |
+
|
61 |
+
self.pipeline = Pipeline.from_pretrained(f"pyannote/{self.diarization_version}", use_auth_token=self.auth_token)
|
62 |
self.initialized = True
|
63 |
|
64 |
# Load GPU mode if available
|
|
|
175 |
# Read whisper JSON or SRT file
|
176 |
whisper_result = load_transcript(args.whisper_file)
|
177 |
|
178 |
+
diarization = Diarization(auth_token=args.auth_token, diarization_version=args.diarization_version)
|
179 |
diarization_result = list(diarization.run(args.audio_file, num_speakers=args.num_speakers, min_speakers=args.min_speakers, max_speakers=args.max_speakers))
|
180 |
|
181 |
# Print result
|
src/diarization/diarizationContainer.py
CHANGED
@@ -4,13 +4,14 @@ from src.modelCache import GLOBAL_MODEL_CACHE, ModelCache
|
|
4 |
from src.vadParallel import ParallelContext
|
5 |
|
6 |
class DiarizationContainer:
|
7 |
-
def __init__(self, auth_token: str = None, enable_daemon_process: bool = True, auto_cleanup_timeout_seconds=60, cache: ModelCache = None):
|
8 |
self.auth_token = auth_token
|
9 |
self.enable_daemon_process = enable_daemon_process
|
10 |
self.auto_cleanup_timeout_seconds = auto_cleanup_timeout_seconds
|
11 |
self.diarization_context: ParallelContext = None
|
12 |
self.cache = cache
|
13 |
self.model = None
|
|
|
14 |
|
15 |
def run(self, audio_file, **kwargs):
|
16 |
# Create parallel context if needed
|
@@ -37,18 +38,18 @@ class DiarizationContainer:
|
|
37 |
return self.model.mark_speakers(diarization_result, whisper_result)
|
38 |
|
39 |
# Create a new diarization model (calling mark_speakers will not initialize pyannote.audio)
|
40 |
-
model = Diarization(self.auth_token)
|
41 |
return model.mark_speakers(diarization_result, whisper_result)
|
42 |
|
43 |
def get_model(self):
|
44 |
# Lazy load the model
|
45 |
if (self.model is None):
|
46 |
if self.cache:
|
47 |
-
print("Loading
|
48 |
-
self.model = self.cache.get(
|
49 |
else:
|
50 |
-
print("Loading
|
51 |
-
self.model = Diarization(self.auth_token)
|
52 |
return self.model
|
53 |
|
54 |
def execute(self, audio_file, **kwargs):
|
@@ -66,7 +67,8 @@ class DiarizationContainer:
|
|
66 |
return {
|
67 |
"auth_token": self.auth_token,
|
68 |
"enable_daemon_process": self.enable_daemon_process,
|
69 |
-
"auto_cleanup_timeout_seconds": self.auto_cleanup_timeout_seconds
|
|
|
70 |
}
|
71 |
|
72 |
def __setstate__(self, state):
|
@@ -74,5 +76,6 @@ class DiarizationContainer:
|
|
74 |
self.enable_daemon_process = state["enable_daemon_process"]
|
75 |
self.auto_cleanup_timeout_seconds = state["auto_cleanup_timeout_seconds"]
|
76 |
self.diarization_context = None
|
|
|
77 |
self.cache = GLOBAL_MODEL_CACHE
|
78 |
self.model = None
|
|
|
4 |
from src.vadParallel import ParallelContext
|
5 |
|
6 |
class DiarizationContainer:
|
7 |
+
def __init__(self, auth_token: str = None, enable_daemon_process: bool = True, auto_cleanup_timeout_seconds=60, cache: ModelCache = None, diarization_version=None):
|
8 |
self.auth_token = auth_token
|
9 |
self.enable_daemon_process = enable_daemon_process
|
10 |
self.auto_cleanup_timeout_seconds = auto_cleanup_timeout_seconds
|
11 |
self.diarization_context: ParallelContext = None
|
12 |
self.cache = cache
|
13 |
self.model = None
|
14 |
+
self.diarization_version = diarization_version
|
15 |
|
16 |
def run(self, audio_file, **kwargs):
|
17 |
# Create parallel context if needed
|
|
|
38 |
return self.model.mark_speakers(diarization_result, whisper_result)
|
39 |
|
40 |
# Create a new diarization model (calling mark_speakers will not initialize pyannote.audio)
|
41 |
+
model = Diarization(self.auth_token, self.diarization_version)
|
42 |
return model.mark_speakers(diarization_result, whisper_result)
|
43 |
|
44 |
def get_model(self):
|
45 |
# Lazy load the model
|
46 |
if (self.model is None):
|
47 |
if self.cache:
|
48 |
+
print(f"Loading {self.diarization_version} model from cache")
|
49 |
+
self.model = self.cache.get(self.diarization_version, lambda : Diarization(self.auth_token, self.diarization_version))
|
50 |
else:
|
51 |
+
print(f"Loading {self.diarization_version} model")
|
52 |
+
self.model = Diarization(self.auth_token, self.diarization_version)
|
53 |
return self.model
|
54 |
|
55 |
def execute(self, audio_file, **kwargs):
|
|
|
67 |
return {
|
68 |
"auth_token": self.auth_token,
|
69 |
"enable_daemon_process": self.enable_daemon_process,
|
70 |
+
"auto_cleanup_timeout_seconds": self.auto_cleanup_timeout_seconds,
|
71 |
+
"diarization_version": self.diarization_version
|
72 |
}
|
73 |
|
74 |
def __setstate__(self, state):
|
|
|
76 |
self.enable_daemon_process = state["enable_daemon_process"]
|
77 |
self.auto_cleanup_timeout_seconds = state["auto_cleanup_timeout_seconds"]
|
78 |
self.diarization_context = None
|
79 |
+
self.diarization_version = state["diarization_version"]
|
80 |
self.cache = GLOBAL_MODEL_CACHE
|
81 |
self.model = None
|
src/utils.py
CHANGED
@@ -150,7 +150,7 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
|
|
150 |
yield segment
|
151 |
|
152 |
if segment_longest_speaker is not None:
|
153 |
-
segment_longest_speaker = segment_longest_speaker.replace("SPEAKER", "S")
|
154 |
|
155 |
subtitle_start = segment['start']
|
156 |
subtitle_end = segment['end']
|
@@ -160,7 +160,9 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
|
|
160 |
if len(words) == 0:
|
161 |
# Prepend the longest speaker ID if available
|
162 |
if segment_longest_speaker is not None:
|
163 |
-
text = f"
|
|
|
|
|
164 |
|
165 |
result = {
|
166 |
'start': subtitle_start,
|
@@ -175,12 +177,16 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
|
|
175 |
continue
|
176 |
|
177 |
if segment_longest_speaker is not None:
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
|
|
184 |
|
185 |
text_words = [text] if not highlight_words and text_original is not None and len(text_original) > 0 else [ this_word["word"] for this_word in words ]
|
186 |
|
|
|
150 |
yield segment
|
151 |
|
152 |
if segment_longest_speaker is not None:
|
153 |
+
segment_longest_speaker = "(" + segment_longest_speaker.replace("SPEAKER", "S") + ")"
|
154 |
|
155 |
subtitle_start = segment['start']
|
156 |
subtitle_end = segment['end']
|
|
|
160 |
if len(words) == 0:
|
161 |
# Prepend the longest speaker ID if available
|
162 |
if segment_longest_speaker is not None:
|
163 |
+
text = f"{segment_longest_speaker} {text}"
|
164 |
+
if text_original is not None and len(text_original) > 0:
|
165 |
+
text_original = f"{segment_longest_speaker} {text_original}"
|
166 |
|
167 |
result = {
|
168 |
'start': subtitle_start,
|
|
|
177 |
continue
|
178 |
|
179 |
if segment_longest_speaker is not None:
|
180 |
+
if words[0].get('word') != segment_longest_speaker:
|
181 |
+
# Add the beginning
|
182 |
+
words.insert(0, {
|
183 |
+
'start': subtitle_start,
|
184 |
+
'end' : subtitle_start,
|
185 |
+
'word' : segment_longest_speaker
|
186 |
+
})
|
187 |
+
text = f"{segment_longest_speaker} {text}"
|
188 |
+
if text_original is not None and len(text_original) > 0:
|
189 |
+
text_original = f"{segment_longest_speaker} {text_original}"
|
190 |
|
191 |
text_words = [text] if not highlight_words and text_original is not None and len(text_original) > 0 else [ this_word["word"] for this_word in words ]
|
192 |
|