File size: 1,665 Bytes
9dc25d9
 
56e3ed2
9dc25d9
56e3ed2
9dc25d9
56e3ed2
 
 
 
 
 
9dc25d9
56e3ed2
9dc25d9
 
 
 
 
 
 
 
 
56e3ed2
9dc25d9
 
 
 
 
56e3ed2
f439ce7
 
 
 
 
 
56e3ed2
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from huggingface_hub import InferenceClient
import gradio as gr
import re

# Module-level inference client for the "mistralai/Mixtral-8x7B-Instruct-v0.1"
# model; translate_line() sends its prompts through this object.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# Matches the start of an SRT cue timing line, e.g. "00:00:01,000 --> 00:00:04,000".
_SRT_TIMESTAMP_RE = re.compile(r'\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}')


def parse_srt(content):
    """Yield the subtitle text lines of an SRT document.

    Cue sequence numbers, cue timing lines and blank lines are skipped; every
    remaining line is yielded with surrounding whitespace removed.

    Args:
        content: The full SRT file as a string. LF, CRLF and CR line endings
            are all accepted, as is a leading byte-order mark.

    Yields:
        Each non-empty text line, in file order.

    Note:
        A subtitle line consisting only of digits cannot be told apart from a
        cue sequence number and is therefore skipped as well.
    """
    # A byte-order mark survives a plain "utf-8" decode and is not whitespace,
    # so it would make the very first cue number fail the isdigit() check.
    for raw_line in content.lstrip("\ufeff").splitlines():
        # Strip before classifying: a trailing "\r" (CRLF files split on "\n")
        # or stray indentation must not turn a cue number into "text".
        text = raw_line.strip()
        if not text or text.isdigit() or _SRT_TIMESTAMP_RE.match(text):
            continue
        yield text

def translate_line(line, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """Ask the model to translate one line of text into English.

    The prompt ``"Translate to English: <line>"`` is sent through the
    module-level ``client`` with streaming enabled, and the text of every
    streamed token is concatenated into the returned string.

    Args:
        line: The text to translate.
        temperature: Sampling temperature forwarded to the generation call.
        max_new_tokens: Upper bound on generated tokens, forwarded as-is.
        top_p: Nucleus-sampling threshold, forwarded as-is.
        repetition_penalty: Repetition penalty, forwarded as-is.

    Returns:
        The concatenated token text produced by the model.
    """
    prompt = f"Translate to English: {line}"
    token_stream = client.text_generation(
        prompt,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # fixed seed is passed on every call
        stream=True,
        details=True,  # needed so each streamed item exposes ``.token``
        return_full_text=False,
    )
    return "".join(chunk.token.text for chunk in token_stream)

def translate_srt(file_info):
    """Translate every text line of an uploaded SRT file.

    Args:
        file_info: The upload as delivered by the UI. Accepted forms are raw
            ``bytes``/``bytearray`` content, a filesystem path (``str`` or
            path-like), an object exposing the path as ``.name`` (a temp-file
            wrapper), or ``None`` when nothing was uploaded.

    Returns:
        The translated lines joined with newlines, or an empty string when no
        file was supplied or the file contains no text.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
        OSError: If a supplied path cannot be read.
    """
    if file_info is None:
        return ""

    if isinstance(file_info, (bytes, bytearray)):
        raw = bytes(file_info)
    else:
        # The file component hands over a path or a temp-file wrapper unless it
        # is configured for binary delivery, so read the bytes ourselves.
        if isinstance(file_info, str) or hasattr(file_info, "__fspath__"):
            path = file_info
        else:
            path = file_info.name
        with open(path, "rb") as srt_file:
            raw = srt_file.read()

    # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading BOM.
    content = raw.decode("utf-8-sig")
    if not content.strip():
        return ""

    return "\n".join(translate_line(line) for line in parse_srt(content))

# Assemble the UI: a file upload feeding translate_srt, with the result shown
# in a text box, then launch it.
srt_upload = gr.File(label="Upload SRT File")
translation_box = gr.Textbox(label="Translated Text")
demo = gr.Interface(
    fn=translate_srt,
    inputs=srt_upload,
    outputs=translation_box,
    title="SRT Translator",
    description="Upload an SRT file to translate its content line by line.",
)
demo.launch()