# Provenance (Hugging Face file-view header, not part of the program):
# author Lenylvt, commit f439ce7 (verified), message "Update app.py", size 1.67 kB.
from huggingface_hub import InferenceClient
import gradio as gr
import re
# Module-level InferenceClient bound to the model id below; translate_line()
# sends every translation prompt through this shared object.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
# Matches the start of an SRT cue timing line, e.g. "00:00:01,000 --> 00:00:02,500".
_SRT_TIMESTAMP_RE = re.compile(r'\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}')


def parse_srt(content):
    """Yield the subtitle text lines of an SRT document.

    Cue index lines (digits only), cue timing lines and blank lines are
    skipped. Each remaining line is yielded with surrounding whitespace
    removed.

    Lines are classified *after* stripping so that files with Windows
    (CRLF) line endings or a leading byte-order mark do not leak their cue
    numbers into the output as if they were subtitle text.

    Note: a subtitle line consisting solely of digits cannot be told apart
    from a cue index by this line-based filter and is skipped as well.

    Args:
        content: The full SRT document as a string.

    Yields:
        Each subtitle text line, in document order.
    """
    # A decoded UTF-8 BOM ("\ufeff") is not whitespace, so strip() would leave
    # it glued to the first cue number; remove it explicitly.
    for raw_line in content.lstrip("\ufeff").splitlines():
        text = raw_line.strip()
        if not text or text.isdigit() or _SRT_TIMESTAMP_RE.match(text):
            continue
        yield text
def translate_line(line, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """Ask the module-level ``client`` to translate one line into English.

    The request is made in streaming mode; the text of every streamed token
    is concatenated and returned as a single string.

    Args:
        line: The text to translate.
        temperature: Sampling temperature forwarded to the generation call.
        max_new_tokens: Upper bound on generated tokens, forwarded as-is.
        top_p: Nucleus-sampling value forwarded to the generation call.
        repetition_penalty: Repetition penalty forwarded to the generation call.

    Returns:
        The concatenated text of all streamed tokens.
    """
    sampling_options = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        # Fixed seed, passed on every request.
        "seed": 42,
    }
    prompt = f"Translate to English: {line}"
    token_stream = client.text_generation(
        prompt,
        **sampling_options,
        stream=True,
        details=True,
        return_full_text=False,
    )
    return "".join(chunk.token.text for chunk in token_stream)
def translate_srt(file_info):
    """Translate every subtitle text line of an uploaded SRT file.

    The upload may arrive in several shapes depending on how the Gradio
    ``File`` input is configured, so all of them are accepted:

    * ``bytes`` / ``bytearray`` - the raw file content;
    * ``str`` or path-like - a path to the uploaded file on disk;
    * an object exposing ``.name`` - a temp-file wrapper whose ``name`` is
      the path on disk.

    The original implementation called ``.decode`` unconditionally, which
    raises ``AttributeError`` for anything but bytes.

    Args:
        file_info: The uploaded file in one of the shapes above, or ``None``
            when nothing was uploaded.

    Returns:
        The translated lines joined with newlines; an empty string when no
        file was provided.
    """
    if file_info is None:
        # Nothing uploaded: return empty output instead of crashing.
        return ""

    # "utf-8-sig" decodes plain UTF-8 and additionally drops a leading BOM.
    if isinstance(file_info, (bytes, bytearray)):
        content = bytes(file_info).decode("utf-8-sig")
    else:
        if isinstance(file_info, str) or hasattr(file_info, "__fspath__"):
            srt_path = file_info
        else:
            # Temp-file style object: its `name` attribute is the path.
            srt_path = file_info.name
        with open(srt_path, encoding="utf-8-sig") as srt_file:
            content = srt_file.read()

    return "\n".join(translate_line(line) for line in parse_srt(content))
# Build the web UI: a single file upload feeding translate_srt, with the
# translated text shown in a textbox.
demo = gr.Interface(
    fn=translate_srt,
    inputs=gr.File(label="Upload SRT File"),
    outputs=gr.Textbox(label="Translated Text"),
    title="SRT Translator",
    description="Upload an SRT file to translate its content line by line.",
)

# Start serving the interface.
demo.launch()