Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,16 @@
|
|
1 |
-
import io
|
2 |
-
import os
|
3 |
-
import requests
|
4 |
import streamlit as st
|
5 |
import pandas as pd
|
6 |
import pysrt
|
7 |
from transformers import MarianMTModel, MarianTokenizer
|
8 |
import tempfile
|
9 |
-
|
10 |
-
|
11 |
|
12 |
def fetch_languages(url):
|
13 |
response = requests.get(url)
|
14 |
if response.status_code == 200:
|
15 |
-
# Convert bytes to a string using decode, then create a file-like object with io.StringIO
|
16 |
csv_content = response.content.decode('utf-8')
|
17 |
-
df = pd.read_csv(
|
18 |
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
|
19 |
df['ISO 639-1'] = df['ISO 639-1'].str.strip()
|
20 |
language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']} - {row['Language Name']}") for index, row in df.iterrows()]
|
@@ -42,19 +38,12 @@ def translate_text(text, source_language_code, target_language_code):
|
|
42 |
def translate_srt(input_file, source_language_code, target_language_code):
|
43 |
subs = pysrt.open(input_file)
|
44 |
translated_subs = []
|
45 |
-
progress_bar = st.progress(0)
|
46 |
for idx, sub in enumerate(subs):
|
47 |
translated_text = translate_text(sub.text, source_language_code, target_language_code)
|
48 |
translated_sub = pysrt.SubRipItem(index=idx+1, start=sub.start, end=sub.end, text=translated_text)
|
49 |
translated_subs.append(translated_sub)
|
50 |
-
progress_bar.progress((idx + 1) / len(subs))
|
51 |
translated_file = pysrt.SubRipFile(translated_subs)
|
52 |
-
|
53 |
-
with tempfile.NamedTemporaryFile(suffix=".srt", delete=False) as tmp_file:
|
54 |
-
translated_file.save(tmp_file.name)
|
55 |
-
translated_srt_path = tmp_file.name
|
56 |
-
progress_bar.empty()
|
57 |
-
return translated_srt_path
|
58 |
|
59 |
st.title("SRT Translator")
|
60 |
st.write("Translate subtitles from one language to another.")
|
@@ -63,14 +52,29 @@ st.write("Translate subtitles from one language to another.")
|
|
63 |
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
|
64 |
language_options = fetch_languages(url)
|
65 |
|
66 |
-
source_language_code
|
67 |
-
|
|
|
|
|
|
|
68 |
|
69 |
file_input = st.file_uploader("Upload SRT File", type=["srt"])
|
70 |
|
71 |
-
if file_input is not None:
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import pysrt
|
4 |
from transformers import MarianMTModel, MarianTokenizer
|
5 |
import tempfile
|
6 |
+
from io import BytesIO
|
7 |
+
import requests
|
8 |
|
9 |
def fetch_languages(url):
|
10 |
response = requests.get(url)
|
11 |
if response.status_code == 200:
|
|
|
12 |
csv_content = response.content.decode('utf-8')
|
13 |
+
df = pd.read_csv(BytesIO(response.content), delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
|
14 |
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
|
15 |
df['ISO 639-1'] = df['ISO 639-1'].str.strip()
|
16 |
language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']} - {row['Language Name']}") for index, row in df.iterrows()]
|
|
|
38 |
def translate_srt(input_file, source_language_code, target_language_code):
|
39 |
subs = pysrt.open(input_file)
|
40 |
translated_subs = []
|
|
|
41 |
for idx, sub in enumerate(subs):
|
42 |
translated_text = translate_text(sub.text, source_language_code, target_language_code)
|
43 |
translated_sub = pysrt.SubRipItem(index=idx+1, start=sub.start, end=sub.end, text=translated_text)
|
44 |
translated_subs.append(translated_sub)
|
|
|
45 |
translated_file = pysrt.SubRipFile(translated_subs)
|
46 |
+
return translated_file
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
st.title("SRT Translator")
|
49 |
st.write("Translate subtitles from one language to another.")
|
|
|
52 |
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
|
53 |
language_options = fetch_languages(url)
|
54 |
|
55 |
+
source_language_code, target_language_code = None, None
|
56 |
+
|
57 |
+
if language_options:
|
58 |
+
source_language_code = st.selectbox("Select Source Language", options=language_options, format_func=lambda x: x[1])[0]
|
59 |
+
target_language_code = st.selectbox("Select Target Language", options=language_options, format_func=lambda x: x[1])[0]
|
60 |
|
61 |
file_input = st.file_uploader("Upload SRT File", type=["srt"])
|
62 |
|
63 |
+
if file_input is not None and source_language_code and target_language_code:
|
64 |
+
translated_srt = translate_srt(file_input, source_language_code, target_language_code)
|
65 |
+
|
66 |
+
# Save the translated subtitles to an in-memory buffer
|
67 |
+
buffer = BytesIO()
|
68 |
+
translated_srt.save(buffer, encoding='utf-8')
|
69 |
+
buffer.seek(0)
|
70 |
+
|
71 |
+
# Convert the BytesIO buffer to bytes for the download button
|
72 |
+
translated_srt_bytes = buffer.getvalue()
|
73 |
+
|
74 |
+
# Create a download button and serve the translated subtitles as a downloadable file
|
75 |
+
st.download_button(
|
76 |
+
label="Download Translated SRT",
|
77 |
+
data=translated_srt_bytes,
|
78 |
+
file_name="translated_subtitles.srt",
|
79 |
+
mime="text/plain",
|
80 |
+
)
|