finnstrom3693
/

opus-mt-id-en

+import gradio as gr
+from transformers import MarianMTModel, MarianTokenizer
+import torch
+from nltk.tokenize import sent_tokenize, LineTokenizer
+import math
+import nltk
# Download the NLTK "punkt_tab" resource at import time (sentence tokenizer data).
nltk.download("punkt_tab")

# Load the Marian translation model and its matching tokenizer.
# NOTE(review): "opus-mt-id-en" carries no organisation prefix, so it presumably
# resolves to a local directory next to this script -- confirm, or switch to the
# full Hub id if the weights are meant to be downloaded.
model_name = "opus-mt-id-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
+# Define the translation function with adaptive input handling
def translate_id_en(text):
    """Translate Indonesian text to English, line by line.

    The input is split into lines with NLTK's ``LineTokenizer``; each line is
    split into sentences with ``sent_tokenize``; the sentences are translated
    in batches of at most 8 using the module-level ``tokenizer`` and ``model``.

    Args:
        text: The text to translate. ``None`` or an empty string yields ``""``.

    Returns:
        The translated text. Sentences that came from the same input line are
        joined with a single space; lines are separated by a blank line.
    """
    # The UI/API layer may hand over None instead of a string; it cannot be
    # tokenized, so return the same result an empty string would produce.
    if not text:
        return ""

    batch_size = 8
    translated_paragraphs = []
    for paragraph in LineTokenizer().tokenize(text):
        sentences = sent_tokenize(paragraph)
        translated = []
        # Walk the sentence list in fixed-size slices.
        for start in range(0, len(sentences), batch_size):
            sent_batch = sentences[start:start + batch_size]
            # padding=True aligns the batch to its longest sentence;
            # truncation=True cuts over-long sentences instead of failing.
            model_inputs = tokenizer(
                sent_batch, return_tensors="pt", padding=True, truncation=True
            )
            # Inference only: no gradients are needed.
            with torch.no_grad():
                generated = model.generate(**model_inputs)
            translated.extend(
                tokenizer.batch_decode(generated, skip_special_tokens=True)
            )
        translated_paragraphs.append(" ".join(translated))

    # Combine all translated lines into the final text.
    return "\n\n".join(translated_paragraphs)
+# Define the Gradio interface
# Build the two text areas first so the Interface call stays short.
_input_box = gr.Textbox(
    lines=12,
    placeholder="Enter Indonesian text...",
    label="Input (Indonesian)",
)
_output_box = gr.Textbox(lines=12, label="Output (English)")

# Wire the translation function to the input and output boxes.
iface = gr.Interface(
    fn=translate_id_en,
    inputs=_input_box,
    outputs=_output_box,
    title="Indonesian to English Translator",
    description="Translate Indonesian text to English using the opus-mt-id-en model.",
)

# Start the Gradio app with its default launch settings.
iface.launch()