Spaces:

SCBconsulting
/

synclm-demo

Running

App Files Community

SCBconsulting committed 7 days ago

Commit

6f26572

verified ·

1 Parent(s): 576eef7

Update utils/translator.py

Browse files

Files changed (1) hide show

utils/translator.py +13 -17

utils/translator.py CHANGED Viewed

@@ -24,7 +24,7 @@ def clean_text(text: str) -> str:
 def chunk_text(text: str, max_chunk_chars: int = 500):
     """
-    Split long text into token-safe chunks.
     """
     words = text.split()
     chunks, current_chunk = [], ""
@@ -53,18 +53,12 @@ def translate_chunks(chunks, tokenizer, model):
     return " ".join(translated)
 def translate_to_portuguese(text: str) -> str:
-    """
-    🇺🇸 ➡️ 🇧🇷 Translate from English to Portuguese.
-    """
     if not text.strip():
         return "No input provided."
     chunks = chunk_text(clean_text(text))
     return translate_chunks(chunks, tokenizer_en_pt, model_en_pt)
 def translate_to_english(text: str) -> str:
-    """
-    🇧🇷 ➡️ 🇺🇸 Translate from Portuguese to English.
-    """
     if not text.strip():
         return "No input provided."
     chunks = chunk_text(clean_text(text))
@@ -72,8 +66,7 @@ def translate_to_english(text: str) -> str:
 def translate_text(text: str, direction: str = "en-pt") -> str:
     """
-    🌐 Wrapper para tradução direta via Gradio.
-    direction = "en-pt" ou "pt-en"
     """
     if direction == "en-pt":
         return translate_to_portuguese(text)
@@ -84,24 +77,27 @@ def translate_text(text: str, direction: str = "en-pt") -> str:
 # ========== Bilingual View ==========
-def bilingual_clauses(english_text: str) -> str:
     """
-    Create side-by-side bilingual clauses.
     """
-    clauses_en = chunk_text(clean_text(english_text), max_chunk_chars=300)
-    clauses_pt = [translate_to_portuguese(clause) for clause in clauses_en]
     bilingual_output = []
-    for en, pt in zip(clauses_en, clauses_pt):
-        bilingual_output.append(f"📘 EN: {en}\n📗 PT: {pt}\n" + "-" * 60)
     return "\n\n".join(bilingual_output)
 # ========== Export to DOCX ==========
 def export_to_word(text: str, filename: str = "translated_contract.docx") -> str:
     """
-    Export bilingual translation to a Word document.
     """
     doc = Document()
     doc.add_heading("Legal Translation Output", level=1)

 def chunk_text(text: str, max_chunk_chars: int = 500):
     """
+    Split long text into chunks based on character count.
     """
     words = text.split()
     chunks, current_chunk = [], ""
     return " ".join(translated)
 def translate_to_portuguese(text: str) -> str:
     if not text.strip():
         return "No input provided."
     chunks = chunk_text(clean_text(text))
     return translate_chunks(chunks, tokenizer_en_pt, model_en_pt)
 def translate_to_english(text: str) -> str:
     if not text.strip():
         return "No input provided."
     chunks = chunk_text(clean_text(text))
 def translate_text(text: str, direction: str = "en-pt") -> str:
     """
+    direction = 'en-pt' or 'pt-en'
     """
     if direction == "en-pt":
         return translate_to_portuguese(text)
 # ========== Bilingual View ==========
+def bilingual_clauses(text: str) -> str:
     """
+    Create bilingual clause-by-clause output (EN + PT).
     """
+    if not text.strip():
+        return "No input provided."
+    clauses_en = chunk_text(clean_text(text), max_chunk_chars=300)
     bilingual_output = []
+    for clause in clauses_en:
+        translated = translate_to_portuguese(clause)
+        bilingual_output.append(f"📘 EN: {clause}\n📗 PT: {translated}\n" + "-" * 60)
     return "\n\n".join(bilingual_output)
 # ========== Export to DOCX ==========
 def export_to_word(text: str, filename: str = "translated_contract.docx") -> str:
     """
+    Export text (bilingual or full) to Word DOCX.
     """
     doc = Document()
     doc.add_heading("Legal Translation Output", level=1)