SCBconsulting committed on
Commit
6f26572
·
verified ·
1 Parent(s): 576eef7

Update utils/translator.py

Browse files
Files changed (1) hide show
  1. utils/translator.py +13 -17
utils/translator.py CHANGED
@@ -24,7 +24,7 @@ def clean_text(text: str) -> str:
24
 
25
  def chunk_text(text: str, max_chunk_chars: int = 500):
26
  """
27
- Split long text into token-safe chunks.
28
  """
29
  words = text.split()
30
  chunks, current_chunk = [], ""
@@ -53,18 +53,12 @@ def translate_chunks(chunks, tokenizer, model):
53
  return " ".join(translated)
54
 
55
  def translate_to_portuguese(text: str) -> str:
56
- """
57
- 🇺🇸 ➡️ 🇧🇷 Translate from English to Portuguese.
58
- """
59
  if not text.strip():
60
  return "No input provided."
61
  chunks = chunk_text(clean_text(text))
62
  return translate_chunks(chunks, tokenizer_en_pt, model_en_pt)
63
 
64
  def translate_to_english(text: str) -> str:
65
- """
66
- 🇧🇷 ➡️ 🇺🇸 Translate from Portuguese to English.
67
- """
68
  if not text.strip():
69
  return "No input provided."
70
  chunks = chunk_text(clean_text(text))
@@ -72,8 +66,7 @@ def translate_to_english(text: str) -> str:
72
 
73
  def translate_text(text: str, direction: str = "en-pt") -> str:
74
  """
75
- 🌐 Wrapper para tradução direta via Gradio.
76
- direction = "en-pt" ou "pt-en"
77
  """
78
  if direction == "en-pt":
79
  return translate_to_portuguese(text)
@@ -84,24 +77,27 @@ def translate_text(text: str, direction: str = "en-pt") -> str:
84
 
85
  # ========== Bilingual View ==========
86
 
87
- def bilingual_clauses(english_text: str) -> str:
88
  """
89
- Create side-by-side bilingual clauses.
90
  """
91
- clauses_en = chunk_text(clean_text(english_text), max_chunk_chars=300)
92
- clauses_pt = [translate_to_portuguese(clause) for clause in clauses_en]
93
 
 
94
  bilingual_output = []
95
- for en, pt in zip(clauses_en, clauses_pt):
96
- bilingual_output.append(f"📘 EN: {en}\n📗 PT: {pt}\n" + "-" * 60)
97
-
 
 
98
  return "\n\n".join(bilingual_output)
99
 
100
  # ========== Export to DOCX ==========
101
 
102
  def export_to_word(text: str, filename: str = "translated_contract.docx") -> str:
103
  """
104
- Export bilingual translation to a Word document.
105
  """
106
  doc = Document()
107
  doc.add_heading("Legal Translation Output", level=1)
 
24
 
25
  def chunk_text(text: str, max_chunk_chars: int = 500):
26
  """
27
+ Split long text into chunks based on character count.
28
  """
29
  words = text.split()
30
  chunks, current_chunk = [], ""
 
53
  return " ".join(translated)
54
 
55
  def translate_to_portuguese(text: str) -> str:
 
 
 
56
  if not text.strip():
57
  return "No input provided."
58
  chunks = chunk_text(clean_text(text))
59
  return translate_chunks(chunks, tokenizer_en_pt, model_en_pt)
60
 
61
  def translate_to_english(text: str) -> str:
 
 
 
62
  if not text.strip():
63
  return "No input provided."
64
  chunks = chunk_text(clean_text(text))
 
66
 
67
  def translate_text(text: str, direction: str = "en-pt") -> str:
68
  """
69
+ direction = 'en-pt' or 'pt-en'
 
70
  """
71
  if direction == "en-pt":
72
  return translate_to_portuguese(text)
 
77
 
78
  # ========== Bilingual View ==========
79
 
80
+ def bilingual_clauses(text: str) -> str:
81
  """
82
+ Create bilingual clause-by-clause output (EN + PT).
83
  """
84
+ if not text.strip():
85
+ return "No input provided."
86
 
87
+ clauses_en = chunk_text(clean_text(text), max_chunk_chars=300)
88
  bilingual_output = []
89
+
90
+ for clause in clauses_en:
91
+ translated = translate_to_portuguese(clause)
92
+ bilingual_output.append(f"📘 EN: {clause}\n📗 PT: {translated}\n" + "-" * 60)
93
+
94
  return "\n\n".join(bilingual_output)
95
 
96
  # ========== Export to DOCX ==========
97
 
98
  def export_to_word(text: str, filename: str = "translated_contract.docx") -> str:
99
  """
100
+ Export text (bilingual or full) to Word DOCX.
101
  """
102
  doc = Document()
103
  doc.add_heading("Legal Translation Output", level=1)