gauravchand11 committed
Commit 829aed6 · verified · 1 Parent(s): f72c1a5

Update app.py

Files changed (1)
  1. app.py +377 -124
app.py CHANGED
@@ -4,27 +4,133 @@ from PyPDF2 import PdfReader
 import docx
 import os
 import re
 
-# Load NLLB model and tokenizer
 @st.cache_resource
 def load_translation_model():
-    model_name = "facebook/nllb-200-distilled-600M"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-    return tokenizer, model
 
 # Initialize model
 @st.cache_resource
 def initialize_models():
     tokenizer, model = load_translation_model()
     return {"nllb": (tokenizer, model)}
 
 # Enhanced idiom mapping with more comprehensive translations
 def preprocess_idioms(text, src_lang, tgt_lang):
     if src_lang == "en" and tgt_lang == "hi":
         idiom_map = {
-            # Basic phrases
-            "no piece of cake": "कोई आसान काम नहीं",
             "piece of cake": "बहुत आसान काम",
             "bite the bullet": "दांतों तले उंगली दबाना",
             "tackle it head-on": "सीधे मुकाबला करना",
@@ -75,162 +181,309 @@ def preprocess_idioms(text, src_lang, tgt_lang):
             "proof of concept": "व्यवहार्यता का प्रमाण",
             "game changer": "खेल बदलने वाला"
         }
-
-        # Sort idioms by length (longest first) to handle overlapping phrases
         sorted_idioms = sorted(idiom_map.keys(), key=len, reverse=True)
-
-        # Create a single regex pattern for all idioms
         pattern = '|'.join(map(re.escape, sorted_idioms))
 
         def replace_idiom(match):
             return idiom_map[match.group(0).lower()]
 
-        # Replace all idioms in one pass, case-insensitive
         text = re.sub(pattern, replace_idiom, text, flags=re.IGNORECASE)
 
     return text
 
-# Function to extract text from different file types
-def extract_text(file):
-    ext = os.path.splitext(file.name)[1].lower()
-
-    if ext == ".pdf":
-        reader = PdfReader(file)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text() + "\n"
-        return text
-
-    elif ext == ".docx":
-        doc = docx.Document(file)
-        text = ""
-        for para in doc.paragraphs:
-            text += para.text + "\n"
-        return text
-
-    elif ext == ".txt":
-        return file.read().decode("utf-8")
-
-    else:
-        raise ValueError("Unsupported file format. Please upload PDF, DOCX, or TXT files.")
-
-# Translation function with improved chunking and fixed tokenizer issue
-def translate_text(text, src_lang, tgt_lang, models):
     if src_lang == tgt_lang:
         return text
 
-    # Language codes for NLLB
-    lang_map = {"en": "eng_Latn", "hi": "hin_Deva", "mr": "mar_Deva"}
 
-    if src_lang not in lang_map or tgt_lang not in lang_map:
         return "Error: Unsupported language combination"
 
-    tgt_lang_code = lang_map[tgt_lang]
-    tokenizer, model = models["nllb"]
-
-    # Preprocess for idioms
-    preprocessed_text = preprocess_idioms(text, src_lang, tgt_lang)
-
-    # Improved chunking: split by sentences while preserving context
-    chunks = []
-    current_chunk = ""
-
-    for sentence in re.split('([.!?।]+)', preprocessed_text):
-        if sentence.strip():
-            if len(current_chunk) + len(sentence) < 450:  # Leave room for tokenization
-                current_chunk += sentence
-            else:
-                if current_chunk:
-                    chunks.append(current_chunk)
-                current_chunk = sentence
-
-    if current_chunk:
-        chunks.append(current_chunk)
-
-    translated_text = ""
-
-    for chunk in chunks:
-        if chunk.strip():
-            # Add target language token to the beginning of the input
-            inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
-
-            # Get the token ID for the target language
-            tgt_lang_id = tokenizer.convert_tokens_to_ids(tgt_lang_code)
-
-            translated = model.generate(
-                **inputs,
-                forced_bos_token_id=tgt_lang_id,  # Fixed: using convert_tokens_to_ids instead of lang_code_to_id
-                max_length=512,
-                num_beams=5,
-                length_penalty=1.0,
-                no_repeat_ngram_size=3
-            )
-            translated_chunk = tokenizer.decode(translated[0], skip_special_tokens=True)
-            translated_text += translated_chunk + " "
-
-    return translated_text.strip()
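The fix flagged in the removed generate() call (convert_tokens_to_ids in place of the previously used lang_code_to_id) can be sanity-checked on its own; a minimal sketch, assuming the same NLLB checkpoint downloads successfully:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
    # NLLB language codes such as "hin_Deva" are ordinary vocabulary tokens,
    # so convert_tokens_to_ids() resolves them directly.
    hin_id = tokenizer.convert_tokens_to_ids("hin_Deva")
    assert hin_id != tokenizer.unk_token_id  # a real id, usable as forced_bos_token_id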
 
-# Function to save text as a file
 def save_text_to_file(text, original_filename, prefix="translated"):
-    output_filename = f"{prefix}_{os.path.basename(original_filename)}.txt"
-    with open(output_filename, "w", encoding="utf-8") as f:
-        f.write(text)
-    return output_filename
 
-# Main processing function
 def process_document(file, source_lang, target_lang, models):
     try:
-        # Extract text from uploaded file
         text = extract_text(file)
-
-        # Translate the text
         translated_text = translate_text(text, source_lang, target_lang, models)
 
-        # Save the result
         if translated_text.startswith("Error:"):
             output_file = save_text_to_file(translated_text, file.name, prefix="error")
         else:
             output_file = save_text_to_file(translated_text, file.name)
 
         return output_file, translated_text
     except Exception as e:
         error_message = f"Error: {str(e)}"
         output_file = save_text_to_file(error_message, file.name, prefix="error")
         return output_file, error_message
 
-# Streamlit interface
 def main():
-    st.title("Document Translator (NLLB-200)")
-    st.write("Upload a document (PDF, DOCX, or TXT) and select source and target languages (English, Hindi, Marathi).")
-
-    # Initialize models
     models = initialize_models()
 
-    # File uploader
-    uploaded_file = st.file_uploader("Upload Document", type=["pdf", "docx", "txt"])
 
-    # Language selection
-    col1, col2 = st.columns(2)
-    with col1:
-        source_lang = st.selectbox("Source Language", ["en", "hi", "mr"], index=0)
-    with col2:
-        target_lang = st.selectbox("Target Language", ["en", "hi", "mr"], index=1)
 
-    if uploaded_file is not None and st.button("Translate"):
-        with st.spinner("Translating..."):
-            output_file, result_text = process_document(uploaded_file, source_lang, target_lang, models)
-
-            # Display result
-            st.text_area("Translated Text", result_text, height=300)
-
-            # Provide download button
-            with open(output_file, "rb") as file:
-                st.download_button(
-                    label="Download Translated Document",
-                    data=file,
-                    file_name=os.path.basename(output_file),
-                    mime="text/plain"
-                )
 
 if __name__ == "__main__":
-    main()
 import docx
 import os
 import re
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+import torch
+# Replace pytesseract with easyocr
+import easyocr
+from PIL import Image
+import numpy as np
 
+# Set up async environment for torch
+if torch.cuda.is_available():
+    torch.multiprocessing.set_start_method('spawn', force=True)
+
+# Initialize asyncio event loop
+try:
+    loop = asyncio.get_event_loop()
+except RuntimeError:
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+# Initialize EasyOCR reader
+@st.cache_resource
+def load_ocr_reader():
+    try:
+        return easyocr.Reader(['en'])  # Initialize for English
+    except Exception as e:
+        st.error(f"Error loading OCR reader: {str(e)}")
+        return None
+
+# Modified extract_text_from_image function with better error handling
+def extract_text_from_image(image_file):
+    try:
+        # Get the OCR reader
+        reader = load_ocr_reader()
+        if reader is None:
+            raise Exception("Failed to initialize OCR reader")
+
+        # Read the image using PIL
+        image = Image.open(image_file)
+
+        # Convert to numpy array
+        image_np = np.array(image)
+
+        # Perform OCR
+        results = reader.readtext(image_np)
+
+        if not results:
+            return "No text was detected in the image."
+
+        # Extract text from results
+        text = "\n".join([result[1] for result in results])
+        return text.strip()
+    except Exception as e:
+        raise Exception(f"Error extracting text from image: {str(e)}")
+
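For context on the readtext() call above: EasyOCR yields one (bounding_box, text, confidence) tuple per detected region, which is why the function keeps only result[1]. A minimal sketch, assuming a local sample.png containing printed text:

    import easyocr
    import numpy as np
    from PIL import Image

    reader = easyocr.Reader(['en'])  # downloads detector/recognizer weights on first use
    for bbox, text, confidence in reader.readtext(np.array(Image.open("sample.png"))):
        print(f"{confidence:.2f}  {text}")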
+# Modified extract_text function to support all file types
+def extract_text(file):
+    try:
+        ext = os.path.splitext(file.name)[1].lower()
+
+        if ext == ".pdf":
+            try:
+                reader = PdfReader(file)
+                text = ""
+                for page in reader.pages:
+                    text += page.extract_text() + "\n"
+                return text.strip()
+            except Exception as e:
+                raise Exception(f"Error reading PDF file: {str(e)}")
+
+        elif ext == ".docx":
+            try:
+                doc = docx.Document(file)
+                text = ""
+                for para in doc.paragraphs:
+                    text += para.text + "\n"
+                return text.strip()
+            except Exception as e:
+                raise Exception(f"Error reading DOCX file: {str(e)}")
+
+        elif ext == ".txt":
+            try:
+                return file.read().decode("utf-8").strip()
+            except Exception as e:
+                raise Exception(f"Error reading TXT file: {str(e)}")
+
+        elif ext in [".jpg", ".jpeg", ".png"]:
+            try:
+                return extract_text_from_image(file)
+            except Exception as e:
+                raise Exception(f"Error processing image file: {str(e)}")
+
+        else:
+            raise ValueError("Unsupported file format. Please upload PDF, DOCX, TXT, or image files (JPG, JPEG, PNG).")
+    except Exception as e:
+        raise Exception(f"Error extracting text from file: {str(e)}")
+
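Note that extract_text() only relies on a .name attribute plus a file-like read(), which is the interface Streamlit's UploadedFile exposes. A minimal sketch with a hypothetical stand-in class, assuming app.py imports cleanly as a module:

    import io
    from app import extract_text

    class FakeUpload(io.BytesIO):
        # Hypothetical stand-in for Streamlit's UploadedFile: BytesIO plus .name
        def __init__(self, name, data):
            super().__init__(data)
            self.name = name

    print(extract_text(FakeUpload("note.txt", b"Hello world")))  # -> Hello world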
+# Load NLLB model and tokenizer with error handling
 @st.cache_resource
 def load_translation_model():
+    try:
+        model_name = "facebook/nllb-200-distilled-600M"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+        return tokenizer, model
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        return None, None
 
 # Initialize model
 @st.cache_resource
 def initialize_models():
     tokenizer, model = load_translation_model()
+    if tokenizer is None or model is None:
+        st.error("Failed to initialize models")
+        return None
     return {"nllb": (tokenizer, model)}
 
+
+
 # Enhanced idiom mapping with more comprehensive translations
 def preprocess_idioms(text, src_lang, tgt_lang):
+
+    idiom_map = {}
+
     if src_lang == "en" and tgt_lang == "hi":
         idiom_map = {
+            "no piece of cake": "कोई आसान काम नहीं",
             "piece of cake": "बहुत आसान काम",
             "bite the bullet": "दांतों तले उंगली दबाना",
             "tackle it head-on": "सीधे मुकाबला करना",
             "proof of concept": "व्यवहार्यता का प्रमाण",
             "game changer": "खेल बदलने वाला"
         }
+    elif src_lang == "en" and tgt_lang == "mr":
+        idiom_map = {
+            "no piece of cake": "सोपं काम नाही",
+            "piece of cake": "अतिशय सोपं काम",
+            "bite the bullet": "कठीण निर्णय घेणे",
+            "tackle it head-on": "समस्येला थेट सामोरे जाणे",
+            "fell into place": "सगळं व्यवस्थित झालं",
+            "see the light at the end of the tunnel": "अंधारातून उजेडाची किरण दिसणे",
+            "with a little perseverance": "थोड्या धीराने",
+            "break a leg": "खूप शुभेच्छा",
+            "hit the nail on the head": "अगदी बरोबर बोललात",
+            "once in a blue moon": "क्वचितच, कधीतरी",
+            "under the weather": "तब्येत ठीक नसणे",
+            "cost an arm and a leg": "खूप महाग",
+            "beating around the bush": "गोल गोल फिरवणे",
+            "call it a day": "दिवसाचं काम संपवणे",
+            "burn the midnight oil": "रात्रंदिवस मेहनत करणे",
+            "get the ball rolling": "सुरुवात करणे",
+            "pull yourself together": "स्वतःला सावरा",
+            "shoot yourself in the foot": "स्वतःचेच पाय स्वतः कापणे",
+            "take it with a grain of salt": "साशंक दृष्टीने पाहणे",
+            "the last straw": "सहनशक्तीची शेवटची मर्यादा",
+            "time flies": "वेळ पंख लावून उडतो",
+            "wrap your head around": "समजून घेण्याचा प्रयत्न करणे",
+            "cut corners": "कमी वेळात काम उरकणे",
+            "back to square one": "पुन्हा सुरुवातीला",
+            "blessing in disguise": "आशीर्वाद लपलेला",
+            "cry over spilled milk": "झालेल्या गोष्टीसाठी रडत बसणे",
+            "keep your chin up": "धीर धरा",
+
+            # Work-related idioms
+            "think outside the box": "वेगळ्या पद्धतीने विचार करणे",
+            "raise the bar": "पातळी उंचावणे",
+            "learning curve": "शिकण्याची प्रक्रिया",
+            "up and running": "सुरू आणि कार्यरत",
+            "back to the drawing board": "पुन्हा नव्याने योजना आखणे",
+
+            # Project-related phrases
+            "running into issues": "अडचणींना सामोरे जाणे",
+            "iron out the bugs": "त्रुटी दूर करणे",
+            "in the pipeline": "विचाराधीन",
+            "moving forward": "पुढे जाताना",
+            "touch base": "संपर्कात राहणे",
+
+            # Technical phrases
+            "user-friendly": "वापरकर्त्यास सोयीस्कर",
+            "cutting-edge": "अत्याधुनिक",
+            "state of the art": "सर्वोत्कृष्ट तंत्रज्ञान",
+            "proof of concept": "संकल्पनेची सिद्धता",
+            "game changer": "खेळ बदलणारी गोष्ट"
+        }
+
+    if idiom_map:
         sorted_idioms = sorted(idiom_map.keys(), key=len, reverse=True)
         pattern = '|'.join(map(re.escape, sorted_idioms))
 
         def replace_idiom(match):
             return idiom_map[match.group(0).lower()]
 
         text = re.sub(pattern, replace_idiom, text, flags=re.IGNORECASE)
 
     return text
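The longest-first sort before building the alternation matters because re tries alternatives left to right at each position, so a shorter idiom sharing a prefix with a longer one would otherwise win. A minimal sketch with a hypothetical two-entry map:

    import re

    idiom_map = {"get the ball": "START", "get the ball rolling": "START-WORK"}
    text = "Let's get the ball rolling."

    unsorted = '|'.join(map(re.escape, idiom_map))
    print(re.sub(unsorted, lambda m: idiom_map[m.group(0)], text))
    # -> Let's START rolling.  (shorter phrase listed first wins)

    longest_first = '|'.join(map(re.escape, sorted(idiom_map, key=len, reverse=True)))
    print(re.sub(longest_first, lambda m: idiom_map[m.group(0)], text))
    # -> Let's START-WORK.  (full idiom matched)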
 
+# Async translation function with fixed idiom processing
+async def translate_text_async(text, src_lang, tgt_lang, models):
     if src_lang == tgt_lang:
         return text
 
+    # Updated language mapping handling
+    src_lang_simple = src_lang.lower()
+    tgt_lang_simple = tgt_lang.lower()
+
+    lang_map = {"english": "eng_Latn", "hindi": "hin_Deva", "marathi": "mar_Deva"}
 
+    if src_lang_simple not in lang_map or tgt_lang_simple not in lang_map:
         return "Error: Unsupported language combination"
 
+    try:
+        # Process idioms first ("marathi"[:2] would give "ma", so map explicitly
+        # to the two-letter codes that preprocess_idioms() expects)
+        iso = {"english": "en", "hindi": "hi", "marathi": "mr"}
+        preprocessed_text = preprocess_idioms(text, iso[src_lang_simple], iso[tgt_lang_simple])
+
+        tgt_lang_code = lang_map[tgt_lang_simple]
+        tokenizer, model = models["nllb"]
+
+        chunks = []
+        current_chunk = ""
+
+        # Split text into chunks while preserving sentences
+        for sentence in re.split('([.!?।]+)', preprocessed_text):
+            if sentence.strip():
+                if len(current_chunk) + len(sentence) < 450:
+                    current_chunk += sentence
+                else:
+                    if current_chunk:
+                        chunks.append(current_chunk)
+                    current_chunk = sentence
+
+        if current_chunk:
+            chunks.append(current_chunk)
+
+        translated_text = ""
+
+        # Translate each chunk
+        for chunk in chunks:
+            if chunk.strip():
+                inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
+                tgt_lang_id = tokenizer.convert_tokens_to_ids(tgt_lang_code)
+
+                translated = model.generate(
+                    **inputs,
+                    forced_bos_token_id=tgt_lang_id,
+                    max_length=512,
+                    num_beams=5,
+                    length_penalty=1.0,
+                    no_repeat_ngram_size=3
+                )
+
+                translated_chunk = tokenizer.decode(translated[0], skip_special_tokens=True)
+                translated_text += translated_chunk + " "
+
+        return translated_text.strip()
+    except Exception as e:
+        return f"Error during translation: {str(e)}"
+
+# Synchronous wrapper for translation
+def translate_text(text, src_lang, tgt_lang, models):
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        return loop.run_until_complete(translate_text_async(text, src_lang, tgt_lang, models))
+    finally:
+        loop.close()
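A minimal usage sketch of the synchronous wrapper, assuming app.py imports as a module and the NLLB weights can be fetched:

    from app import initialize_models, translate_text

    models = initialize_models()
    if models is not None:
        # preprocess_idioms() substitutes idioms before NLLB sees the text
        print(translate_text("The rollout was no piece of cake.", "english", "hindi", models))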
 def save_text_to_file(text, original_filename, prefix="translated"):
+    try:
+        # Get the original file extension and base name
+        base_name = os.path.splitext(os.path.basename(original_filename))[0]
+        output_filename = f"{prefix}_{base_name}.txt"
+
+        # Save all translations as text files for simplicity and build speed
+        with open(output_filename, "w", encoding="utf-8") as f:
+            f.write(text)
+
+        return output_filename
+    except Exception as e:
+        st.error(f"Error saving file: {str(e)}")
+        return None
 
+# Modified process_document function to handle multiple formats
 def process_document(file, source_lang, target_lang, models):
     try:
         text = extract_text(file)
         translated_text = translate_text(text, source_lang, target_lang, models)
 
         if translated_text.startswith("Error:"):
             output_file = save_text_to_file(translated_text, file.name, prefix="error")
         else:
             output_file = save_text_to_file(translated_text, file.name)
 
+        if output_file is None:
+            raise Exception("Failed to save output file")
+
         return output_file, translated_text
     except Exception as e:
         error_message = f"Error: {str(e)}"
         output_file = save_text_to_file(error_message, file.name, prefix="error")
         return output_file, error_message
 
+
+# Modified main function to ensure proper language handling
 def main():
+    st.title("Document Translation Toolkit")
+
+    # Initialize models with error handling
     models = initialize_models()
+    if models is None:
+        st.error("Failed to initialize translation models. Please try again.")
+        return
 
+    # Create tabs for different translation modes
+    tab1, tab2 = st.tabs(["Document Translation", "Text Translation"])
 
+    # Document Translation Tab
+    with tab1:
+        st.subheader("Document Translation")
+        st.write("Upload a document (PDF, DOCX, TXT, or Image) and select languages.")
+
+        uploaded_file = st.file_uploader(
+            "Upload Document",
+            type=["pdf", "docx", "txt", "jpg", "jpeg", "png"],
+            key="doc_uploader"
+        )
+
+        col1, col2 = st.columns(2)
+        with col1:
+            source_lang = st.selectbox(
+                "Source Language",
+                ["English", "Hindi", "Marathi"],
+                index=0,
+                key="doc_src"
+            )
+        with col2:
+            target_lang = st.selectbox(
+                "Target Language",
+                ["English", "Hindi", "Marathi"],
+                index=1,
+                key="doc_tgt"
+            )
+
+        if uploaded_file is not None and st.button("Translate Document"):
+            try:
+                with st.spinner("Translating..."):
+                    # Extract and show input text
+                    input_text = extract_text(uploaded_file)
+                    st.subheader("Input Text")
+                    st.text_area("Original Text", input_text, height=200)
+
+                    # Translate and show output text
+                    output_file, result_text = process_document(
+                        uploaded_file,
+                        source_lang.lower(),
+                        target_lang.lower(),
+                        models
+                    )
+
+                    st.subheader("Translated Text")
+                    st.text_area("Translation", result_text, height=200)
+
+                    # Provide download button with correct MIME type
+                    if output_file and os.path.exists(output_file):
+                        with open(output_file, "rb") as file:
+                            # Set appropriate MIME type based on file extension
+                            ext = os.path.splitext(output_file)[1].lower()
+                            mime_types = {
+                                '.pdf': 'application/pdf',
+                                '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+                                '.txt': 'text/plain',
+                                '.jpg': 'image/jpeg',
+                                '.jpeg': 'image/jpeg',
+                                '.png': 'image/png'
+                            }
+                            mime_type = mime_types.get(ext, 'text/plain')
+
+                            st.download_button(
+                                label="Download Translated Document",
+                                data=file,
+                                file_name=os.path.basename(output_file),
+                                mime=mime_type
+                            )
+                    else:
+                        st.error("Failed to generate output file")
+            except Exception as e:
+                st.error(f"An error occurred during translation: {str(e)}")
 
+    # Text Translation Tab
+    with tab2:
+        st.subheader("Text Translation")
+        st.write("Enter text directly for translation.")
+
+        col1, col2 = st.columns(2)
+        with col1:
+            text_source_lang = st.selectbox(
+                "Source Language",
+                ["English", "Hindi", "Marathi"],
+                index=0,
+                key="text_src"
+            )
+        with col2:
+            text_target_lang = st.selectbox(
+                "Target Language",
+                ["English", "Hindi", "Marathi"],
+                index=1,
+                key="text_tgt"
+            )
+
+        input_text = st.text_area("Enter text to translate", height=150)
+
+        if input_text and st.button("Translate Text"):
+            try:
+                with st.spinner("Translating..."):
+                    # Translate the input text
+                    translated_text = translate_text(
+                        input_text,
+                        text_source_lang.lower(),
+                        text_target_lang.lower(),
+                        models
+                    )
+
+                    # Show translation result
+                    st.text_area("Translation", translated_text, height=150)
+
+                    # Add download button for translated text
+                    st.download_button(
+                        label="Download Translation",
+                        data=translated_text,
+                        file_name="translation.txt",
+                        mime="text/plain"
+                    )
+            except Exception as e:
+                st.error(f"An error occurred during translation: {str(e)}")
 
 if __name__ == "__main__":
+    try:
+        main()
+    except Exception as e:
+        st.error(f"Application error: {str(e)}")