gauravchand11 committed on
Commit
35445ba
·
verified ·
1 Parent(s): 8981936

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +572 -0
app.py ADDED
@@ -0,0 +1,572 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import gradio as gr
3
+ from PyPDF2 import PdfReader
4
+ import docx
5
+ import os
6
+ import re
7
+ import torch
8
+ from datetime import datetime
9
+ import pytz
10
+ from io import BytesIO
11
+ from docx import Document
12
+ import tempfile
13
+
14
+ # Load translation model
15
def load_translation_model():
    """Load the NLLB-200 distilled (600M) tokenizer and seq2seq model.

    Returns:
        A ``(tokenizer, model)`` tuple, or ``(None, None)`` when loading
        fails for any reason (the error is printed, not raised).
    """
    checkpoint = "facebook/nllb-200-distilled-600M"
    try:
        loaded = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        )
    except Exception as err:
        # Best-effort: report the failure and let the caller see (None, None).
        print(f"Error loading model: {str(err)}")
        loaded = (None, None)
    return loaded
24
+
25
+ # Initialize models
26
# Load once at import time; MODELS stays None when loading failed.
tokenizer, model = load_translation_model()
if tokenizer and model:
    MODELS = {"nllb": (tokenizer, model)}
else:
    MODELS = None
28
+
29
+ # Extract text from documents
30
def extract_text(file):
    """Extract plain text from a PDF, DOCX, or TXT document.

    Args:
        file: Either a filesystem path (``str``) or a file-like object that
            exposes ``name`` and ``read()``.

    Returns:
        The document text with leading and trailing whitespace stripped.

    Raises:
        Exception: For any failure (including an unsupported extension); the
            message is prefixed with ``"Error extracting text: "`` and the
            original exception is chained as ``__cause__``.
    """
    try:
        is_path = isinstance(file, str)
        ext = os.path.splitext(file if is_path else file.name)[1].lower()

        if ext == ".pdf":
            try:
                if is_path:
                    with open(file, "rb") as f:
                        raw = f.read()
                else:
                    raw = file.read()

                # The context manager closes the in-memory buffer on every path.
                with BytesIO(raw) as buffer:
                    reader = PdfReader(buffer)
                    # extract_text() can yield None for pages without a text
                    # layer; treat those as empty instead of crashing.
                    pages = [
                        (page.extract_text() or "") + "\n"
                        for page in reader.pages
                    ]
                return "".join(pages).strip()
            except Exception as e:
                raise Exception(f"PDF extraction error: {str(e)}") from e

        if ext == ".docx":
            # docx.Document accepts both a path and a file-like object.
            doc = docx.Document(file)
            return "".join(para.text + "\n" for para in doc.paragraphs).strip()

        if ext == ".txt":
            if is_path:
                with open(file, "r", encoding="utf-8") as f:
                    return f.read().strip()
            data = file.read()
            # A handle opened in text mode already returns str.
            if isinstance(data, bytes):
                data = data.decode("utf-8")
            return data.strip()

        raise ValueError("Unsupported file format")
    except Exception as e:
        raise Exception(f"Error extracting text: {str(e)}") from e
78
+
79
+ # Preprocess idioms
80
# Idiom tables keyed by (source, target) language codes. Keys are lowercase
# English phrases; values are the hand-written target-language renderings.
# Built once at import time instead of on every call.
_IDIOM_MAPS = {
    ("en", "hi"): {
        "no piece of cake": "कोई आसान काम नहीं",
        "piece of cake": "बहुत आसान काम",
        "bite the bullet": "दांतों तले उंगली दबाना",
        "tackle it head-on": "सीधे मुकाबला करना",
        "fell into place": "सब कुछ ठीक हो गया",
        "see the light at the end of the tunnel": "मुश्किलों के अंत में उम्मीद की किरण दिखना",
        "with a little perseverance": "थोड़े से धैर्य से",

        # Additional common idioms
        "break a leg": "बहुत बहुत शुभकामनाएं",
        "hit the nail on the head": "बिल्कुल सही बात कहना",
        "once in a blue moon": "बहुत कम, कभी-कभार",
        "under the weather": "तबीयत ठीक नहीं",
        "cost an arm and a leg": "बहुत महंगा",
        "beating around the bush": "इधर-उधर की बात करना",
        "call it a day": "काम समाप्त करना",
        "burn the midnight oil": "रात-रात भर जागकर काम करना",
        "get the ball rolling": "शुरुआत करना",
        "pull yourself together": "खुद को संभालो",
        "shoot yourself in the foot": "अपना ही नुकसान करना",
        "take it with a grain of salt": "संदेह से लेना",
        "the last straw": "सहनशीलता की आखिरी सीमा",
        "time flies": "समय पंख लगाकर उड़ता है",
        "wrap your head around": "समझने की कोशिश करना",
        "cut corners": "काम में छोटा रास्ता अपनाना",
        "back to square one": "फिर से शुरू से",
        "blessing in disguise": "छिपा हुआ वरदान",
        "cry over spilled milk": "बीती बात पर पछताना",
        "keep your chin up": "हिम्मत रखना",

        # Work-related idioms
        "think outside the box": "नए तरीके से सोचना",
        "raise the bar": "मानक ऊंचा करना",
        "learning curve": "सीखने की प्रक्रिया",
        "up and running": "चालू और कार्यरत",
        "back to the drawing board": "फिर से योजना बनाना",

        # Project-related phrases
        "running into issues": "समस्याओं का सामना करना",
        "iron out the bugs": "खामियां दूर करना",
        "in the pipeline": "विचाराधीन",
        "moving forward": "आगे बढ़ते हुए",
        "touch base": "संपर्क में रहना",

        # Technical phrases
        "user-friendly": "उपयोगकर्ता के अनुकूल",
        "cutting-edge": "अत्याधुनिक",
        "state of the art": "अत्याधुनिक तकनीक",
        "proof of concept": "व्यवहार्यता का प्रमाण",
        "game changer": "खेल बदलने वाला",

        "a blessing in disguise": "छुपा हुआ वरदान",
        "actions speak louder than words": "कर्म शब्दों से अधिक प्रभावी होते हैं",
        "add fuel to the fire": "आग में घी डालना",
        "barking up the wrong tree": "गलत दिशा में प्रयास करना",
        "best of both worlds": "दोनों चीजों का लाभ",
        "cut to the chase": "मुद्दे पर आना",
        "don't judge a book by its cover": "किसी को उसके रूप से मत आंकिए",
        "easy does it": "धीरे-धीरे करो",
        "every cloud has a silver lining": "हर मुश्किल में आशा की किरण होती है",
        "get a taste of your own medicine": "जैसा किया वैसा भुगतो",
        "hit the sack": "सोने जाना",
        "let the cat out of the bag": "राज़ खोल देना",
        "miss the boat": "मौका चूक जाना",
        "no pain no gain": "बिना मेहनत के कुछ नहीं मिलता",
        "on the ball": "सचेत और सतर्क",
        "pull the plug": "काम रोक देना",
        "spill the beans": "राज़ खोलना",
        "the ball is in your court": "अब निर्णय तुम्हारे हाथ में है",
        "through thick and thin": "हर परिस्थिति में",
        "you can't have your cake and eat it too": "दोनों फायदे एक साथ नहीं हो सकते",
    },
    ("en", "mr"): {
        "no piece of cake": "सोपं काम नाही",
        "piece of cake": "अतिशय सोपं काम",
        "bite the bullet": "कठीण निर्णय घेणे",
        "tackle it head-on": "समस्येला थेट सामोरे जाणे",
        "fell into place": "सगळं व्यवस्थित झालं",
        "see the light at the end of the tunnel": "अंधारातून उजेडाची किरण दिसणे",
        "with a little perseverance": "थोड्या धीराने",
        "break a leg": "खूप शुभेच्छा",
        "hit the nail on the head": "अगदी बरोबर बोललात",
        "once in a blue moon": "क्वचितच, कधीतरी",
        "under the weather": "तब्येत ठीक नसणे",
        "cost an arm and a leg": "खूप महाग",
        "beating around the bush": "गोल गोल फिरवणे",
        "call it a day": "दिवसाचं काम संपवणे",
        "burn the midnight oil": "रात्रंदिवस मेहनत करणे",
        "get the ball rolling": "सुरुवात करणे",
        "pull yourself together": "स्वतःला सावरा",
        "shoot yourself in the foot": "स्वतःचेच पाय स्वतः कापणे",
        "take it with a grain of salt": "साशंक दृष्टीने पाहणे",
        "the last straw": "सहनशक्तीची शेवटची मर्यादा",
        "time flies": "वेळ पंख लावून उडतो",
        "wrap your head around": "समजून घेण्याचा प्रयत्न करणे",
        "cut corners": "कमी वेळात काम उरकणे",
        "back to square one": "पुन्हा सुरुवातीला",
        "blessing in disguise": "आशीर्वाद लपलेला",
        "cry over spilled milk": "झालेल्या गोष्टीसाठी रडत बसणे",
        "keep your chin up": "धीर धरा",

        # Work-related idioms
        "think outside the box": "वेगळ्या पद्धतीने विचार करणे",
        "raise the bar": "पातळी उंचावणे",
        "learning curve": "शिकण्याची प्रक्रिया",
        "up and running": "सुरू आणि कार्यरत",
        "back to the drawing board": "पुन्हा नव्याने योजना आखणे",

        # Project-related phrases
        "running into issues": "अडचणींना सामोरे जाणे",
        "iron out the bugs": "त्रुटी दूर करणे",
        "in the pipeline": "विचाराधीन",
        "moving forward": "पुढे जाताना",
        "touch base": "संपर्कात राहणे",

        # Technical phrases
        "user-friendly": "वापरकर्त्यास सोयीस्कर",
        "cutting-edge": "अत्याधुनिक",
        "state of the art": "सर्वोत्कृष्ट तंत्रज्ञान",
        "proof of concept": "संकल्पनेची सिद्धता",
        "game changer": "खेळ बदलणारी गोष्ट",

        "a blessing in disguise": "छुपलेले वरदान",
        "actions speak louder than words": "कृती शब्दांपेक्षा प्रभावी असतात",
        "add fuel to the fire": "आग ला फुंकर घालणे",
        "barking up the wrong tree": "चुकीच्या गोष्टीकडे लक्ष देणे",
        "best of both worlds": "दोनही गोष्टींचा लाभ",
        "cut to the chase": "थेट मुद्द्यावर येणे",
        "don't judge a book by its cover": "फक्त बाह्यरूप पाहून अंदाज लावू नका",
        "easy does it": "हळूहळू करा",
        "every cloud has a silver lining": "प्रत्येक संकटात संधी असते",
        "get a taste of your own medicine": "जसे कराल तसे भराल",
        "hit the sack": "झोपायला जाणे",
        "let the cat out of the bag": "गुपित उघड करणे",
        "miss the boat": "संधी गमावणे",
        "no pain no gain": "कष्टाशिवाय यश नाही",
        "on the ball": "सतर्क असणे",
        "pull the plug": "काम बंद करणे",
        "spill the beans": "गुपित सांगणे",
        "the ball is in your court": "निर्णय तुमच्या हाती आहे",
        "through thick and thin": "संकटसमयीही साथ देणे",
        "you can't have your cake and eat it too": "सगळं काही मिळवता येत नाही",
    },
}

# A caller that derives the code from the first two letters of the language
# name passes "ma" for "Marathi"; map it to the real table key so the Marathi
# idioms are actually reachable.
_IDIOM_LANG_ALIASES = {"ma": "mr"}

# Compiled regex per language pair, filled lazily on first use.
_IDIOM_PATTERNS = {}


def _idiom_pattern(lang_pair):
    """Return the compiled, cached idiom regex for *lang_pair*."""
    pattern = _IDIOM_PATTERNS.get(lang_pair)
    if pattern is None:
        # Longest first so "no piece of cake" wins over "piece of cake".
        ordered = sorted(_IDIOM_MAPS[lang_pair], key=len, reverse=True)
        alternation = "|".join(map(re.escape, ordered))
        # Lookarounds keep idioms from matching inside longer words
        # ("time flies" must not fire inside "sometime flies").
        pattern = re.compile(
            rf"(?<!\w)(?:{alternation})(?!\w)", flags=re.IGNORECASE
        )
        _IDIOM_PATTERNS[lang_pair] = pattern
    return pattern


def preprocess_idioms(text, src_lang, tgt_lang):
    """Replace known English idioms in *text* with target-language phrases.

    Matching is case-insensitive, prefers the longest idiom at a position,
    and only fires on whole phrases (not inside longer words).

    Args:
        text: Input text.
        src_lang: Two-letter source code; only ``"en"`` has idiom tables.
        tgt_lang: Two-letter target code: ``"hi"`` or ``"mr"`` (``"ma"`` is
            accepted as an alias for ``"mr"``).

    Returns:
        *text* with idioms substituted, or *text* unchanged when no table
        exists for the language pair.
    """
    lang_pair = (src_lang, _IDIOM_LANG_ALIASES.get(tgt_lang, tgt_lang))
    idiom_map = _IDIOM_MAPS.get(lang_pair)
    if not idiom_map or not text:
        return text

    # Fall back to the matched text if case-insensitive matching accepted a
    # character whose .lower() form is not the table key.
    return _idiom_pattern(lang_pair).sub(
        lambda m: idiom_map.get(m.group(0).lower(), m.group(0)), text
    )
241
+
242
+ # Translation function
243
def _split_into_chunks(text, max_chars=450):
    """Group sentence fragments of *text* into chunks shorter than *max_chars*."""
    chunks = []
    current = ""
    # The capture group keeps the terminators (., !, ?, danda) as their own
    # pieces so they are re-attached to the running chunk.
    for piece in re.split(r"([.!?।]+)", text):
        if not piece.strip():
            continue
        if len(current) + len(piece) < max_chars:
            current += piece
        else:
            if current:
                chunks.append(current)
            current = piece
    if current:
        chunks.append(current)
    return chunks


def translate_text(text, src_lang, tgt_lang):
    """Translate *text* between English, Hindi and Marathi with NLLB.

    Args:
        text: Text to translate.
        src_lang: Source language name: "English", "Hindi" or "Marathi".
        tgt_lang: Target language name, same choices.

    Returns:
        The translated text. *text* is returned unchanged when both languages
        are equal. Failures are reported as a string starting with "Error"
        rather than raised.
    """
    if src_lang == tgt_lang:
        return text

    lang_map = {"English": "eng_Latn", "Hindi": "hin_Deva", "Marathi": "mar_Deva"}
    # Explicit two-letter codes for the idiom tables; slicing the name would
    # turn "Marathi" into "ma" and miss the "mr" table.
    idiom_codes = {"English": "en", "Hindi": "hi", "Marathi": "mr"}

    src_lang_code = lang_map.get(src_lang)
    tgt_lang_code = lang_map.get(tgt_lang)
    if not src_lang_code or not tgt_lang_code:
        return "Error: Unsupported language combination"

    try:
        if not MODELS:
            return "Error: Translation model is not available"

        # First apply idiom preprocessing
        preprocessed_text = preprocess_idioms(
            text, idiom_codes[src_lang], idiom_codes[tgt_lang]
        )
        tokenizer, model = MODELS["nllb"]

        # Tell the tokenizer which language the input is in; otherwise it
        # keeps its default source-language tag for every request.
        tokenizer.src_lang = src_lang_code
        tgt_lang_id = tokenizer.convert_tokens_to_ids(tgt_lang_code)

        translated_chunks = []
        for chunk in _split_into_chunks(preprocessed_text):
            if not chunk.strip():
                continue
            inputs = tokenizer(
                chunk,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )
            translated = model.generate(
                **inputs,
                forced_bos_token_id=tgt_lang_id,
                max_length=512,
                num_beams=5,
                length_penalty=1.0,
                no_repeat_ngram_size=3,
            )
            translated_chunks.append(
                tokenizer.decode(translated[0], skip_special_tokens=True)
            )

        return " ".join(translated_chunks).strip()
    except Exception as e:
        return f"Error during translation: {str(e)}"
298
+
299
+ # Document translation function
300
def _translate_lines(text, source_lang, target_lang):
    """Translate *text* line by line, keeping blank lines and paragraph breaks."""
    translated_paragraphs = []
    for paragraph in text.split('\n\n'):
        translated_lines = [
            translate_text(line, source_lang, target_lang) if line.strip() else ''
            for line in paragraph.split('\n')
        ]
        translated_paragraphs.append('\n'.join(translated_lines))
    return '\n\n'.join(translated_paragraphs)


def _translate_pdf_pages(file, source_lang, target_lang):
    """Translate every non-empty page of a PDF and return the joined text."""
    if isinstance(file, str):
        with open(file, 'rb') as f:
            raw = f.read()
    else:
        raw = file.read()

    with BytesIO(raw) as buffer:
        reader = PdfReader(buffer)
        translated_pages = []
        for page in reader.pages:
            # extract_text() can yield None for pages without a text layer.
            page_text = page.extract_text() or ""
            if not page_text.strip():
                continue
            translated_pages.append(
                _translate_lines(page_text, source_lang, target_lang)
            )
    return '\n\n'.join(translated_pages)


def _translate_docx(file, source_lang, target_lang):
    """Build a translated copy of a DOCX, carrying over styles and run formatting."""
    doc = Document(file)
    new_doc = Document()

    # Copy styles from original document
    for style in doc.styles:
        if style.name not in new_doc.styles:
            new_doc.styles.add_style(style.name, style.type, bool(style.base_style))

    for para in doc.paragraphs:
        if not para.text.strip():
            # Preserve empty paragraphs
            new_doc.add_paragraph()
            continue

        new_para = new_doc.add_paragraph(style=para.style.name if para.style else None)
        source_runs = [run for run in para.runs if run.text.strip()]

        if not source_runs:
            # The paragraph has text that is not held by a direct run
            # (presumably hyperlink content); translate it as one plain run
            # instead of emitting an empty paragraph.
            new_para.add_run(translate_text(para.text, source_lang, target_lang))
            continue

        combined_text = " ".join(run.text for run in source_runs)
        translated_parts = translate_text(combined_text, source_lang, target_lang).split()

        # Words are distributed evenly over the original runs so each run's
        # formatting covers roughly the same share of the translated text.
        avg_len = len(translated_parts) // len(source_runs)
        last_index = len(source_runs) - 1
        current_index = 0
        for i, run in enumerate(source_runs):
            if i == last_index:
                # Last run gets all remaining text
                end_index = len(translated_parts)
            else:
                end_index = min(current_index + avg_len, len(translated_parts))

            chunk_text = " ".join(translated_parts[current_index:end_index])
            current_index = end_index

            new_run = new_para.add_run(chunk_text + " ")
            new_run.bold = run.bold
            new_run.italic = run.italic
            new_run.underline = run.underline
            if run.font.size:
                new_run.font.size = run.font.size
            if run.font.name:
                new_run.font.name = run.font.name
            color = run.font.color.rgb if run.font.color and run.font.color.rgb else None
            if color:
                new_run.font.color.rgb = color

    return new_doc


def translate_document(file, source_lang, target_lang):
    """Translate an uploaded PDF, DOCX or TXT document.

    Args:
        file: Upload to translate: a filesystem path (``str``) or a file-like
            object exposing ``name`` (and ``read()`` for PDF/TXT). ``None``
            means nothing was uploaded.
        source_lang: Source language name understood by ``translate_text``.
        target_lang: Target language name understood by ``translate_text``.

    Returns:
        ``(text, output_path)``. PDF input produces a ``.txt`` file; other
        formats keep their extension. The output file is written to the
        system temp directory as ``translated_<original stem><ext>``. On any
        failure the tuple is ``("Error: ...", None)``; no exception is raised.
    """
    try:
        if file is None:
            return "Please upload a file", None

        # Accept both a plain path and an upload object carrying ``name``.
        source_path = file if isinstance(file, str) else file.name
        stem, input_ext = os.path.splitext(os.path.basename(source_path))
        input_ext = input_ext.lower()

        # Change output extension to .txt for PDF inputs
        output_ext = '.txt' if input_ext == '.pdf' else input_ext
        output_path = os.path.join(
            tempfile.gettempdir(), f"translated_{stem}{output_ext}"
        )

        if input_ext == '.pdf':
            try:
                final_text = _translate_pdf_pages(file, source_lang, target_lang)
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(final_text)
                return final_text, output_path
            except Exception as e:
                raise Exception(f"PDF processing error: {str(e)}") from e

        if input_ext == '.docx':
            new_doc = _translate_docx(file, source_lang, target_lang)
            new_doc.save(output_path)
            text_content = "\n".join(
                para.text for para in new_doc.paragraphs if para.text.strip()
            )
            return text_content, output_path

        if input_ext == '.txt':
            input_text = extract_text(file)
            if not input_text:
                return "Could not extract text from the document", None
            final_text = _translate_lines(input_text, source_lang, target_lang)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(final_text)
            return final_text, output_path

        # For other file types, defer to extract_text (it decides whether the
        # format is supported) and translate the whole text in one go.
        input_text = extract_text(file)
        if input_text is None:
            return "Could not extract text from the document", None

        translated_text = translate_text(input_text, source_lang, target_lang)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(translated_text)
        return translated_text, output_path

    except Exception as e:
        return f"Error: {str(e)}", None
506
+
507
+ # Direct text translation function
508
def translate_text_direct(text, source_lang, target_lang):
    """Translate text typed into the UI, prompting when the box is empty.

    Returns the result of ``translate_text`` for non-empty *text*, otherwise
    the fixed prompt string.
    """
    if text:
        return translate_text(text, source_lang, target_lang)
    return "Please enter some text"
512
+
513
+ # Get current time in UTC
514
def get_current_time():
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    # The standard library already provides a UTC tzinfo, so no third-party
    # timezone package is needed; imported locally to keep the block
    # self-contained.
    from datetime import timezone

    return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
517
+
518
+ # Create Gradio interface
519
def create_interface():
    """Assemble the Gradio app: a header plus document and text translation tabs.

    Returns:
        The ``gr.Blocks`` instance, ready for ``launch()``.
    """
    languages = ["English", "Hindi", "Marathi"]

    def language_dropdowns():
        # Fresh source/target selectors for each tab.
        return [
            gr.Dropdown(choices=list(languages), label="Source Language", value="English"),
            gr.Dropdown(choices=list(languages), label="Target Language", value="Hindi"),
        ]

    # The timestamp is evaluated once, when the interface is built.
    header = gr.Markdown(
        f"""
# Document Translation Toolkit
*Current Date and Time (UTC):* {get_current_time()}
*Current User's Login:* gauravchand
"""
    )

    # Document translation tab
    doc_interface = gr.Interface(
        fn=translate_document,
        inputs=[
            gr.File(label="Upload Document (PDF, DOCX, or TXT)"),
            *language_dropdowns(),
        ],
        outputs=[
            gr.Textbox(label="Translation", lines=10),
            gr.File(label="Download Translation"),
        ],
        title="Document Translation",
        description="Upload a document to translate",
    )

    # Text translation tab
    text_interface = gr.Interface(
        fn=translate_text_direct,
        inputs=[
            gr.Textbox(lines=5, label="Enter text to translate"),
            *language_dropdowns(),
        ],
        outputs=gr.Textbox(label="Translation", lines=5),
        title="Text Translation",
        description="Enter text directly to translate",
    )

    # Combine both tabs under the header
    with gr.Blocks() as demo:
        header.render()
        gr.TabbedInterface(
            [doc_interface, text_interface],
            tab_names=["Document Translation", "Text Translation"],
        )

    return demo
568
+
569
+ # Launch the app
570
if __name__ == "__main__":
    # Build the UI once, then start the Gradio server.
    demo = create_interface()
    demo.launch()