gauravchand11 committed
Commit f88b938 · verified · 1 Parent(s): 97c654f

Update app.py

Files changed (1)
  1. app.py +16 -15
app.py CHANGED
@@ -8,12 +8,13 @@ from pathlib import Path
 import tempfile
 from typing import Union, Tuple
 import os
-from datetime import datetime, timezone
 import sys
+from datetime import datetime, timezone
 
-# Display current information
-st.sidebar.text(f"Current Time (UTC): {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
-st.sidebar.text(f"User: {os.environ.get('USER', 'gauravchand')}")
+# Display current information in sidebar
+st.sidebar.text(f"Current Date and Time (UTC):")
+st.sidebar.text(datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'))
+st.sidebar.text(f"Current User's Login: {os.environ.get('USER', 'gauravchand')}")
 
 # Get Hugging Face token from environment variables
 HF_TOKEN = os.environ.get('HF_TOKEN')
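A side note on the sidebar block added above (a sketch under assumptions, not part of the commit): Streamlit re-executes the whole script on every interaction, so the timestamp shows the moment of the last rerun rather than a live clock, and the first label's f-string has no placeholders, so a plain string would do. A minimal standalone equivalent:

    # Minimal sketch of the sidebar block; runnable via `streamlit run`.
    # The timestamp reflects the last script rerun, not a ticking clock.
    import os
    from datetime import datetime, timezone

    import streamlit as st

    st.sidebar.text("Current Date and Time (UTC):")  # plain string suffices
    st.sidebar.text(datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'))
    st.sidebar.text(f"Current User's Login: {os.environ.get('USER', 'gauravchand')}")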
@@ -37,7 +38,7 @@ MT5_LANG_CODES = {
 
 @st.cache_resource
 def load_models():
-    """Load and cache the translation, context interpretation, and grammar correction models."""
+    """Load and cache the translation and context interpretation models."""
     try:
         # Set device
         device = "cuda" if torch.cuda.is_available() else "cpu"
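For context on the decorator visible in this hunk: `@st.cache_resource` keeps the loaded models in one shared copy across reruns and sessions, so the heavyweight `from_pretrained` calls run once per process. A minimal sketch of the pattern, with the model name taken from the diff and error handling trimmed:

    import streamlit as st
    import torch
    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    @st.cache_resource  # loaded once per process, shared across reruns/sessions
    def load_models():
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
        model = AutoModelForSeq2SeqLM.from_pretrained(
            "facebook/nllb-200-distilled-600M"
        ).to(device)
        return tokenizer, model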
@@ -60,7 +61,6 @@ def load_models():
         nllb_tokenizer = AutoTokenizer.from_pretrained(
             "facebook/nllb-200-distilled-600M",
             token=HF_TOKEN,
-            src_lang="eng_Latn",  # Default source language
             trust_remote_code=True
         )
         nllb_model = AutoModelForSeq2SeqLM.from_pretrained(
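Dropping `src_lang` here (together with the per-batch `tokenizer.src_lang = source_lang` assignment removed in a later hunk) means inputs are encoded with the tokenizer's default source code, which for this checkpoint is `eng_Latn` as far as the transformers NLLB tokenizer documents. The conventional way to tag a non-English source, as a hedged sketch:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
    tokenizer.src_lang = "hin_Deva"  # e.g. the FLORES-200 code for Hindi
    inputs = tokenizer("नमस्ते दुनिया", return_tensors="pt")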
@@ -98,7 +98,6 @@ def load_models():
         st.error("Detailed error information:")
         st.error(f"Python version: {sys.version}")
         st.error(f"PyTorch version: {torch.__version__}")
-        st.error(f"Transformers version: {transformers.__version__}")
         raise e
 
 def extract_text_from_file(uploaded_file) -> str:
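A plausible reason the dropped line above was failing: if app.py imports only classes (`from transformers import ...`), the bare module name `transformers` is undefined and this error line would itself raise a NameError. That reading of the surrounding imports is an assumption; the working variant needs the module itself:

    # Reporting the library version requires importing the module itself;
    # `from transformers import AutoTokenizer` alone leaves the name
    # `transformers` unbound.
    import transformers

    print(f"Transformers version: {transformers.__version__}")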
@@ -171,6 +170,7 @@ def interpret_context(text: str, gemma_tuple: Tuple) -> str:
         outputs = model.generate(
             **inputs,
             max_length=512,
+            do_sample=True,
             temperature=0.3,
             pad_token_id=tokenizer.eos_token_id,
             num_return_sequences=1
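The `do_sample=True` added in this hunk (and echoed in the two generation hunks below) matters because transformers applies `temperature` and `top_p` only when sampling is enabled; without it, generation is greedy or pure beam search and recent versions warn that the flags are ignored. A self-contained sketch using the same checkpoint as the app:

    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")

    inputs = tokenizer("Hello, world!", return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_length=64,
        do_sample=True,    # without this, temperature=0.3 is ignored
        temperature=0.3,   # low temperature: close to greedy, slightly varied
    )
    print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

In the two hunks below, `do_sample=True` is combined with `num_beams=5`, which selects transformers' beam-search multinomial sampling rather than plain sampling; a valid mode, just a less common one.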
@@ -191,18 +191,20 @@ def translate_text(text: str, source_lang: str, target_lang: str, nllb_tuple: Tuple) -> str:
     translated_batches = []
 
     for batch in batches:
-        # Set the source language for the tokenizer
-        tokenizer.src_lang = source_lang
-
-        # Prepare the input text
+        # Prepare the input text with source language token
         inputs = tokenizer(batch, return_tensors="pt", max_length=512, truncation=True)
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
-        # Generate translation with forced target language
+        # Get target language token ID
+        target_lang_token = f"___{target_lang}___"
+        target_lang_id = tokenizer.convert_tokens_to_ids(target_lang_token)
+
+        # Generate translation
         outputs = model.generate(
             **inputs,
-            forced_bos_token_id=tokenizer.get_lang_id(target_lang),
+            forced_bos_token_id=target_lang_id,
             max_length=512,
+            do_sample=True,
             temperature=0.7,
             num_beams=5,
             num_return_sequences=1
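The token format introduced here is worth double-checking: with the stock `facebook/nllb-200-distilled-600M` tokenizer in transformers, the FLORES-200 code itself (e.g. `fra_Latn`) is the language token, so wrapping it as `___fra_Latn___` would, as far as I can tell, resolve to the unknown token. (`get_lang_id`, the call being replaced, is an M2M100 tokenizer method, which is presumably why it failed on the NLLB tokenizer.) A defensive sketch with a hypothetical helper `lang_token_id`:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

    def lang_token_id(tok, code: str) -> int:
        """Resolve a language token id, failing loudly on a bad code."""
        token_id = tok.convert_tokens_to_ids(code)
        if token_id == tok.unk_token_id:
            raise ValueError(f"{code!r} is not a language token in this vocabulary")
        return token_id

    print(lang_token_id(tokenizer, "fra_Latn"))    # resolves to a real id
    # lang_token_id(tokenizer, "___fra_Latn___")   # would raise with this checkpoint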
@@ -242,9 +244,9 @@ def correct_grammar(text: str, target_lang: str, mt5_tuple: Tuple) -> str:
             **inputs,
             max_length=512,
             num_beams=5,
+            do_sample=True,
             temperature=0.7,
             top_p=0.9,
-            do_sample=True,
             num_return_sequences=1
         )
 
@@ -309,7 +311,6 @@ def main():
         with st.spinner("Processing document..."):
             # Extract text
             text = extract_text_from_file(uploaded_file)
-            st.text_area("Extracted Text:", value=text, height=150)
 
             # Interpret context
             with st.spinner("Interpreting context..."):
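The extracted-text preview removed in this hunk disappears from the UI entirely; if a lighter-weight preview were still wanted, a collapsed expander is one option (a hypothetical sketch, not part of the commit):

    import streamlit as st

    text = "example extracted text"  # stand-in for extract_text_from_file()
    with st.expander("Extracted Text (preview)", expanded=False):
        st.text_area("Extracted Text:", value=text, height=150)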
 