zurin14 committed · verified
Commit ab96885 · Parent(s): ceda26e

Update app.py

Files changed (1): app.py (+28, -11)
app.py CHANGED
@@ -18,45 +18,62 @@ def fetch_text_from_url(url):
     except Exception as e:
         return None, f"Error fetching URL: {e}"
 
+# Function to summarize text using T5
 # Function to summarize text using T5
 def summarize_t5(text, size):
-    model_name = "t5-small"
+    model_name = "C:\\Users\\zurin\\Desktop\\text summarization\\fine_tuned_t52"
     tokenizer = T5Tokenizer.from_pretrained(model_name)
     model = T5ForConditionalGeneration.from_pretrained(model_name)
 
     input_text = f"summarize: {text}"
     inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
 
+    # Define length parameters
     if size == "Short":
-        max_len = 50
+        min_len, max_len = 30, 50
     elif size == "Medium":
-        max_len = 100
+        min_len, max_len = 50, 100
     else: # Long
-        max_len = 200
+        min_len, max_len = 100, 200
 
-    summary_ids = model.generate(inputs["input_ids"], max_length=max_len, min_length=10, length_penalty=2.0, num_beams=4)
+    summary_ids = model.generate(
+        inputs["input_ids"],
+        max_length=max_len,
+        min_length=min_len,  # Use the specified min_length instead of fixed 10
+        length_penalty=1.0,  # Reduced from 2.0 to allow more length variation
+        num_beams=4,
+        early_stopping=True
+    )
     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
     return summary
 
 # Function to summarize text using BART
 def summarize_bart(text, size):
-    model_name = "facebook/bart-large-cnn"
+    model_name = "C:\\Users\\zurin\\Desktop\\text summarization\\fine_tuned_bart"
     tokenizer = BartTokenizer.from_pretrained(model_name)
     model = BartForConditionalGeneration.from_pretrained(model_name)
 
     inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
 
+    # Define length parameters
     if size == "Short":
-        max_len = 50
+        min_len, max_len = 30, 50
     elif size == "Medium":
-        max_len = 100
+        min_len, max_len = 50, 100
     else: # Long
-        max_len = 200
+        min_len, max_len = 100, 200
 
-    summary_ids = model.generate(inputs["input_ids"], max_length=max_len, min_length=10, length_penalty=2.0, num_beams=4)
+    summary_ids = model.generate(
+        inputs["input_ids"],
+        max_length=max_len,
+        min_length=min_len,
+        length_penalty=0.8,  # Reduced from 1.0 to encourage length variation
+        num_beams=6,
+        no_repeat_ngram_size=2,  # Added to prevent repetition
+        early_stopping=True
+    )
     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
     return summary
-
 # Function to convert text to speech and save as a file
 def text_to_speech(text):
     tts = gtts.gTTS(text)
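
For reference, both summarizers take the raw text plus a size label ("Short", "Medium", or "Long") and return a plain string. Below is a minimal usage sketch, not part of the commit: it assumes app.py is importable from the current directory and that the hard-coded model directories in summarize_t5 / summarize_bart (or the original "t5-small" / "facebook/bart-large-cnn" checkpoints substituted in their place) are available locally. The sample article text is purely illustrative.

# Minimal usage sketch (assumption: app.py is importable and the model paths
# used by summarize_t5 / summarize_bart point at checkpoints that exist locally).
from app import summarize_t5, summarize_bart

sample = (
    "Solar panels convert sunlight into electricity using photovoltaic cells. "
    "Falling hardware costs have made rooftop installations increasingly common, "
    "though output still depends heavily on local weather and panel orientation."
)

print("T5 (Short):", summarize_t5(sample, "Short"))        # capped at ~50 tokens
print("BART (Medium):", summarize_bart(sample, "Medium"))  # 50-100 tokens

Note that the generate settings now differ between the two paths: BART uses six beams with no_repeat_ngram_size=2, while T5 keeps four beams, so the same size label can still produce summaries of noticeably different length and style.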