Spaces:

Dhanush4149
/

Text_summarization_using_llm

Sleeping

App Files Community

Dhanush4149 committed on 28 days ago

Commit

de13ccb

verified ·

1 Parent(s): 038a86f

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -76

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import streamlit as st
-from transformers import pipeline
 import traceback
 # Use Hugging Face Spaces' recommended persistent storage
@@ -16,72 +16,80 @@ def ensure_cache_dir():
     os.makedirs(CACHE_DIR, exist_ok=True)
     return CACHE_DIR
-def load_pipelines():
     """
-    Load summarization pipelines with persistent caching.
     Returns:
-        dict: Dictionary of model pipelines
     """
     try:
         # Ensure cache directory exists
         cache_dir = ensure_cache_dir()
-        # Define model paths within the cache directory
-        bart_cache = os.path.join(cache_dir, "bart-large-cnn")
-        t5_cache = os.path.join(cache_dir, "t5-large")
-        pegasus_cache = os.path.join(cache_dir, "pegasus-cnn_dailymail")
-        # Load pipelines with explicit cache directories
-        bart_pipeline = pipeline(
-            "summarization",
-            model="facebook/bart-large-cnn",
-            cache_dir=bart_cache
         )
-        t5_pipeline = pipeline(
-            "summarization",
-            model="t5-large",
-            cache_dir=t5_cache
-        )
-        pegasus_pipeline = pipeline(
-            "summarization",
-            model="google/pegasus-cnn_dailymail",
-            cache_dir=pegasus_cache
         )
-        return {
-            'BART': bart_pipeline,
-            'T5': t5_pipeline,
-            'Pegasus': pegasus_pipeline
-        }
     except Exception as e:
-        st.error(f"Error loading models: {str(e)}")
         st.error(traceback.format_exc())
-        return {}
-def generate_summary(pipeline, text, model_name):
     """
-    Generate summary for a specific model with error handling.
     Args:
-        pipeline: Hugging Face summarization pipeline
         text (str): Input text to summarize
-        model_name (str): Name of the model
     Returns:
-        str: Generated summary or error message
     """
     try:
-        prompt = "Summarize the below paragraph"
-        summary = pipeline(f"{prompt}\n{text}",
-                           max_length=150,
-                           min_length=50,
-                           length_penalty=2.0,
-                           num_beams=4,
-                           early_stopping=True)[0]['summary_text']
         return summary
     except Exception as e:
-        error_msg = f"Error in {model_name} summarization: {str(e)}"
         st.error(error_msg)
         return error_msg
@@ -91,6 +99,13 @@ def main():
     # Display cache directory info (optional)
     st.info(f"Models will be cached in: {CACHE_DIR}")
     # Text input
     text_input = st.text_area("Enter text to summarize:")
@@ -100,44 +115,33 @@ def main():
             st.error("Please enter text to summarize.")
             return
-        # Load pipelines
-        pipelines = load_pipelines()
-        if not pipelines:
-            st.error("Failed to load models. Please check your internet connection or try again later.")
-            return
         # Create columns for progressive display
         bart_col, t5_col, pegasus_col = st.columns(3)
-        # BART Summary
-        with bart_col:
-            with st.spinner('Generating BART Summary...'):
-                bart_progress = st.progress(0)
-                bart_progress.progress(50)
-                bart_summary = generate_summary(pipelines['BART'], text_input, 'BART')
-                bart_progress.progress(100)
-                st.subheader("BART Summary")
-                st.write(bart_summary)
-        # T5 Summary
-        with t5_col:
-            with st.spinner('Generating T5 Summary...'):
-                t5_progress = st.progress(0)
-                t5_progress.progress(50)
-                t5_summary = generate_summary(pipelines['T5'], text_input, 'T5')
-                t5_progress.progress(100)
-                st.subheader("T5 Summary")
-                st.write(t5_summary)
-        # Pegasus Summary
-        with pegasus_col:
-            with st.spinner('Generating Pegasus Summary...'):
-                pegasus_progress = st.progress(0)
-                pegasus_progress.progress(50)
-                pegasus_summary = generate_summary(pipelines['Pegasus'], text_input, 'Pegasus')
-                pegasus_progress.progress(100)
-                st.subheader("Pegasus Summary")
-                st.write(pegasus_summary)
 if __name__ == "__main__":
     main()

 import os
 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import traceback
 # Use Hugging Face Spaces' recommended persistent storage
     os.makedirs(CACHE_DIR, exist_ok=True)
     return CACHE_DIR
+def load_model_and_tokenizer(model_name):
     """
+    Load model and tokenizer with persistent caching.
+    Args:
+        model_name (str): Name of the model to load
     Returns:
+        tuple: (model, tokenizer)
     """
     try:
         # Ensure cache directory exists
         cache_dir = ensure_cache_dir()
+        # Construct full cache path for this model
+        model_cache_path = os.path.join(cache_dir, model_name.replace('/', '_'))
+        # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            cache_dir=model_cache_path
         )
+        # Load model
+        model = AutoModelForSeq2SeqLM.from_pretrained(
+            model_name,
+            cache_dir=model_cache_path
         )
+        return model, tokenizer
     except Exception as e:
+        st.error(f"Error loading {model_name}: {str(e)}")
         st.error(traceback.format_exc())
+        return None, None
+def generate_summary(model, tokenizer, text, max_length=150):
     """
+    Generate summary using a specific model and tokenizer.
     Args:
+        model: Hugging Face model
+        tokenizer: Hugging Face tokenizer
         text (str): Input text to summarize
+        max_length (int): Maximum length of summary
     Returns:
+        str: Generated summary
     """
     try:
+        # Prepare input
+        inputs = tokenizer(
+            f"summarize: {text}",
+            max_length=512,
+            return_tensors="pt",
+            truncation=True
+        )
+        # Generate summary
+        summary_ids = model.generate(
+            inputs.input_ids,
+            num_beams=4,
+            max_length=max_length,
+            early_stopping=True
+        )
+        # Decode summary
+        summary = tokenizer.decode(
+            summary_ids[0],
+            skip_special_tokens=True
+        )
         return summary
     except Exception as e:
+        error_msg = f"Error in summarization: {str(e)}"
         st.error(error_msg)
         return error_msg
     # Display cache directory info (optional)
     st.info(f"Models will be cached in: {CACHE_DIR}")
+    # Define models
+    models_to_load = {
+        'BART': 'facebook/bart-large-cnn',
+        'T5': 't5-large',
+        'Pegasus': 'google/pegasus-cnn_dailymail'
+    }
     # Text input
     text_input = st.text_area("Enter text to summarize:")
             st.error("Please enter text to summarize.")
             return
         # Create columns for progressive display
         bart_col, t5_col, pegasus_col = st.columns(3)
+        # Function to process each model
+        def process_model(col, model_name, model_path):
+            with col:
+                with st.spinner(f'Generating {model_name} Summary...'):
+                    progress = st.progress(0)
+                    progress.progress(50)
+                    # Load model and tokenizer
+                    model, tokenizer = load_model_and_tokenizer(model_path)
+                    if model and tokenizer:
+                        # Generate summary
+                        summary = generate_summary(model, tokenizer, text_input)
+                        progress.progress(100)
+                        st.subheader(f"{model_name} Summary")
+                        st.write(summary)
+                    else:
+                        st.error(f"Failed to load {model_name} model")
+        # Process each model
+        process_model(bart_col, 'BART', 'facebook/bart-large-cnn')
+        process_model(t5_col, 'T5', 't5-large')
+        process_model(pegasus_col, 'Pegasus', 'google/pegasus-cnn_dailymail')
 if __name__ == "__main__":
     main()