Spaces:

frankai98
/

Tokentesting

Sleeping

App Files Files Community

frankai98 committed on Mar 13

Commit

9c4bbfa

verified ·

1 Parent(s): 219393b

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -43

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ from transformers import pipeline
 from huggingface_hub import login
 from streamlit.components.v1 import html
 import pandas as pd
-import re
 # Retrieve the token from environment variables
 hf_token = os.environ.get("HF_TOKEN")
@@ -17,9 +16,7 @@ if not hf_token:
 # Login with the token
 login(token=hf_token)
-# Initialize session state for timer and results
-if 'result' not in st.session_state:
-    st.session_state.result = {}
 if 'timer_started' not in st.session_state:
     st.session_state.timer_started = False
 if 'timer_frozen' not in st.session_state:
@@ -51,55 +48,44 @@ def timer():
     </script>
     """
-st.set_page_config(page_title="Sentiment & Report Generator", page_icon="📝")
-st.header("Sentiment Analysis & Report Generation with Gemma")
-# Introduction for the Hugging Face interface
-st.write("""
-Welcome to the Sentiment Analysis & Report Generator app!
-This tool leverages Hugging Face’s models to analyze your text by scoring candidate documents based on a query.
-The input along with their scores is then used to generate a detailed report explaining key insights.
-You can either paste your query text directly into the text area and optionally upload a CSV file containing candidate documents.
-If no CSV is provided, the query text will be split into sentences to serve as candidate documents.
-""")
 # Load models with caching to avoid reloading on every run
 @st.cache_resource
 def load_models():
-    # Load the text-classification pipeline (acting as our scoring model).
-    sentiment_pipe = pipeline("text-classification", model="mixedbread-ai/mxbai-rerank-base-v1")
     # Load the Gemma text generation pipeline.
-    gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it")
-    return sentiment_pipe, gemma_pipe
-sentiment_pipe, gemma_pipe = load_models()
-# Input: Query text and file upload for candidate documents
-query_input = st.text_area("Enter your query text for analysis:")
-uploaded_file = st.file_uploader("Upload Candidate Documents (CSV format)", type=["csv"])
-# Prepare candidate documents
-candidate_docs = []
 if uploaded_file is not None:
     try:
         df = pd.read_csv(uploaded_file)
-        # Try to use a column named 'document'; if not present, take the first column.
-        if 'document' in df.columns:
-            candidate_docs = df['document'].dropna().astype(str).tolist()
-        else:
-            candidate_docs = df.iloc[:, 0].dropna().astype(str).tolist()
     except Exception as e:
         st.error(f"Error reading CSV file: {e}")
 else:
-    # If no CSV uploaded, split the query text into sentences as candidate documents.
-    if query_input.strip():
-        candidate_docs = re.split(r'(?<=[.!?])\s+', query_input.strip())
 if st.button("Generate Report"):
     if not query_input.strip():
         st.error("Please enter a query text!")
-    elif not candidate_docs:
-        st.error("No candidate documents available. Please enter text or upload a CSV file.")
     else:
         if not st.session_state.timer_started and not st.session_state.timer_frozen:
             st.session_state.timer_started = True
@@ -107,38 +93,36 @@ if st.button("Generate Report"):
         status_text = st.empty()
         progress_bar = st.progress(0)
         try:
-            # Stage 1: Score candidate documents without reranking.
             status_text.markdown("**🔍 Scoring candidate documents...**")
             progress_bar.progress(0)
-            # Create query-document pairs and score each pair.
             scored_docs = []
             for doc in candidate_docs:
                 combined_text = f"Query: {query_input} Document: {doc}"
-                result = sentiment_pipe(combined_text)[0]
-                # Append the document along with its score.
                 scored_docs.append((doc, result["score"]))
             progress_bar.progress(50)
-            # Stage 2: Generate Report using Gemma, using the scored candidate documents.
             status_text.markdown("**📝 Generating report with Gemma...**")
             prompt = f"""
 Generate a detailed report based on the following analysis.
 Query:
 "{query_input}"
-Candidate Documents with their scores:
 {scored_docs}
-Please provide a concise summary report explaining the insights derived from this analysis.
 """
             report = gemma_pipe(prompt, max_length=200)
             progress_bar.progress(100)
             status_text.success("**✅ Generation complete!**")
             html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
             st.session_state.timer_frozen = True
-            st.write("**Scored Candidate Documents:**", scored_docs)
             st.write("**Generated Report:**", report[0]['generated_text'])
         except Exception as e:
             html("<script>document.getElementById('timer').remove();</script>")
             status_text.error(f"**❌ Error:** {str(e)}")
-            progress_bar.empty()

 from huggingface_hub import login
 from streamlit.components.v1 import html
 import pandas as pd
 # Retrieve the token from environment variables
 hf_token = os.environ.get("HF_TOKEN")
 # Login with the token
 login(token=hf_token)
+# Initialize session state for timer
 if 'timer_started' not in st.session_state:
     st.session_state.timer_started = False
 if 'timer_frozen' not in st.session_state:
     </script>
     """
+st.set_page_config(page_title="Review Scorer & Report Generator", page_icon="📝")
+st.header("Review Scorer & Report Generator")
+# Concise introduction
+st.write("This model will score your reviews in your CSV file and generate a report based on those results.")
 # Load models with caching to avoid reloading on every run
 @st.cache_resource
 def load_models():
+    # Load the scoring model via pipeline.
+    score_pipe = pipeline("text-classification", model="mixedbread-ai/mxbai-rerank-base-v1")
     # Load the Gemma text generation pipeline.
+    gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it", use_auth_token=hf_token)
+    return score_pipe, gemma_pipe
+score_pipe, gemma_pipe = load_models()
+# Input: Query text for scoring and CSV file upload for candidate reviews
+query_input = st.text_area("Enter your query text for analysis (this does not need to be part of the CSV):")
+uploaded_file = st.file_uploader("Upload Reviews CSV File (must contain a 'document' column)", type=["csv"])
 if uploaded_file is not None:
     try:
         df = pd.read_csv(uploaded_file)
+        if 'document' not in df.columns:
+            st.error("CSV must contain a 'document' column.")
+            st.stop()
+        candidate_docs = df['document'].dropna().astype(str).tolist()
     except Exception as e:
         st.error(f"Error reading CSV file: {e}")
+        st.stop()
 else:
+    st.error("Please upload a CSV file.")
+    st.stop()
 if st.button("Generate Report"):
     if not query_input.strip():
         st.error("Please enter a query text!")
     else:
         if not st.session_state.timer_started and not st.session_state.timer_frozen:
             st.session_state.timer_started = True
         status_text = st.empty()
         progress_bar = st.progress(0)
         try:
+            # Stage 1: Score candidate documents using the provided query.
             status_text.markdown("**🔍 Scoring candidate documents...**")
             progress_bar.progress(0)
             scored_docs = []
             for doc in candidate_docs:
                 combined_text = f"Query: {query_input} Document: {doc}"
+                result = score_pipe(combined_text)[0]
                 scored_docs.append((doc, result["score"]))
             progress_bar.progress(50)
+            # Stage 2: Generate Report using Gemma, including query and scored results.
             status_text.markdown("**📝 Generating report with Gemma...**")
             prompt = f"""
 Generate a detailed report based on the following analysis.
 Query:
 "{query_input}"
+Candidate Reviews with their scores:
 {scored_docs}
+Please provide a concise summary report explaining the insights derived from these scores.
 """
             report = gemma_pipe(prompt, max_length=200)
             progress_bar.progress(100)
             status_text.success("**✅ Generation complete!**")
             html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
             st.session_state.timer_frozen = True
+            st.write("**Scored Candidate Reviews:**", scored_docs)
             st.write("**Generated Report:**", report[0]['generated_text'])
         except Exception as e:
             html("<script>document.getElementById('timer').remove();</script>")
             status_text.error(f"**❌ Error:** {str(e)}")
+            progress_bar.empty()