Spaces:

frankai98
/

Tokentesting

Sleeping

App Files Files Community

frankai98 committed on Mar 13

Commit

3899916

verified ·

1 Parent(s): f9bd764

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -24

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ from transformers import pipeline
 from huggingface_hub import login
 from streamlit.components.v1 import html
 import pandas as pd
 # Retrieve the token from environment variables
 hf_token = os.environ.get("HF_TOKEN")
@@ -56,40 +57,49 @@ st.header("Sentiment Analysis & Report Generation with Gemma")
 # Introduction for the Hugging Face interface
 st.write("""
 Welcome to the Sentiment Analysis & Report Generator app!
-This tool leverages Hugging Face’s models to analyze your text and generate a detailed report explaining key insights.
-You can either paste your review text directly into the text area or upload a CSV file containing your reviews.
 """)
 # Load models with caching to avoid reloading on every run
 @st.cache_resource
 def load_models():
-    # Load the "reranker" model via pipeline.
-    sentiment_pipe = pipeline("text-classification", model="Alibaba-NLP/gte-multilingual-reranker-base", trust_remote_code=True)
     # Load the Gemma text generation pipeline.
-    gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it")
     return sentiment_pipe, gemma_pipe
 sentiment_pipe, gemma_pipe = load_models()
-# Provide two options for input: file upload (CSV) or text area
-uploaded_file = st.file_uploader("Upload Review File (CSV format)", type=["csv"])
-user_input = st.text_area("Or, enter your text for analysis and report generation:")
 if uploaded_file is not None:
     try:
-        # Read the CSV file; if a column named 'review' exists, use it.
         df = pd.read_csv(uploaded_file)
-        if 'review' in df.columns:
-            user_input = " ".join(df['review'].astype(str).tolist())
         else:
-            # Otherwise, join all text from the first column.
-            user_input = " ".join(df.iloc[:, 0].astype(str).tolist())
     except Exception as e:
-        st.error(f"Error reading file: {e}")
 if st.button("Generate Report"):
-    if not user_input.strip():
-        st.error("Please enter some text!")
     else:
         if not st.session_state.timer_started and not st.session_state.timer_frozen:
             st.session_state.timer_started = True
@@ -97,20 +107,24 @@ if st.button("Generate Report"):
         status_text = st.empty()
         progress_bar = st.progress(0)
         try:
-            # Stage 1: Reranking analysis using the sentiment pipeline
             status_text.markdown("**🔍 Running reranking analysis...**")
             progress_bar.progress(0)
-            rerank_result = sentiment_pipe(user_input)
             progress_bar.progress(50)
-            # Stage 2: Generate Report using Gemma, using the rerank result
             status_text.markdown("**📝 Generating report with Gemma...**")
             prompt = f"""
 Generate a detailed report based on the following analysis.
-Original text:
-"{user_input}"
-Reranking analysis result:
-{rerank_result}
 Please provide a concise summary report explaining the insights derived from this analysis.
 """
             report = gemma_pipe(prompt, max_length=200)
@@ -118,7 +132,7 @@ Please provide a concise summary report explaining the insights derived from thi
             status_text.success("**✅ Generation complete!**")
             html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
             st.session_state.timer_frozen = True
-            st.write("**Reranking Analysis Result:**", rerank_result)
             st.write("**Generated Report:**", report[0]['generated_text'])
         except Exception as e:
             html("<script>document.getElementById('timer').remove();</script>")

 from huggingface_hub import login
 from streamlit.components.v1 import html
 import pandas as pd
+import re
 # Retrieve the token from environment variables
 hf_token = os.environ.get("HF_TOKEN")
 # Introduction for the Hugging Face interface
 st.write("""
 Welcome to the Sentiment Analysis & Report Generator app!
+This tool leverages Hugging Face’s models to analyze your text by reranking candidate documents based on a query,
+and then generates a detailed report explaining key insights.
+You can either paste your query text directly into the text area and optionally upload a CSV file containing candidate documents.
+If no CSV is provided, the query text will be split into sentences to serve as candidate documents.
 """)
 # Load models with caching to avoid reloading on every run
 @st.cache_resource
 def load_models():
+    """Build the two Hugging Face pipelines used by the app.
+
+    Wrapped in ``st.cache_resource`` so the pipelines are not reloaded on
+    every run of the script.
+
+    Returns:
+        A ``(sentiment_pipe, gemma_pipe)`` tuple: the text-classification
+        pipeline for the reranker checkpoint and the text-generation
+        pipeline for Gemma.
+    """
+    # Load the reranker model via pipeline.
+    sentiment_pipe = pipeline("text-classification", model="mixedbread-ai/mxbai-rerank-base-v1")
     # Load the Gemma text generation pipeline.
+    # Pass the access token via `token`; `use_auth_token` is the deprecated spelling.
+    gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it", token=hf_token)
     return sentiment_pipe, gemma_pipe
 sentiment_pipe, gemma_pipe = load_models()
+# Input: Query text and file upload for candidate documents
+query_input = st.text_area("Enter your query text for analysis:")
+uploaded_file = st.file_uploader("Upload Candidate Documents (CSV format)", type=["csv"])
+# Prepare candidate documents: rows of the uploaded CSV when one is given,
+# otherwise the sentences of the query text itself.
+candidate_docs = []
 if uploaded_file is not None:
     try:
         df = pd.read_csv(uploaded_file)
+        # Try to use a column named 'document'; if not present, take the first column.
+        if 'document' in df.columns:
+            candidate_docs = df['document'].dropna().astype(str).tolist()
         else:
+            candidate_docs = df.iloc[:, 0].dropna().astype(str).tolist()
     except Exception as e:
+        # candidate_docs stays empty here; the "Generate Report" handler reports that case.
+        st.error(f"Error reading CSV file: {e}")
+elif query_input.strip():
+    # If no CSV uploaded, split the query text into sentences as candidate documents.
+    # Split on any whitespace after sentence-ending punctuation so that sentences
+    # separated by a line break in the text area are split too, not only by spaces.
+    candidate_docs = re.split(r'(?<=[.!?])\s+', query_input.strip())
 if st.button("Generate Report"):
+    if not query_input.strip():
+        st.error("Please enter a query text!")
+    elif not candidate_docs:
+        st.error("No candidate documents available. Please enter text or upload a CSV file.")
     else:
         if not st.session_state.timer_started and not st.session_state.timer_frozen:
             st.session_state.timer_started = True
         status_text = st.empty()
         progress_bar = st.progress(0)
         try:
+            # Stage 1: Reranking analysis using the model's rank method.
             status_text.markdown("**🔍 Running reranking analysis...**")
             progress_bar.progress(0)
+            # Use the pipeline's underlying model to rank candidate documents with the given query.
+            # Note: We access the model via sentiment_pipe.model.
+            results = sentiment_pipe.model.rank(query_input, candidate_docs, return_documents=True, top_k=3)
             progress_bar.progress(50)
+            # Stage 2: Generate Report using Gemma, using the reranking result.
             status_text.markdown("**📝 Generating report with Gemma...**")
             prompt = f"""
 Generate a detailed report based on the following analysis.
+Query:
+"{query_input}"
+Candidate Documents:
+{candidate_docs}
+Reranking Analysis Result (Top 3):
+{results}
 Please provide a concise summary report explaining the insights derived from this analysis.
 """
             report = gemma_pipe(prompt, max_length=200)
             status_text.success("**✅ Generation complete!**")
             html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
             st.session_state.timer_frozen = True
+            st.write("**Reranking Analysis Result:**", results)
             st.write("**Generated Report:**", report[0]['generated_text'])
         except Exception as e:
             html("<script>document.getElementById('timer').remove();</script>")