Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ from transformers import pipeline
|
|
6 |
from huggingface_hub import login
|
7 |
from streamlit.components.v1 import html
|
8 |
import pandas as pd
|
|
|
9 |
|
10 |
# Retrieve the token from environment variables
|
11 |
hf_token = os.environ.get("HF_TOKEN")
|
@@ -56,40 +57,49 @@ st.header("Sentiment Analysis & Report Generation with Gemma")
|
|
56 |
# Introduction for the Hugging Face interface
|
57 |
st.write("""
|
58 |
Welcome to the Sentiment Analysis & Report Generator app!
|
59 |
-
This tool leverages Hugging Face’s models to analyze your text
|
60 |
-
|
|
|
|
|
61 |
""")
|
62 |
|
63 |
# Load models with caching to avoid reloading on every run
|
64 |
@st.cache_resource
|
65 |
def load_models():
|
66 |
-
# Load the
|
67 |
-
sentiment_pipe = pipeline("text-classification", model="
|
68 |
# Load the Gemma text generation pipeline.
|
69 |
-
gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it")
|
70 |
return sentiment_pipe, gemma_pipe
|
71 |
|
72 |
sentiment_pipe, gemma_pipe = load_models()
|
73 |
|
74 |
-
#
|
75 |
-
|
76 |
-
|
77 |
|
|
|
|
|
78 |
if uploaded_file is not None:
|
79 |
try:
|
80 |
-
# Read the CSV file; if a column named 'review' exists, use it.
|
81 |
df = pd.read_csv(uploaded_file)
|
82 |
-
|
83 |
-
|
|
|
84 |
else:
|
85 |
-
|
86 |
-
user_input = " ".join(df.iloc[:, 0].astype(str).tolist())
|
87 |
except Exception as e:
|
88 |
-
st.error(f"Error reading file: {e}")
|
|
|
|
|
|
|
|
|
89 |
|
90 |
if st.button("Generate Report"):
|
91 |
-
if not
|
92 |
-
st.error("Please enter
|
|
|
|
|
93 |
else:
|
94 |
if not st.session_state.timer_started and not st.session_state.timer_frozen:
|
95 |
st.session_state.timer_started = True
|
@@ -97,20 +107,24 @@ if st.button("Generate Report"):
|
|
97 |
status_text = st.empty()
|
98 |
progress_bar = st.progress(0)
|
99 |
try:
|
100 |
-
# Stage 1: Reranking analysis using the
|
101 |
status_text.markdown("**🔍 Running reranking analysis...**")
|
102 |
progress_bar.progress(0)
|
103 |
-
|
|
|
|
|
104 |
progress_bar.progress(50)
|
105 |
|
106 |
-
# Stage 2: Generate Report using Gemma, using the
|
107 |
status_text.markdown("**📝 Generating report with Gemma...**")
|
108 |
prompt = f"""
|
109 |
Generate a detailed report based on the following analysis.
|
110 |
-
|
111 |
-
"{
|
112 |
-
|
113 |
-
{
|
|
|
|
|
114 |
Please provide a concise summary report explaining the insights derived from this analysis.
|
115 |
"""
|
116 |
report = gemma_pipe(prompt, max_length=200)
|
@@ -118,7 +132,7 @@ Please provide a concise summary report explaining the insights derived from thi
|
|
118 |
status_text.success("**✅ Generation complete!**")
|
119 |
html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
|
120 |
st.session_state.timer_frozen = True
|
121 |
-
st.write("**Reranking Analysis Result:**",
|
122 |
st.write("**Generated Report:**", report[0]['generated_text'])
|
123 |
except Exception as e:
|
124 |
html("<script>document.getElementById('timer').remove();</script>")
|
|
|
6 |
from huggingface_hub import login
|
7 |
from streamlit.components.v1 import html
|
8 |
import pandas as pd
|
9 |
+
import re
|
10 |
|
11 |
# Retrieve the token from environment variables
|
12 |
hf_token = os.environ.get("HF_TOKEN")
|
|
|
57 |
# Introduction for the Hugging Face interface.
# The query text is always required (the "Generate Report" handler rejects an
# empty query); only the CSV upload is optional, so the wording below avoids
# presenting the two inputs as alternatives.
st.write("""
Welcome to the Sentiment Analysis & Report Generator app!
This tool leverages Hugging Face’s models to analyze your text by reranking candidate documents based on a query,
and then generates a detailed report explaining key insights.
Paste your query text into the text area and, optionally, upload a CSV file containing candidate documents.
If no CSV is provided, the query text will be split into sentences to serve as candidate documents.
""")
|
65 |
|
66 |
# Load models with caching to avoid reloading on every run
|
67 |
@st.cache_resource
def load_models():
    """Build the two Hugging Face pipelines used by the app.

    Decorated with ``st.cache_resource`` so the models are not reloaded on
    every run of the script.

    Returns:
        tuple: ``(sentiment_pipe, gemma_pipe)`` where ``sentiment_pipe`` is a
        ``text-classification`` pipeline over
        ``mixedbread-ai/mxbai-rerank-base-v1`` and ``gemma_pipe`` is a
        ``text-generation`` pipeline over ``google/gemma-3-1b-it``.
    """
    # Load the reranker model via pipeline.
    sentiment_pipe = pipeline(
        "text-classification",
        model="mixedbread-ai/mxbai-rerank-base-v1",
    )
    # Load the Gemma text generation pipeline.
    # `token` replaces the deprecated `use_auth_token` keyword. hf_token comes
    # from the HF_TOKEN environment variable and is None when it is unset.
    gemma_pipe = pipeline(
        "text-generation",
        model="google/gemma-3-1b-it",
        token=hf_token,
    )
    return sentiment_pipe, gemma_pipe
|
74 |
|
75 |
sentiment_pipe, gemma_pipe = load_models()
|
76 |
|
77 |
+
# Input: query text plus an optional CSV upload of candidate documents.
query_input = st.text_area("Enter your query text for analysis:")
uploaded_file = st.file_uploader("Upload Candidate Documents (CSV format)", type=["csv"])

# Prepare candidate documents. The list stays empty when the CSV cannot be
# read or when no CSV is uploaded and the query text is blank.
candidate_docs = []
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
        # Prefer a column named 'document'; if not present, take the first column.
        doc_column = df['document'] if 'document' in df.columns else df.iloc[:, 0]
        candidate_docs = doc_column.dropna().astype(str).tolist()
    except Exception as e:
        # Surface the read/parse failure in the UI instead of crashing the script.
        st.error(f"Error reading CSV file: {e}")
elif query_input.strip():
    # No CSV uploaded: split the query text into sentences to use as candidates.
    # Split on any whitespace run (spaces, tabs, newlines) that follows
    # sentence-ending punctuation, so multi-line text-area input is split too.
    candidate_docs = re.split(r'(?<=[.!?])\s+', query_input.strip())
|
97 |
|
98 |
if st.button("Generate Report"):
|
99 |
+
if not query_input.strip():
|
100 |
+
st.error("Please enter a query text!")
|
101 |
+
elif not candidate_docs:
|
102 |
+
st.error("No candidate documents available. Please enter text or upload a CSV file.")
|
103 |
else:
|
104 |
if not st.session_state.timer_started and not st.session_state.timer_frozen:
|
105 |
st.session_state.timer_started = True
|
|
|
107 |
status_text = st.empty()
|
108 |
progress_bar = st.progress(0)
|
109 |
try:
|
110 |
+
# Stage 1: Reranking analysis using the model's rank method.
|
111 |
status_text.markdown("**🔍 Running reranking analysis...**")
|
112 |
progress_bar.progress(0)
|
113 |
+
# Use the pipeline's underlying model to rank candidate documents with the given query.
|
114 |
+
# Note: We access the model via sentiment_pipe.model.
|
115 |
+
results = sentiment_pipe.model.rank(query_input, candidate_docs, return_documents=True, top_k=3)
|
116 |
progress_bar.progress(50)
|
117 |
|
118 |
+
# Stage 2: Generate Report using Gemma, using the reranking result.
|
119 |
status_text.markdown("**📝 Generating report with Gemma...**")
|
120 |
prompt = f"""
|
121 |
Generate a detailed report based on the following analysis.
|
122 |
+
Query:
|
123 |
+
"{query_input}"
|
124 |
+
Candidate Documents:
|
125 |
+
{candidate_docs}
|
126 |
+
Reranking Analysis Result (Top 3):
|
127 |
+
{results}
|
128 |
Please provide a concise summary report explaining the insights derived from this analysis.
|
129 |
"""
|
130 |
report = gemma_pipe(prompt, max_length=200)
|
|
|
132 |
status_text.success("**✅ Generation complete!**")
|
133 |
html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
|
134 |
st.session_state.timer_frozen = True
|
135 |
+
st.write("**Reranking Analysis Result:**", results)
|
136 |
st.write("**Generated Report:**", report[0]['generated_text'])
|
137 |
except Exception as e:
|
138 |
html("<script>document.getElementById('timer').remove();</script>")
|