frankai98 committed on
Commit
3899916
·
verified ·
1 Parent(s): f9bd764

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -24
app.py CHANGED
@@ -6,6 +6,7 @@ from transformers import pipeline
6
  from huggingface_hub import login
7
  from streamlit.components.v1 import html
8
  import pandas as pd
 
9
 
10
  # Retrieve the token from environment variables
11
  hf_token = os.environ.get("HF_TOKEN")
@@ -56,40 +57,49 @@ st.header("Sentiment Analysis & Report Generation with Gemma")
56
  # Introduction for the Hugging Face interface
57
  st.write("""
58
  Welcome to the Sentiment Analysis & Report Generator app!
59
- This tool leverages Hugging Face’s models to analyze your text and generate a detailed report explaining key insights.
60
- You can either paste your review text directly into the text area or upload a CSV file containing your reviews.
 
 
61
  """)
62
 
63
  # Load models with caching to avoid reloading on every run
64
  @st.cache_resource
65
  def load_models():
66
- # Load the "reranker" model via pipeline.
67
- sentiment_pipe = pipeline("text-classification", model="Alibaba-NLP/gte-multilingual-reranker-base", trust_remote_code=True)
68
  # Load the Gemma text generation pipeline.
69
- gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it")
70
  return sentiment_pipe, gemma_pipe
71
 
72
  sentiment_pipe, gemma_pipe = load_models()
73
 
74
- # Provide two options for input: file upload (CSV) or text area
75
- uploaded_file = st.file_uploader("Upload Review File (CSV format)", type=["csv"])
76
- user_input = st.text_area("Or, enter your text for analysis and report generation:")
77
 
 
 
78
  if uploaded_file is not None:
79
  try:
80
- # Read the CSV file; if a column named 'review' exists, use it.
81
  df = pd.read_csv(uploaded_file)
82
- if 'review' in df.columns:
83
- user_input = " ".join(df['review'].astype(str).tolist())
 
84
  else:
85
- # Otherwise, join all text from the first column.
86
- user_input = " ".join(df.iloc[:, 0].astype(str).tolist())
87
  except Exception as e:
88
- st.error(f"Error reading file: {e}")
 
 
 
 
89
 
90
  if st.button("Generate Report"):
91
- if not user_input.strip():
92
- st.error("Please enter some text!")
 
 
93
  else:
94
  if not st.session_state.timer_started and not st.session_state.timer_frozen:
95
  st.session_state.timer_started = True
@@ -97,20 +107,24 @@ if st.button("Generate Report"):
97
  status_text = st.empty()
98
  progress_bar = st.progress(0)
99
  try:
100
- # Stage 1: Reranking analysis using the sentiment pipeline
101
  status_text.markdown("**🔍 Running reranking analysis...**")
102
  progress_bar.progress(0)
103
- rerank_result = sentiment_pipe(user_input)
 
 
104
  progress_bar.progress(50)
105
 
106
- # Stage 2: Generate Report using Gemma, using the rerank result
107
  status_text.markdown("**📝 Generating report with Gemma...**")
108
  prompt = f"""
109
  Generate a detailed report based on the following analysis.
110
- Original text:
111
- "{user_input}"
112
- Reranking analysis result:
113
- {rerank_result}
 
 
114
  Please provide a concise summary report explaining the insights derived from this analysis.
115
  """
116
  report = gemma_pipe(prompt, max_length=200)
@@ -118,7 +132,7 @@ Please provide a concise summary report explaining the insights derived from thi
118
  status_text.success("**✅ Generation complete!**")
119
  html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
120
  st.session_state.timer_frozen = True
121
- st.write("**Reranking Analysis Result:**", rerank_result)
122
  st.write("**Generated Report:**", report[0]['generated_text'])
123
  except Exception as e:
124
  html("<script>document.getElementById('timer').remove();</script>")
 
6
  from huggingface_hub import login
7
  from streamlit.components.v1 import html
8
  import pandas as pd
9
+ import re
10
 
11
  # Retrieve the token from environment variables
12
  hf_token = os.environ.get("HF_TOKEN")
 
57
  # Introduction for the Hugging Face interface
58
  st.write("""
59
  Welcome to the Sentiment Analysis & Report Generator app!
60
+ This tool leverages Hugging Face’s models to analyze your text by reranking candidate documents based on a query,
61
+ and then generates a detailed report explaining key insights.
62
+ You can either paste your query text directly into the text area and optionally upload a CSV file containing candidate documents.
63
+ If no CSV is provided, the query text will be split into sentences to serve as candidate documents.
64
  """)
65
 
66
  # Load models with caching to avoid reloading on every run
67
  @st.cache_resource
68
  def load_models():
69
+ # Load the reranker model via pipeline.
70
+ sentiment_pipe = pipeline("text-classification", model="mixedbread-ai/mxbai-rerank-base-v1")
71
  # Load the Gemma text generation pipeline.
72
+ gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it", use_auth_token=hf_token)
73
  return sentiment_pipe, gemma_pipe
74
 
75
  sentiment_pipe, gemma_pipe = load_models()
76
 
77
+ # Input: Query text and file upload for candidate documents
78
+ query_input = st.text_area("Enter your query text for analysis:")
79
+ uploaded_file = st.file_uploader("Upload Candidate Documents (CSV format)", type=["csv"])
80
 
81
+ # Prepare candidate documents
82
+ candidate_docs = []
83
  if uploaded_file is not None:
84
  try:
 
85
  df = pd.read_csv(uploaded_file)
86
+ # Try to use a column named 'document'; if not present, take the first column.
87
+ if 'document' in df.columns:
88
+ candidate_docs = df['document'].dropna().astype(str).tolist()
89
  else:
90
+ candidate_docs = df.iloc[:, 0].dropna().astype(str).tolist()
 
91
  except Exception as e:
92
+ st.error(f"Error reading CSV file: {e}")
93
+ else:
94
+ # If no CSV uploaded, split the query text into sentences as candidate documents.
95
+ if query_input.strip():
96
+ candidate_docs = re.split(r'(?<=[.!?]) +', query_input.strip())
97
 
98
  if st.button("Generate Report"):
99
+ if not query_input.strip():
100
+ st.error("Please enter a query text!")
101
+ elif not candidate_docs:
102
+ st.error("No candidate documents available. Please enter text or upload a CSV file.")
103
  else:
104
  if not st.session_state.timer_started and not st.session_state.timer_frozen:
105
  st.session_state.timer_started = True
 
107
  status_text = st.empty()
108
  progress_bar = st.progress(0)
109
  try:
110
+ # Stage 1: Reranking analysis using the model's rank method.
111
  status_text.markdown("**🔍 Running reranking analysis...**")
112
  progress_bar.progress(0)
113
+ # Use the pipeline's underlying model to rank candidate documents with the given query.
114
+ # Note: We access the model via sentiment_pipe.model.
115
+ results = sentiment_pipe.model.rank(query_input, candidate_docs, return_documents=True, top_k=3)
116
  progress_bar.progress(50)
117
 
118
+ # Stage 2: Generate Report using Gemma, using the reranking result.
119
  status_text.markdown("**📝 Generating report with Gemma...**")
120
  prompt = f"""
121
  Generate a detailed report based on the following analysis.
122
+ Query:
123
+ "{query_input}"
124
+ Candidate Documents:
125
+ {candidate_docs}
126
+ Reranking Analysis Result (Top 3):
127
+ {results}
128
  Please provide a concise summary report explaining the insights derived from this analysis.
129
  """
130
  report = gemma_pipe(prompt, max_length=200)
 
132
  status_text.success("**✅ Generation complete!**")
133
  html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
134
  st.session_state.timer_frozen = True
135
+ st.write("**Reranking Analysis Result:**", results)
136
  st.write("**Generated Report:**", report[0]['generated_text'])
137
  except Exception as e:
138
  html("<script>document.getElementById('timer').remove();</script>")