frankai98 committed on
Commit
9c4bbfa
·
verified ·
1 Parent(s): 219393b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -43
app.py CHANGED
@@ -6,7 +6,6 @@ from transformers import pipeline
6
  from huggingface_hub import login
7
  from streamlit.components.v1 import html
8
  import pandas as pd
9
- import re
10
 
11
  # Retrieve the token from environment variables
12
  hf_token = os.environ.get("HF_TOKEN")
@@ -17,9 +16,7 @@ if not hf_token:
17
  # Login with the token
18
  login(token=hf_token)
19
 
20
- # Initialize session state for timer and results
21
- if 'result' not in st.session_state:
22
- st.session_state.result = {}
23
  if 'timer_started' not in st.session_state:
24
  st.session_state.timer_started = False
25
  if 'timer_frozen' not in st.session_state:
@@ -51,55 +48,44 @@ def timer():
51
  </script>
52
  """
53
 
54
- st.set_page_config(page_title="Sentiment & Report Generator", page_icon="📝")
55
- st.header("Sentiment Analysis & Report Generation with Gemma")
56
 
57
- # Introduction for the Hugging Face interface
58
- st.write("""
59
- Welcome to the Sentiment Analysis & Report Generator app!
60
- This tool leverages Hugging Face’s models to analyze your text by scoring candidate documents based on a query.
61
- The input along with their scores is then used to generate a detailed report explaining key insights.
62
- You can either paste your query text directly into the text area and optionally upload a CSV file containing candidate documents.
63
- If no CSV is provided, the query text will be split into sentences to serve as candidate documents.
64
- """)
65
 
66
  # Load models with caching to avoid reloading on every run
67
  @st.cache_resource
68
  def load_models():
69
- # Load the text-classification pipeline (acting as our scoring model).
70
- sentiment_pipe = pipeline("text-classification", model="mixedbread-ai/mxbai-rerank-base-v1")
71
  # Load the Gemma text generation pipeline.
72
- gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it")
73
- return sentiment_pipe, gemma_pipe
74
 
75
- sentiment_pipe, gemma_pipe = load_models()
76
 
77
- # Input: Query text and file upload for candidate documents
78
- query_input = st.text_area("Enter your query text for analysis:")
79
- uploaded_file = st.file_uploader("Upload Candidate Documents (CSV format)", type=["csv"])
80
 
81
- # Prepare candidate documents
82
- candidate_docs = []
83
  if uploaded_file is not None:
84
  try:
85
  df = pd.read_csv(uploaded_file)
86
- # Try to use a column named 'document'; if not present, take the first column.
87
- if 'document' in df.columns:
88
- candidate_docs = df['document'].dropna().astype(str).tolist()
89
- else:
90
- candidate_docs = df.iloc[:, 0].dropna().astype(str).tolist()
91
  except Exception as e:
92
  st.error(f"Error reading CSV file: {e}")
 
93
  else:
94
- # If no CSV uploaded, split the query text into sentences as candidate documents.
95
- if query_input.strip():
96
- candidate_docs = re.split(r'(?<=[.!?])\s+', query_input.strip())
97
 
98
  if st.button("Generate Report"):
99
  if not query_input.strip():
100
  st.error("Please enter a query text!")
101
- elif not candidate_docs:
102
- st.error("No candidate documents available. Please enter text or upload a CSV file.")
103
  else:
104
  if not st.session_state.timer_started and not st.session_state.timer_frozen:
105
  st.session_state.timer_started = True
@@ -107,38 +93,36 @@ if st.button("Generate Report"):
107
  status_text = st.empty()
108
  progress_bar = st.progress(0)
109
  try:
110
- # Stage 1: Score candidate documents without reranking.
111
  status_text.markdown("**🔍 Scoring candidate documents...**")
112
  progress_bar.progress(0)
113
 
114
- # Create query-document pairs and score each pair.
115
  scored_docs = []
116
  for doc in candidate_docs:
117
  combined_text = f"Query: {query_input} Document: {doc}"
118
- result = sentiment_pipe(combined_text)[0]
119
- # Append the document along with its score.
120
  scored_docs.append((doc, result["score"]))
121
 
122
  progress_bar.progress(50)
123
 
124
- # Stage 2: Generate Report using Gemma, using the scored candidate documents.
125
  status_text.markdown("**📝 Generating report with Gemma...**")
126
  prompt = f"""
127
  Generate a detailed report based on the following analysis.
128
  Query:
129
  "{query_input}"
130
- Candidate Documents with their scores:
131
  {scored_docs}
132
- Please provide a concise summary report explaining the insights derived from this analysis.
133
  """
134
  report = gemma_pipe(prompt, max_length=200)
135
  progress_bar.progress(100)
136
  status_text.success("**✅ Generation complete!**")
137
  html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
138
  st.session_state.timer_frozen = True
139
- st.write("**Scored Candidate Documents:**", scored_docs)
140
  st.write("**Generated Report:**", report[0]['generated_text'])
141
  except Exception as e:
142
  html("<script>document.getElementById('timer').remove();</script>")
143
  status_text.error(f"**❌ Error:** {str(e)}")
144
- progress_bar.empty()
 
6
  from huggingface_hub import login
7
  from streamlit.components.v1 import html
8
  import pandas as pd
 
9
 
10
  # Retrieve the token from environment variables
11
  hf_token = os.environ.get("HF_TOKEN")
 
16
  # Login with the token
17
  login(token=hf_token)
18
 
19
+ # Initialize session state for timer
 
 
20
  if 'timer_started' not in st.session_state:
21
  st.session_state.timer_started = False
22
  if 'timer_frozen' not in st.session_state:
 
48
  </script>
49
  """
50
 
51
+ st.set_page_config(page_title="Review Scorer & Report Generator", page_icon="📝")
52
+ st.header("Review Scorer & Report Generator")
53
 
54
+ # Concise introduction
55
+ st.write("This model will score your reviews in your CSV file and generate a report based on those results.")
 
 
 
 
 
 
56
 
57
  # Load models with caching to avoid reloading on every run
58
  @st.cache_resource
59
  def load_models():
60
+ # Load the scoring model via pipeline.
61
+ score_pipe = pipeline("text-classification", model="mixedbread-ai/mxbai-rerank-base-v1")
62
  # Load the Gemma text generation pipeline.
63
+ gemma_pipe = pipeline("text-generation", model="google/gemma-3-1b-it", use_auth_token=hf_token)
64
+ return score_pipe, gemma_pipe
65
 
66
+ score_pipe, gemma_pipe = load_models()
67
 
68
+ # Input: Query text for scoring and CSV file upload for candidate reviews
69
+ query_input = st.text_area("Enter your query text for analysis (this does not need to be part of the CSV):")
70
+ uploaded_file = st.file_uploader("Upload Reviews CSV File (must contain a 'document' column)", type=["csv"])
71
 
 
 
72
  if uploaded_file is not None:
73
  try:
74
  df = pd.read_csv(uploaded_file)
75
+ if 'document' not in df.columns:
76
+ st.error("CSV must contain a 'document' column.")
77
+ st.stop()
78
+ candidate_docs = df['document'].dropna().astype(str).tolist()
 
79
  except Exception as e:
80
  st.error(f"Error reading CSV file: {e}")
81
+ st.stop()
82
  else:
83
+ st.error("Please upload a CSV file.")
84
+ st.stop()
 
85
 
86
  if st.button("Generate Report"):
87
  if not query_input.strip():
88
  st.error("Please enter a query text!")
 
 
89
  else:
90
  if not st.session_state.timer_started and not st.session_state.timer_frozen:
91
  st.session_state.timer_started = True
 
93
  status_text = st.empty()
94
  progress_bar = st.progress(0)
95
  try:
96
+ # Stage 1: Score candidate documents using the provided query.
97
  status_text.markdown("**🔍 Scoring candidate documents...**")
98
  progress_bar.progress(0)
99
 
 
100
  scored_docs = []
101
  for doc in candidate_docs:
102
  combined_text = f"Query: {query_input} Document: {doc}"
103
+ result = score_pipe(combined_text)[0]
 
104
  scored_docs.append((doc, result["score"]))
105
 
106
  progress_bar.progress(50)
107
 
108
+ # Stage 2: Generate Report using Gemma, including query and scored results.
109
  status_text.markdown("**📝 Generating report with Gemma...**")
110
  prompt = f"""
111
  Generate a detailed report based on the following analysis.
112
  Query:
113
  "{query_input}"
114
+ Candidate Reviews with their scores:
115
  {scored_docs}
116
+ Please provide a concise summary report explaining the insights derived from these scores.
117
  """
118
  report = gemma_pipe(prompt, max_length=200)
119
  progress_bar.progress(100)
120
  status_text.success("**✅ Generation complete!**")
121
  html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
122
  st.session_state.timer_frozen = True
123
+ st.write("**Scored Candidate Reviews:**", scored_docs)
124
  st.write("**Generated Report:**", report[0]['generated_text'])
125
  except Exception as e:
126
  html("<script>document.getElementById('timer').remove();</script>")
127
  status_text.error(f"**❌ Error:** {str(e)}")
128
+ progress_bar.empty()