poemsforaphrodite committed on
Commit 66111ac
1 Parent(s): 3a8e960

Update app.py

Files changed (1)
app.py +67 -26
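
In short: the commit threads Python's standard logging module through app.py, from client initialization down to the Streamlit UI handlers, and drops a handful of stale inline comments along the way. The pattern it applies is sketched below as a minimal standalone example; the basicConfig line and the first message are taken verbatim from the diff, while the URL and error text are illustrative placeholders:

import logging

# Root-logger setup as added at the top of app.py: DEBUG level,
# timestamped "LEVEL - message" records.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

logging.info("Environment variables loaded")                      # milestone events
logging.debug("Fetching content from URL: https://example.com")   # per-step tracing
logging.error("Error fetching GSC data: request timed out")       # failure paths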
app.py CHANGED
@@ -1,3 +1,5 @@
+import logging
+
 # Standard library imports
 import datetime
 import base64
@@ -16,12 +18,16 @@ from sklearn.metrics.pairwise import cosine_similarity
 import requests
 from bs4 import BeautifulSoup

+# Configure logging
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+
 load_dotenv()
-#test
+logging.info("Environment variables loaded")

 # Initialize Cohere client
 COHERE_API_KEY = os.environ["COHERE_API_KEY"]
 co = cohere.Client(COHERE_API_KEY)
+logging.info("Cohere client initialized")

 # Configuration: Set to True if running locally, False if running on Streamlit Cloud
 IS_LOCAL = False
@@ -50,6 +56,7 @@ def setup_streamlit():
     st.set_page_config(page_title="Keyword Relevance Test", layout="wide")
     st.title("Keyword Relevance Test Using Vector Embedding")
     st.divider()
+    logging.info("Streamlit app configured")

 def init_session_state():
     if 'selected_property' not in st.session_state:
@@ -70,47 +77,55 @@ def init_session_state():
         st.session_state.custom_start_date = datetime.date.today() - datetime.timedelta(days=7)
     if 'custom_end_date' not in st.session_state:
         st.session_state.custom_end_date = datetime.date.today()
+    logging.info("Session state initialized")

 # -------------
 # Data Processing Functions
 # -------------

 def fetch_content(url):
+    logging.debug(f"Fetching content from URL: {url}")
     try:
         response = requests.get(url)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
         content = soup.get_text(separator=' ', strip=True)
+        logging.debug(f"Content fetched successfully from URL: {url}")
         return content
     except requests.RequestException as e:
+        logging.error(f"Error fetching content from URL: {url} - {e}")
         return str(e)

 def generate_embeddings(text_list, model_type):
+    logging.debug(f"Generating embeddings for model type: {model_type}")
     if not text_list:
+        logging.warning("Text list is empty, returning empty embeddings")
         return []
     model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'
     input_type = 'search_document'
     response = co.embed(model=model, texts=text_list, input_type=input_type)
     embeddings = response.embeddings
+    logging.debug(f"Embeddings generated successfully for model type: {model_type}")
     return embeddings

 def calculate_relevancy_scores(df, model_type):
+    logging.info("Calculating relevancy scores")
     try:
         page_contents = [fetch_content(url) for url in df['page']]
         page_embeddings = generate_embeddings(page_contents, model_type)
         query_embeddings = generate_embeddings(df['query'].tolist(), model_type)
         relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
         df = df.assign(relevancy_score=relevancy_scores)
+        logging.info("Relevancy scores calculated successfully")
     except Exception as e:
+        logging.error(f"Error calculating relevancy scores: {e}")
         st.warning(f"Error calculating relevancy scores: {e}")
         df = df.assign(relevancy_score=0)
     return df

 def process_gsc_data(df):
-    # Remove the filter for queries below position 10
+    logging.info("Processing GSC data")
     df_sorted = df.sort_values(['impressions'], ascending=[False])
-
-    # Keep only the highest impression query for each page
     df_unique = df_sorted.drop_duplicates(subset='page', keep='first')

     if 'relevancy_score' not in df_unique.columns:
@@ -119,6 +134,7 @@ def process_gsc_data(df):
         df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values

     result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
+    logging.info("GSC data processed successfully")
     return result

 # -------------
@@ -126,6 +142,7 @@ def process_gsc_data(df):
 # -------------

 def load_config():
+    logging.info("Loading Google client configuration")
     client_config = {
         "web": {
             "client_id": os.environ["CLIENT_ID"],
@@ -135,23 +152,29 @@ def load_config():
             "redirect_uris": ["https://poemsforaphrodite-gscpro.hf.space/"],
         }
     }
+    logging.info("Google client configuration loaded")
     return client_config

 def init_oauth_flow(client_config):
+    logging.info("Initializing OAuth flow")
     scopes = ["https://www.googleapis.com/auth/webmasters.readonly"]
     flow = Flow.from_client_config(
         client_config,
         scopes=scopes,
         redirect_uri=client_config["web"]["redirect_uris"][0]
     )
+    logging.info("OAuth flow initialized")
     return flow

 def google_auth(client_config):
+    logging.info("Starting Google authentication")
     flow = init_oauth_flow(client_config)
     auth_url, _ = flow.authorization_url(prompt="consent")
+    logging.info("Google authentication URL generated")
     return flow, auth_url

 def auth_search_console(client_config, credentials):
+    logging.info("Authenticating with Google Search Console")
     token = {
         "token": credentials.token,
         "refresh_token": credentials.refresh_token,
@@ -161,6 +184,7 @@ def auth_search_console(client_config, credentials):
         "scopes": credentials.scopes,
         "id_token": getattr(credentials, "id_token", None),
     }
+    logging.info("Google Search Console authenticated")
     return searchconsole.authenticate(client_config=client_config, credentials=token)

 # -------------
@@ -168,22 +192,29 @@ def auth_search_console(client_config, credentials):
 # -------------

 def list_gsc_properties(credentials):
+    logging.info("Listing GSC properties")
     service = build('webmasters', 'v3', credentials=credentials)
     site_list = service.sites().list().execute()
-    return [site['siteUrl'] for site in site_list.get('siteEntry', [])] or ["No properties found"]
+    properties = [site['siteUrl'] for site in site_list.get('siteEntry', [])] or ["No properties found"]
+    logging.info(f"GSC properties listed: {properties}")
+    return properties

 def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type=None):
+    logging.info(f"Fetching GSC data for property: {webproperty}, search_type: {search_type}, date_range: {start_date} to {end_date}, dimensions: {dimensions}, device_type: {device_type}")
     query = webproperty.query.range(start_date, end_date).search_type(search_type).dimension(*dimensions)
     if 'device' in dimensions and device_type and device_type != 'All Devices':
         query = query.filter('device', 'equals', device_type.lower())
     try:
         df = query.limit(MAX_ROWS).get().to_dataframe()
+        logging.info("GSC data fetched successfully")
         return process_gsc_data(df)
     except Exception as e:
+        logging.error(f"Error fetching GSC data: {e}")
         show_error(e)
         return pd.DataFrame()

 def calculate_relevancy_scores(df, model_type):
+    logging.info("Calculating relevancy scores")
     with st.spinner('Calculating relevancy scores...'):
         try:
             page_contents = [fetch_content(url) for url in df['page']]
@@ -191,20 +222,23 @@ def calculate_relevancy_scores(df, model_type):
             query_embeddings = generate_embeddings(df['query'].tolist(), model_type)
             relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
             df = df.assign(relevancy_score=relevancy_scores)
+            logging.info("Relevancy scores calculated successfully")
         except Exception as e:
+            logging.error(f"Error calculating relevancy scores: {e}")
             st.warning(f"Error calculating relevancy scores: {e}")
             df = df.assign(relevancy_score=0)
     return df

-
 # -------------
 # Utility Functions
 # -------------

 def update_dimensions(selected_search_type):
+    logging.debug(f"Updating dimensions for search type: {selected_search_type}")
     return BASE_DIMENSIONS + ['device'] if selected_search_type in SEARCH_TYPES else BASE_DIMENSIONS

 def calc_date_range(selection, custom_start=None, custom_end=None):
+    logging.debug(f"Calculating date range for selection: {selection}")
     range_map = {
         'Last 7 Days': 7,
         'Last 30 Days': 30,
@@ -216,15 +250,21 @@ def calc_date_range(selection, custom_start=None, custom_end=None):
     today = datetime.date.today()
     if selection == 'Custom Range':
         if custom_start and custom_end:
+            logging.debug(f"Custom date range: {custom_start} to {custom_end}")
             return custom_start, custom_end
         else:
+            logging.debug("Defaulting custom date range to last 7 days")
             return today - datetime.timedelta(days=7), today
-    return today - datetime.timedelta(days=range_map.get(selection, 0)), today
+    date_range = today - datetime.timedelta(days=range_map.get(selection, 0)), today
+    logging.debug(f"Date range calculated: {date_range}")
+    return date_range

 def show_error(e):
+    logging.error(f"An error occurred: {e}")
     st.error(f"An error occurred: {e}")

 def property_change():
+    logging.info(f"Property changed to: {st.session_state['selected_property_selector']}")
     st.session_state.selected_property = st.session_state['selected_property_selector']

 # -------------
@@ -232,28 +272,33 @@ def property_change():
 # -------------

 def show_dataframe(report):
+    logging.info("Showing dataframe preview")
     with st.expander("Preview the First 100 Rows (Unique Pages with Top Query)"):
         st.dataframe(report.head(DF_PREVIEW_ROWS))

 def download_csv_link(report):
+    logging.info("Generating CSV download link")
     def to_csv(df):
         return df.to_csv(index=False, encoding='utf-8-sig')
     csv = to_csv(report)
     b64_csv = base64.b64encode(csv.encode()).decode()
     href = f'<a href="data:file/csv;base64,{b64_csv}" download="search_console_data.csv">Download CSV File</a>'
     st.markdown(href, unsafe_allow_html=True)
+    logging.info("CSV download link generated")

 # -------------
 # Streamlit UI Components
 # -------------

 def show_google_sign_in(auth_url):
+    logging.info("Showing Google sign-in button")
     with st.sidebar:
         if st.button("Sign in with Google"):
             st.write('Please click the link below to sign in:')
             st.markdown(f'[Google Sign-In]({auth_url})', unsafe_allow_html=True)

 def show_property_selector(properties, account):
+    logging.info("Showing property selector")
     selected_property = st.selectbox(
         "Select a Search Console Property:",
         properties,
@@ -265,6 +310,7 @@ def show_property_selector(properties, account):
     return account[selected_property]

 def show_search_type_selector():
+    logging.info("Showing search type selector")
     return st.selectbox(
         "Select Search Type:",
         SEARCH_TYPES,
@@ -273,6 +319,7 @@ def show_search_type_selector():
     )

 def show_model_type_selector():
+    logging.info("Showing model type selector")
     return st.selectbox(
         "Select the embedding model:",
         ["english", "multilingual"],
@@ -280,6 +327,7 @@ def show_model_type_selector():
     )

 def show_date_range_selector():
+    logging.info("Showing date range selector")
     return st.selectbox(
         "Select Date Range:",
         DATE_RANGE_OPTIONS,
@@ -288,10 +336,12 @@ def show_date_range_selector():
     )

 def show_custom_date_inputs():
+    logging.info("Showing custom date inputs")
     st.session_state.custom_start_date = st.date_input("Start Date", st.session_state.custom_start_date)
     st.session_state.custom_end_date = st.date_input("End Date", st.session_state.custom_end_date)

 def show_dimensions_selector(search_type):
+    logging.info("Showing dimensions selector")
     available_dimensions = update_dimensions(search_type)
     return st.multiselect(
         "Select Dimensions:",
@@ -301,57 +351,50 @@ def show_dimensions_selector(search_type):
     )

 def show_paginated_dataframe(report, rows_per_page=20):
-    # Convert 'position' column to integer and 'impressions' to numeric
+    logging.info("Showing paginated dataframe")
     report['position'] = report['position'].astype(int)
     report['impressions'] = pd.to_numeric(report['impressions'], errors='coerce')

-    # Format CTR as percentage and relevancy_score with two decimal places
     def format_ctr(x):
         try:
             return f"{float(x):.2%}"
         except ValueError:
-            return x  # Return the original value if it can't be converted to float
+            return x

     def format_relevancy_score(x):
         try:
             return f"{float(x):.2f}"
         except ValueError:
-            return x  # Return the original value if it can't be converted to float
+            return x

     report['ctr'] = report['ctr'].apply(format_ctr)
     report['relevancy_score'] = report['relevancy_score'].apply(format_relevancy_score)

-    # Create a clickable URL column
     def make_clickable(url):
         return f'<a href="{url}" target="_blank">{url}</a>'

     report['clickable_url'] = report['page'].apply(make_clickable)

-    # Reorder columns to put clickable_url first
     columns = ['clickable_url', 'query', 'impressions', 'clicks', 'ctr', 'position', 'relevancy_score']
     report = report[columns]

-    # Add sorting functionality
-    sort_column = st.selectbox("Sort by:", columns[1:], index=columns[1:].index('impressions'))  # Set 'impressions' as default
+    sort_column = st.selectbox("Sort by:", columns[1:], index=columns[1:].index('impressions'))
     sort_order = st.radio("Sort order:", ("Descending", "Ascending"))

     ascending = sort_order == "Ascending"

-    # Convert back to numeric for sorting
     def safe_float_convert(x):
         try:
             return float(x.rstrip('%')) / 100 if isinstance(x, str) and x.endswith('%') else float(x)
         except ValueError:
-            return 0  # Return 0 or another default value if conversion fails
+            return 0

     report['ctr_numeric'] = report['ctr'].apply(safe_float_convert)
     report['relevancy_score_numeric'] = report['relevancy_score'].apply(safe_float_convert)

-    # Sort using the numeric columns
     sort_column_numeric = sort_column + '_numeric' if sort_column in ['ctr', 'relevancy_score'] else sort_column
     report = report.sort_values(by=sort_column_numeric, ascending=ascending)

-    # Remove the temporary numeric columns
     report = report.drop(columns=['ctr_numeric', 'relevancy_score_numeric'])

     total_rows = len(report)
@@ -373,26 +416,23 @@ def show_paginated_dataframe(report, rows_per_page=20):
     start_idx = (st.session_state.current_page - 1) * rows_per_page
     end_idx = start_idx + rows_per_page

-    # Use st.markdown to display the dataframe with clickable links
     st.markdown(report.iloc[start_idx:end_idx].to_html(escape=False, index=False), unsafe_allow_html=True)
+
 # -------------
 # Main Streamlit App Function
 # -------------

 def main():
+    logging.info("Starting main function")
     setup_streamlit()
     client_config = load_config()

     if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
         st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)

-    # Directly access query parameters using st.query_params
     query_params = st.query_params
-
-    # Retrieve the 'code' parameter
     auth_code = query_params.get("code", None)

-
     if auth_code and 'credentials' not in st.session_state:
         st.session_state.auth_flow.fetch_token(code=auth_code)
         st.session_state.credentials = st.session_state.auth_flow.credentials
@@ -408,7 +448,7 @@ def main():
     webproperty = show_property_selector(properties, account)
     search_type = show_search_type_selector()
     date_range_selection = show_date_range_selector()
-    model_type = show_model_type_selector()  # Add this line
+    model_type = show_model_type_selector()
     if date_range_selection == 'Custom Range':
         show_custom_date_inputs()
         start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
@@ -433,7 +473,8 @@ def main():
         download_csv_link(st.session_state.report_data)
     elif st.session_state.report_data is not None:
         st.warning("No data found for the selected criteria.")
+        logging.warning("No data found for the selected criteria")

-
 if __name__ == "__main__":
+    logging.info("Running main function")
     main()
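
For reference, a record such as logging.info("Cohere client initialized") renders under this format roughly as 2024-07-01 12:00:00,123 - INFO - Cohere client initialized (timestamp illustrative). Since basicConfig sets the root logger to DEBUG, the per-URL logging.debug traces in fetch_content are emitted too, and DEBUG records from third-party libraries (e.g. urllib3 under requests) also pass through, so the level may be worth raising to INFO outside of debugging.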