Spaces:
poemsforaphrodite committed on
Commit 302324f · 1 Parent(s): d5343ee
Update app.py
app.py CHANGED
@@ -143,7 +143,7 @@ def get_serp_results(query):
 def fetch_content(url):
     logger.info(f"Fetching content from URL: {url}")
     try:
-        response = requests.get(url)
+        response = requests.get(url, timeout=10)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
         content = soup.get_text(separator=' ', strip=True)
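The only functional change in this hunk is the added `timeout=10`, which keeps a stalled host from blocking the Streamlit worker indefinitely. As a reading aid, here is a minimal, self-contained sketch of that fetch path; `fetch_content_sketch` is an illustrative name, and the empty-string fallback assumes the `except` branch that follows in app.py (not shown in this hunk) behaves that way.

```python
# Minimal sketch of the fetch path with the new timeout, assuming the
# surrounding except clause (not shown in this hunk) returns an empty string.
import requests
from bs4 import BeautifulSoup

def fetch_content_sketch(url: str) -> str:
    try:
        # timeout=10 bounds the connect and read phases of the request.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        return soup.get_text(separator=" ", strip=True)
    except requests.exceptions.RequestException:
        # Timeouts, connection errors, and 4xx/5xx responses all land here.
        return ""
```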
@@ -175,17 +175,28 @@ def analyze_competitors(row, co):
     competitor_urls = get_serp_results(query)
 
     results = []
-    for url in [our_url] + competitor_urls:
+    for url in [our_url] + competitor_urls:
+        try:
+            logger.debug(f"Fetching content for URL: {url}")
+            content = fetch_content(url)
+            if not content:
+                logger.warning(f"No content fetched for URL: {url}")
+                continue
+
+            logger.debug(f"Calculating relevance score for URL: {url}")
+            score = calculate_relevance_score(content, query, co)
+
+            logger.info(f"URL: {url}, Score: {score}")
+            results.append({'url': url, 'relevancy_score': score})
+        except Exception as e:
+            logger.error(f"Error processing URL {url}: {str(e)}")
+            st.error(f"Error processing URL {url}: {str(e)}")
 
     results_df = pd.DataFrame(results).sort_values('relevancy_score', ascending=False)
 
     logger.info(f"Competitor analysis completed. {len(results)} results obtained.")
     return results_df
 
 def show_competitor_analysis(row, co):
     if st.button("Check Competitors", key=f"comp_{row['page']}"):
         logger.info(f"Competitor analysis requested for page: {row['page']}")
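For context on the new per-URL loop, a small sketch of the aggregation step that follows it. The URLs and scores below are made up, and the empty-list guard is added here purely for illustration: pandas raises `KeyError` from `sort_values('relevancy_score')` when `results` is empty, a case this hunk does not appear to handle.

```python
# Sketch of the aggregation step, assuming each loop iteration appends
# {'url': ..., 'relevancy_score': ...}. If every URL failed, `results` is
# empty and sort_values() would raise KeyError, hence the guard here.
import pandas as pd

results = [
    {"url": "https://example.com/ours", "relevancy_score": 0.71},        # hypothetical values
    {"url": "https://example.com/competitor", "relevancy_score": 0.64},  # hypothetical values
]

if results:
    results_df = pd.DataFrame(results).sort_values("relevancy_score", ascending=False)
else:
    results_df = pd.DataFrame(columns=["url", "relevancy_score"])

print(results_df)
```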
@@ -194,20 +205,27 @@ def show_competitor_analysis(row, co):
         st.write("Relevancy Score Comparison:")
         st.dataframe(results_df)
 
+        our_data = results_df[results_df['url'] == row['page']]
+        if our_data.empty:
+            st.error(f"Our page '{row['page']}' is not in the results. This indicates an error in fetching or processing the page.")
+            logger.error(f"Our page '{row['page']}' is missing from the results.")
+        else:
+            our_rank = our_data.index[0] + 1
+            total_results = len(results_df)
+            our_score = our_data['relevancy_score'].values[0]
+
+            logger.info(f"Our page ranks {our_rank} out of {total_results} in terms of relevancy score.")
+            st.write(f"Our page ('{row['page']}') ranks {our_rank} out of {total_results} in terms of relevancy score.")
+            st.write(f"Our relevancy score: {our_score:.4f}")
+
+            if our_score == 0:
+                st.warning("Our page's relevancy score is 0. This might indicate an issue with content fetching or score calculation.")
+            elif our_rank == 1:
+                st.success("Your page has the highest relevancy score!")
+            elif our_rank <= 3:
+                st.info("Your page is among the top 3 most relevant results.")
+            elif our_rank > total_results / 2:
+                st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")
 
 
 def analyze_competitors(row, co):
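One detail worth illustrating from the new ranking block: `sort_values` keeps the original index labels, so `our_data.index[0] + 1` reflects the row's position before sorting rather than its rank in the sorted frame. A minimal sketch of a positional rank, assuming `results_df` is built as in the hunk above; the URLs and scores are hypothetical.

```python
# Sketch of a positional rank. sort_values() keeps the original index
# labels, so our_data.index[0] + 1 gives the row's insertion position,
# not its rank; resetting the index yields the rank after sorting.
import pandas as pd

results_df = pd.DataFrame(
    {"url": ["https://example.com/a", "https://example.com/ours"],  # hypothetical rows
     "relevancy_score": [0.64, 0.71]}
).sort_values("relevancy_score", ascending=False)

ranked = results_df.reset_index(drop=True)
our_data = ranked[ranked["url"] == "https://example.com/ours"]
if not our_data.empty:
    our_rank = our_data.index[0] + 1   # 1 = most relevant after sorting
    print(f"rank {our_rank} of {len(ranked)}")
```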
@@ -320,6 +338,10 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
 def calculate_relevance_score(page_content, query, co):
     logger.info(f"Calculating relevance score for query: {query}")
     try:
+        if not page_content:
+            logger.warning("Empty page content. Returning score 0.")
+            return 0
+
         page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
         query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
         score = cosine_similarity([query_embedding], [page_embedding])[0][0]
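The new guard returns a score of 0 for empty pages before any embedding call is made. For reference, a self-contained sketch of the scoring step itself, using made-up three-element vectors in place of real Cohere embeddings (embed-english-v3.0 returns 1024-dimensional vectors):

```python
# Sketch of the similarity math on plain vectors, assuming embeddings come
# back as lists of floats (as co.embed(...).embeddings does). The toy
# vectors below are made up.
from sklearn.metrics.pairwise import cosine_similarity

query_embedding = [0.1, 0.3, 0.5]   # hypothetical vector
page_embedding = [0.2, 0.1, 0.4]    # hypothetical vector

# cosine_similarity expects 2-D inputs, hence the extra list wrapping and
# the [0][0] indexing to pull the single score back out, as in the hunk.
score = cosine_similarity([query_embedding], [page_embedding])[0][0]
print(round(float(score), 4))
```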