Spaces:
Running
Running
File size: 6,732 Bytes
9697bd8 091a415 9697bd8 091a415 e09c163 eb3bf86 e09c163 9697bd8 091a415 9697bd8 091a415 e621749 f6969d6 e621749 091a415 9697bd8 1c2b453 1956784 1c2b453 6085c65 1c2b453 1956784 1c2b453 6085c65 1c2b453 a6af339 091a415 f6969d6 091a415 9697bd8 091a415 6085c65 1956784 6085c65 e621749 ce8f3a5 6085c65 1c2b453 262bbea 6085c65 1c2b453 a6af339 70a5298 091a415 e09c163 091a415 9697bd8 091a415 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import gradio as gr
import requests
import time
import pandas as pd
from datetime import datetime
# Help text listing the chat commands; it is passed to the Gradio chat
# interface as its description. The empty first and last entries reproduce
# the leading and trailing newline of the text.
_PROMPT_LINES = (
    "",
    "List of Tools:",
    "- Sitespeed Checker - Check how fast monitored sites are in seconds. To use, type 'check speed'.",
    "- Sitemap Checker - Check if a site has a sitemap. To use, type 'check sitemap'.",
    "- Robots.txt Checker - Check if a site has robots.txt. To use, type 'check bots'.",
    "- Ads Checker - Shows the links of possible paid ads. To use, type the domain without 'https://www'.",
    "",
)
DEFAULT_SYSTEM_PROMPT = "\n".join(_PROMPT_LINES)

# Date at import time, formatted as YYYY/MM/DD.
vLogDate = format(datetime.today(), '%Y/%m/%d')
# Hostnames of the sites timed by the 'check speed' command, in display order.
_MONITORED_HOSTS = (
    "www.banyantree.com",
    "www.angsana.com",
    "www.cassia.com",
    "www.dhawa.com",
    "www.garrya.com",
    "www.hommhotels.com",
    "www.foliohotels.com",
    "www.groupbanyan.com",
    "veya.banyantree.com",
    "escape.banyantree.com",
    "essentials.banyantree.com",
    "gifting.groupbanyan.com",
    "www.raffles.com",
    "www.aman.com",
    "www.rosewoodhotels.com",
    "www.fourseasons.com",
    "www.radissonhotels.com",
    "www.millenniumhotels.com",
    "www.mandarinoriental.com",
    "www.ritzcarlton.com",
    "www.hyatt.com",
    "www.ihg.com",
)
# Every monitored site is served over HTTPS, so the scheme is added once here.
websites = [f"https://{host}" for host in _MONITORED_HOSTS]
# Hostnames of the sites covered by the 'check bots' and 'check sitemap'
# commands, in display order.
_BT_HOSTS = (
    "www.banyantree.com",
    "www.angsana.com",
    "www.cassia.com",
    "www.dhawa.com",
    "www.garrya.com",
    "www.hommhotels.com",
    "www.foliohotels.com",
    "www.groupbanyan.com",
    "veya.banyantree.com",
    "escape.banyantree.com",
)
# Every listed site is served over HTTPS, so the scheme is added once here.
BTsites = [f"https://{host}" for host in _BT_HOSTS]
def check_robots(url):
    """Report whether a site serves a robots.txt file.

    Sends a HEAD request to ``<url>/robots.txt`` with a browser-like
    User-Agent and classifies the response by status code.

    Args:
        url (str): Base URL of the site, e.g. ``"https://www.example.com"``.
            A trailing slash is tolerated.

    Returns:
        ``'OK'`` when the status code is 200, 302 or 401; the integer status
        code for any other response; ``None`` when the request itself fails
        (the error is printed).
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    # Probe the robots.txt resource itself; a HEAD on the site root only
    # shows that the home page responds.
    robots_url = url.rstrip("/") + "/robots.txt"
    try:
        # The User-Agent is sent so the probe looks like the browser request
        # that check_site_speed makes.
        response = requests.head(robots_url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"Error checking {url}: {e}")
        return None
    # requests.head() does not follow redirects unless asked to, so a 302 is
    # seen here as-is and is accepted alongside 200 and 401.
    if response.status_code in (200, 401, 302):
        return 'OK'
    return response.status_code
def check_sitemap(url):
    """Report whether a site serves a sitemap at ``/sitemap.xml``.

    Sends a HEAD request to ``<url>/sitemap.xml`` with a browser-like
    User-Agent and classifies the response by status code.

    Args:
        url (str): Base URL of the site, e.g. ``"https://www.example.com"``.
            A trailing slash is tolerated.

    Returns:
        ``'OK'`` when the status code is 200, 302 or 401; the integer status
        code for any other response; ``None`` when the request itself fails
        (the error is printed).
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    sitemap_url = url.rstrip("/") + "/sitemap.xml"
    try:
        # Request the sitemap URL, not the site root: the root answering says
        # nothing about whether a sitemap exists.
        response = requests.head(sitemap_url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"Error checking {url}: {e}")
        return None
    # requests.head() does not follow redirects unless asked to, so a 302 is
    # seen here as-is and is accepted alongside 200 and 401.
    if response.status_code in (200, 401, 302):
        return 'OK'
    return response.status_code
def generate_transparency_links(query,history):
    """Build ad-transparency search links for a brand name or domain.

    Args:
        query (str): The brand name, website, or advertiser to search for.
            Surrounding whitespace is ignored.
        history: Chat history supplied by the chat interface; not used.

    Returns:
        str: The string form of a dictionary mapping each transparency site
        name to its search URL for ``query``.
    """
    import urllib.parse

    # Percent-encode the query so spaces, '&', '#' and similar characters
    # cannot break or alter the generated URLs. Spaces become %20, matching
    # the hard-coded "%20promoted" suffix below.
    encoded_query = urllib.parse.quote(query.strip(), safe="")
    # Base URLs for transparency sites
    transparency_links = {
        "Google Ads Transparency Center": f"https://adstransparency.google.com/?q={encoded_query}&domain={encoded_query}&region=anywhere",
        "Facebook Ads Library": f"https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=ALL&q={encoded_query}",
        "TikTok Ads Library": f"https://library.tiktok.com/ads?region=all&adv_name={encoded_query}&adv_biz_ids=&query_type=1&sort_type=last_shown_date,desc",
        "LinkedIn Ads Library": f"https://www.linkedin.com/ad-library/search?accountOwner={encoded_query}",
        # Note: X doesn't have a public ads library, so we use a general search
        "X Search for Promoted Content": f"https://x.com/search?q={encoded_query}%20promoted",
    }
    return str(transparency_links)
def check_site_speed(url):
    """Time a GET request to ``url``.

    Args:
        url (str): Address to fetch.

    Returns:
        The elapsed wall-clock time in seconds, rounded to three decimals,
        when the request succeeds with a non-error status; ``None`` when the
        request fails or the response carries an HTTP error status (the
        error is printed).
    """
    browser_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    try:
        began = time.time()
        # raise_for_status() turns 4xx/5xx responses into exceptions, so only
        # successful fetches produce a timing.
        requests.get(url, headers=browser_headers, timeout=10).raise_for_status()
        finished = time.time()
    except requests.exceptions.RequestException as err:
        print(f"Error checking {url}: {err}")
        return None
    return round(finished - began, 3)  # Response time in seconds
def _run_site_checks(sites, check, result_column, progress_label, result_template):
    """Run ``check`` on every site and return the results as a text table.

    Args:
        sites: Iterable of site URLs to check.
        check: Callable taking a URL and returning a result, or ``None`` when
            the check failed.
        result_column (str): Header of the result column.
        progress_label (str): Text used in the "Checking ... for <site>" log line.
        result_template (str): ``str.format`` template with ``{site}`` and
            ``{result}`` fields, printed for each successful check.

    Returns:
        str: The printed form of a DataFrame with a ``Website`` column and
        the result column. Sites whose check returned ``None`` are omitted.
    """
    rows = []
    for site in sites:
        print(f"Checking {progress_label} for {site}...")
        result = check(site)
        # Fixed pause after every request (presumably to avoid hammering the
        # monitored sites -- confirm before changing).
        time.sleep(5)
        if result is not None:
            print(result_template.format(site=site, result=result))
            rows.append([site, result])
    # Headers go in `columns=` so they label the table instead of appearing
    # as its first data row.
    return str(pd.DataFrame(rows, columns=['Website', result_column]))


def runchecks(message,history):
    """Dispatch a chat message to the matching utility.

    Recognized commands (case-insensitive, surrounding whitespace ignored):

    - ``check speed``: time every site in ``websites``.
    - ``check bots``: robots.txt status for every site in ``BTsites``.
    - ``check sitemap``: sitemap status for every site in ``BTsites``.

    Any other message is treated as a brand or domain and answered with
    ad-transparency links.

    Args:
        message (str): Text typed by the user.
        history: Chat history supplied by the chat interface; only forwarded
            to ``generate_transparency_links``.

    Returns:
        str: A text table of results, or the transparency links as a string.
    """
    command = message.strip().lower()
    if command == 'check speed':
        return _run_site_checks(
            websites,
            check_site_speed,
            'Speed in Seconds',
            'site speed',
            "Response time for {site}: {result} seconds",
        )
    if command == 'check bots':
        return _run_site_checks(
            BTsites,
            check_robots,
            'Bots Status',
            'robots.txt',
            "Robots.txt status for {site}: {result}",
        )
    if command == 'check sitemap':
        return _run_site_checks(
            BTsites,
            check_sitemap,
            'Sitemap Status',
            'sitemaps',
            "Sitemap status for {site}: {result}",
        )
    return generate_transparency_links(message, history)
# Chat UI: every user message is routed through runchecks().
# The interface object itself is kept in `Conversing`. launch() must not be
# chained onto the constructor: it returns launch information rather than the
# interface, which would make the guarded Conversing.launch() call below fail
# and would start the server as a side effect of importing this module.
Conversing = gr.ChatInterface(
    runchecks,
    chatbot=gr.Chatbot(height=600, label="Output"),
    theme=gr.themes.Monochrome(),
    title='BG ECommerce Utilities',
    description=DEFAULT_SYSTEM_PROMPT,
    css='footer {visibility: hidden}',
)
# NOTE(review): the comment below looks unrelated to this app (nothing here
# computes readability) -- confirm and remove.
#"Algorithm for this site is based on Readability Wiki - https://en.wikipedia.org/wiki/Readability "
if __name__ == "__main__":
    Conversing.launch()