File size: 6,732 Bytes
9697bd8
091a415
 
 
 
9697bd8
091a415
e09c163
eb3bf86
 
 
e09c163
9697bd8
 
091a415
9697bd8
091a415
 
 
e621749
 
 
 
 
f6969d6
 
 
 
 
e621749
 
 
 
 
 
 
 
 
 
091a415
9697bd8
1c2b453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1956784
1c2b453
 
6085c65
1c2b453
 
 
 
 
 
 
 
 
1956784
1c2b453
 
6085c65
1c2b453
 
 
 
a6af339
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
091a415
 
 
 
 
 
 
f6969d6
091a415
 
 
9697bd8
091a415
6085c65
1956784
6085c65
e621749
 
 
 
 
 
 
 
 
ce8f3a5
6085c65
1c2b453
 
 
 
 
 
 
 
 
262bbea
6085c65
1c2b453
 
 
 
 
 
 
 
 
a6af339
70a5298
091a415
e09c163
 
091a415
9697bd8
 
 
091a415
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import gradio as gr
import requests
import time
import pandas as pd
from datetime import datetime

# Shown as the chat UI description: the menu of supported commands.
# Any message that matches none of the commands falls through to the
# ads-transparency link generator (see runchecks).
DEFAULT_SYSTEM_PROMPT = """
List of Tools:
- Sitespeed Checker - Check how fast monitored sites are in seconds.  To use, type 'check speed'. 
- Sitemap Checker - Check if a site has a sitemap.  To use, type 'check sitemap'. 
- Robots.txt Checker - Check if a site has robots.txt.  To use, type 'check bots'. 
- Ads Checker - Shows the links of possible paid ads.  To use, type the domain without 'https://www'.
"""

# Today's date as 'YYYY/MM/DD' (naive local time), captured once at import.
# NOTE(review): not referenced anywhere in this chunk — presumably used for
# logging elsewhere, or dead; verify before removing.
vLogDate = datetime.today().strftime('%Y/%m/%d')

# Full monitoring set used by the 'check speed' command: the group's own
# brands plus competitor hotel sites for comparison.
websites = [
        "https://www.banyantree.com",
        "https://www.angsana.com",
        "https://www.cassia.com",
        "https://www.dhawa.com",
        "https://www.garrya.com",
        "https://www.hommhotels.com",
        "https://www.foliohotels.com",
        "https://www.groupbanyan.com",
        "https://veya.banyantree.com",
        "https://escape.banyantree.com",
        "https://essentials.banyantree.com",
        "https://gifting.groupbanyan.com",        
        "https://www.raffles.com",
        "https://www.aman.com",
        "https://www.rosewoodhotels.com",
        "https://www.fourseasons.com",
        "https://www.radissonhotels.com",
        "https://www.millenniumhotels.com",
        "https://www.mandarinoriental.com",
        "https://www.ritzcarlton.com",
        "https://www.hyatt.com",
        "https://www.ihg.com"
]

# Group-owned sites only — the subset checked by 'check bots' and
# 'check sitemap' (no point auditing competitors' robots.txt/sitemaps).
BTsites = [
        "https://www.banyantree.com",
        "https://www.angsana.com",
        "https://www.cassia.com",
        "https://www.dhawa.com",
        "https://www.garrya.com",
        "https://www.hommhotels.com",
        "https://www.foliohotels.com",
        "https://www.groupbanyan.com",
        "https://veya.banyantree.com",
        "https://escape.banyantree.com"
]

def check_robots(url):
    """Check whether *url* serves a robots.txt file.

    Args:
        url (str): Site root, e.g. "https://www.example.com" (no trailing slash).

    Returns:
        'OK' when the robots.txt endpoint answers 200, 302 or 401;
        the numeric status code for any other response;
        None when the request itself fails (DNS, timeout, ...).
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
        # Bug fix: probe the robots.txt path itself (the original HEAD-ed the
        # bare site root) and actually send the browser-like headers, which
        # were previously built but never used.
        response = requests.head(url + "/robots.txt", headers=headers, timeout=10)
        # 302 (redirect) and 401 (auth wall) still prove the endpoint exists.
        if response.status_code in (200, 302, 401):
            return 'OK'
        else:
            return response.status_code
    except requests.exceptions.RequestException as e:
        print(f"Error checking {url}: {e}")
        return None

def check_sitemap(url):
    """Check whether *url* serves a sitemap.xml file.

    Args:
        url (str): Site root, e.g. "https://www.example.com" (no trailing slash).

    Returns:
        'OK' when the sitemap endpoint answers 200, 302 or 401;
        the numeric status code for any other response;
        None when the request itself fails (DNS, timeout, ...).
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
        new_url = url + "/sitemap.xml"
        # Bug fix: the original built new_url but then HEAD-ed the bare site
        # root, so the sitemap was never actually checked; it also never sent
        # the headers it constructed.
        response = requests.head(new_url, headers=headers, timeout=10)
        # 302 (redirect) and 401 (auth wall) still prove the endpoint exists.
        if response.status_code in (200, 302, 401):
            return 'OK'
        else:
            return response.status_code
    except requests.exceptions.RequestException as e:
        print(f"Error checking {url}: {e}")
        return None

def generate_transparency_links(query, history):
    """
    Generate links to ad-transparency sites for a given query (e.g., brand name or website).
    Args:
        query (str): The brand name, website, or advertiser to search for.
        history: Chat history (unused; required by the gr.ChatInterface contract).
    Returns:
        str: A stringified dict mapping transparency site names to their URLs.
    """
    import urllib.parse
    # Bug fix: actually URL-encode the query (spaces, '&', non-ASCII, ...)
    # instead of only stripping whitespace — urllib.parse was imported but
    # never used, producing broken links for multi-word brand names.
    encoded_query = urllib.parse.quote_plus(query.strip())

    # Base URLs for transparency sites
    transparency_links = {
        "Google Ads Transparency Center": f"https://adstransparency.google.com/?q={encoded_query}&domain={encoded_query}&region=anywhere", 
        "Facebook Ads Library": f"https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=ALL&q={encoded_query}",
        "TikTok Ads Library": f"https://library.tiktok.com/ads?region=all&adv_name={encoded_query}&adv_biz_ids=&query_type=1&sort_type=last_shown_date,desc",  
        "LinkedIn Ads Library": f"https://www.linkedin.com/ad-library/search?accountOwner={encoded_query}", 
        # Note: X doesn't have a public ads library, so we use a general search
        "X Search for Promoted Content": f"https://x.com/search?q={encoded_query}%20promoted"}
    
    return str(transparency_links)

def check_site_speed(url):
    """Fetch *url* with a browser-like User-Agent and return the round-trip
    time in seconds (rounded to 3 decimal places).

    Returns None when the request fails or the server answers with an
    HTTP error status.
    """
    ua_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    try:
        started = time.time()
        reply = requests.get(url, headers=ua_headers, timeout=10)
        reply.raise_for_status()  # treat 4xx/5xx as failure, not a timing
        elapsed = time.time() - started
        return round(elapsed, 3)
    except requests.exceptions.RequestException as err:
        print(f"Error checking {url}: {err}")
        return None

def _poll_sites(sites, column, checker, label):
    """Run *checker* over every site in *sites* and tabulate the outcomes.

    Builds rows of [site, result], skipping sites whose check returned None
    (network failure), with a 5-second pause between requests so the
    monitored servers are not hammered.

    Returns:
        str: a stringified pandas DataFrame with a header row
        ['Website', column] followed by one row per successful check.
    """
    rows = [['Website', column]]
    for site in sites:
        print(f"Checking {label} for {site}...")
        outcome = checker(site)
        time.sleep(5)  # throttle: be polite to the monitored sites
        if outcome is not None:
            print(f"{label} for {site}: {outcome}")
            rows.append([site, outcome])
    return str(pd.DataFrame(rows))

def runchecks(message, history):
    """Chat entry point: dispatch *message* to the matching utility.

    Commands (case-insensitive, surrounding whitespace ignored):
        'check speed'   - time a GET of every monitored site (own + competitors)
        'check bots'    - robots.txt presence for group-owned sites
        'check sitemap' - sitemap.xml presence for group-owned sites
    Any other message is treated as a brand/domain query and answered with
    ad-transparency search links.

    Args:
        message (str): The user's chat message.
        history: Chat history (unused; required by gr.ChatInterface).

    Returns:
        str: tabulated check results, or a stringified dict of links.
    """
    # Generalized from exact 'check speed'/'Check Speed' matching: any
    # capitalization now works, which is backward-compatible.
    command = message.strip().lower()
    if command == 'check speed':
        return _poll_sites(websites, 'Speed in Seconds', check_site_speed, 'site speed')
    elif command == 'check bots':
        return _poll_sites(BTsites, 'Bots Status', check_robots, 'robots.txt')
    elif command == 'check sitemap':
        return _poll_sites(BTsites, 'Sitemap Status', check_sitemap, 'sitemap')
    else:
        return generate_transparency_links(message, history)
    
# Build the chat UI once at import; launching is deferred to the __main__
# guard so the module can be imported without starting a server.
# Bug fix: the original assigned gr.ChatInterface(...).launch() — which
# returns None once the server stops blocking — so the later
# Conversing.launch() raised AttributeError on None.
Conversing = gr.ChatInterface(
    runchecks,
    chatbot=gr.Chatbot(height=600, label="Output"),
    theme=gr.themes.Monochrome(),
    title='BG ECommerce Utilities',
    description=DEFAULT_SYSTEM_PROMPT,
    css='footer {visibility: hidden}',
)

if __name__ == "__main__":
    Conversing.launch()