menorki committed on
Commit
979df33
·
1 Parent(s): fadb44e
Files changed (1) hide show
  1. app.py +230 -5
app.py CHANGED
@@ -1,7 +1,232 @@
1
- import gradio as gr
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ NOTE 1: Start Command starting a FastAPI on render:
4
+ @see https://community.render.com/t/fastapi-python-web-service-deploying-for-hours/6662
5
+ uvicorn app:app --host 0.0.0.0 --port 10000
6
 
 
 
7
 
8
+ """
9
+
10
+ import os , sys
11
+ import datetime , requests , random , logging , time , timeit
12
+ import simplejson as json
13
+ from fastapi import FastAPI
14
+ from fastapi.responses import PlainTextResponse , HTMLResponse , Response , JSONResponse
15
+ # from fastapi import Request
16
+ from starlette.requests import Request
17
+
18
+ from bs4 import BeautifulSoup
19
+ from furl import furl
20
+ # from apscheduler.schedulers.blocking import BlockingScheduler
21
+ # from apscheduler.schedulers.background import BackgroundScheduler
22
+ from pymongo import MongoClient
23
+ import fire
24
+ import socket
25
+ import requests
26
+
27
+ from apscheduler.schedulers.background import BackgroundScheduler
28
+
29
+ HOSTNAME = socket.gethostname()
30
+
31
+ USER_AGENTS = [
32
+ "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0",
33
+ "Mozilla/5.0 (X11; Linux i686; rv:64.0) Gecko/20100101 Firefox/64.0",
34
+ "Mozilla/5.0 (X11; Linux; rv:74.0) Gecko/20100101 Firefox/74.0",
35
+ "Mozilla/5.0 (X11; Linux ppc64le; rv:75.0) Gecko/20100101 Firefox/75.0",
36
+ "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:77.0) Gecko/20190101 Firefox/77.0"
37
+ ]
38
+
39
+ BOT_AGENTS = [
40
+ "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
41
+ "Googlebot/2.1 (+http://www.googlebot.com/bot.html)",
42
+ "Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)",
43
+ "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
44
+ "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
45
+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
46
+ "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)"
47
+ ]
48
+
49
+ # MONGODB-ATLAS SETUP
50
+ MONGOATLAS_URI = os.environ.get('MONGOATLAS_URI') or None
51
+
52
+ ##############################################################################
53
+ #
54
+ # LOGGING
55
+ #
56
+ ##############################################################################
57
+
58
+ logging.basicConfig(level=logging.INFO , format='%(message)s')
59
+ logging.getLogger("requests").setLevel(logging.ERROR)
60
+
61
+ logger = logging.getLogger(__name__)
62
+ logger.setLevel(logging.DEBUG)
63
+ logger.propagate=False
64
+
65
+ console_logger = logging.StreamHandler()
66
+ console_logger.setLevel(logging.DEBUG)
67
+ console_logger.setFormatter(logging.Formatter('%(message)s'))
68
+
69
+ logger.addHandler(console_logger)
70
+
71
+ if not MONGOATLAS_URI:
72
+ logger.warning('Coulf not read the database URI')
73
+
74
+ # Disable urllib3 warnings (sent by requests)
75
+ # requests.packages.urllib3.disable_warnings()
76
+
77
+ app = FastAPI()
78
+ #app.config.from_pyfile('flaskapp.cfg')
79
+
80
+ port = 5000
81
+ scheduler = None
82
+ proxies = {}
83
+ # local_ip = socket.gethostbyname(hostname)
84
+ if HOSTNAME == 'OCTOCORE':
85
+ #proxies = {'http': 'http://192.168.1.68:80', 'https': 'http://192.168.1.68:80'}
86
+ proxies = {'http': 'https://anonyland:[email protected]:8080', 'https': 'http://anonyland:[email protected]:8080'}
87
+ proxy_ip = '192.168.1.43:80'
88
+
89
+ @app.get('/')
90
+ def index():
91
+ #return render_template('index.html')
92
+ logger.info(f'hostname: {HOSTNAME}')
93
+ return PlainTextResponse('OK' , 200)
94
+
95
+ @app.get('/ping')
96
+ def index():
97
+ return Response(status_code=200)
98
+
99
+ @app.get("/remote_ip")
100
+ def remote_ip(request:Request):
101
+ client_host = request.client.host
102
+ return PlainTextResponse(client_host , 200)
103
+
104
+ @app.get("/task/faa_scrap_sold_listings_featured")
105
+ def faa_scrap_sold_listings_featured_local():
106
+
107
+ global proxies
108
+
109
+ timeit_request = 0
110
+ timeit_parsing = 0
111
+ timeit_mongo = 0
112
+
113
+ response_body = '?'
114
+
115
+ if not MONGOATLAS_URI:
116
+ return PlainTextResponse("ERROR: MONGOATLAS_URI is undefined" , status_code=500)
117
+
118
+ cnt_dbs = 4
119
+
120
+ headers = {
121
+ 'User-Agent': random.choice(USER_AGENTS)
122
+ }
123
+
124
+ site_url = "https://fineartamerica.com/recentprintsales.html?rand={}".format(random.randint(1000,1000000))
125
+ r=None
126
+
127
+ try:
128
+ start = time.time()
129
+ r = requests.get(site_url , proxies=proxies , timeout=30 , verify=False , headers=headers)
130
+ timeit_request = time.time()-start
131
+ except Exception as e:
132
+ response_body = str(e)
133
+
134
+ if r and r.status_code==200:
135
+
136
+ try:
137
+
138
+ start = time.time()
139
+ listings = parse_faa_sold_listings_page(r.text)
140
+ timeit_parsing = time.time() - start
141
+
142
+ d = dict()
143
+ d['date_utc'] = datetime.datetime.utcnow()
144
+ d['results'] = listings
145
+ d['processed']= False
146
+
147
+ status = "ok"
148
+
149
+ db_name = 'faa_scrap_' + str(random.randint(1,cnt_dbs))
150
+ col_name = 'faa_sl'
151
+
152
+ mongo_client = None
153
+ try:
154
+ start = time.time()
155
+ mongo_client = MongoClient(MONGOATLAS_URI)
156
+ db = mongo_client[db_name]
157
+ col = db[col_name]
158
+ r = col.insert_one(d)
159
+ timeit_mongo = time.time() - start
160
+ except Exception as e:
161
+ status = "error saving to mongodb ({})".format(str(e))
162
+ logging.error(status)
163
+ finally:
164
+ try:
165
+ mongo_client.close()
166
+ except Exception:
167
+ pass
168
+
169
+
170
+ o = dict()
171
+ o['site']="faa"
172
+ o['status']=status
173
+ o['date'] = d['date_utc']
174
+ o['results_count'] = len(listings)
175
+ o['db_name'] = db_name
176
+ o['timeit'] = {'request':timeit_request,
177
+ 'parsing':timeit_parsing,
178
+ 'db':timeit_mongo}
179
+ # o['proxy'] = json.dumps(proxies)
180
+
181
+ response_body = str(o)
182
+
183
+ except Exception as e:
184
+ response_body = str(e)
185
+
186
+ return PlainTextResponse(response_body, 200)
187
+
188
+
189
+ def parse_faa_sold_listings_page(html):
190
+
191
+ soup = BeautifulSoup(html , 'lxml') # "html.parser"
192
+
193
+ listings_els = soup.find_all('div' , {'class':'productImageDiv'})
194
+
195
+ listings = []
196
+
197
+ for i,listing_el in enumerate(listings_els):
198
+
199
+ #if listing_el['style'].find('hidden') > -1:
200
+ # continue
201
+
202
+ l = dict()
203
+
204
+ item_url = listing_el.find('a')['href']
205
+ if not item_url.startswith('http'):
206
+ item_url = 'https://fineartamerica.com/' + item_url
207
+
208
+ item_page = furl(item_url)
209
+ item_page.path.normalize()
210
+ l['item_page'] = item_page.url
211
+
212
+ l['image'] = listing_el.find('img' , {'class':'productImage'})['src']
213
+
214
+ artist_url = listing_el.find('p',{'class':'artistName'}).a['href']
215
+ if not artist_url.startswith('http'):
216
+ artist_url = 'https://fineartamerica.com/' + artist_url
217
+ artist_page = furl(artist_url)
218
+ artist_page.path.normalize()
219
+ l['artist_page'] = artist_page.url
220
+
221
+ l['artist'] = listing_el.find('p',{'class':'artistName'}).text
222
+ l['sell_info'] = listing_el.find('p' , {'class':'orderLocation'}).text
223
+
224
+ listings.append(l)
225
+
226
+ del soup
227
+
228
+ return listings
229
+
230
+ if __name__ == "__main__":
231
+ import uvicorn
232
+ uvicorn.run(app, host="0.0.0.0", port=7860)