kensvin committed on
Commit
36ab1a9
1 Parent(s): 0f1ffcf

handle url error

Browse files
Files changed (1) hide show
  1. app.py +23 -19
app.py CHANGED
@@ -21,25 +21,29 @@ item = {}
21
  LIMIT = 1000 # Limit to 1000 reviews so that processing does not take too long
22
 
23
  def scrape(URL, max_reviews=LIMIT):
24
- parsed_url = urlparse(URL)
25
- *_, SHOP, PRODUCT_KEY = parsed_url.path.split("/")
26
- product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
27
- "basicInfo"
28
- ]["id"]
29
- all_reviews = []
30
- page = 1
31
- has_next = True
32
-
33
- while has_next and len(all_reviews) <= max_reviews:
34
- response = request_product_review(product_id, page=page)
35
- data = response.json()["data"]["productrevGetProductReviewList"]
36
- reviews = data["list"]
37
- all_reviews.extend(reviews)
38
- has_next = data["hasNext"]
39
- page += 1
40
-
41
- reviews_df = pd.json_normalize(all_reviews)
42
- return reviews_df
 
 
 
 
43
 
44
  # Clean
45
  def clean(df):
 
21
  LIMIT = 1000 # Limit to 1000 reviews so that processing does not take too long
22
 
23
  def scrape(URL, max_reviews=LIMIT):
24
+ try:
25
+ parsed_url = urlparse(URL)
26
+ *_, SHOP, PRODUCT_KEY = parsed_url.path.split("/")
27
+ product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
28
+ "basicInfo"
29
+ ]["id"]
30
+ except:
31
+ raise gr.Error("Invalid URL")
32
+ else:
33
+ all_reviews = []
34
+ page = 1
35
+ has_next = True
36
+
37
+ while has_next and len(all_reviews) <= max_reviews:
38
+ response = request_product_review(product_id, page=page)
39
+ data = response.json()["data"]["productrevGetProductReviewList"]
40
+ reviews = data["list"]
41
+ all_reviews.extend(reviews)
42
+ has_next = data["hasNext"]
43
+ page += 1
44
+
45
+ reviews_df = pd.json_normalize(all_reviews)
46
+ return reviews_df
47
 
48
  # Clean
49
  def clean(df):