kensvin committed on
Commit
2d566ae
1 Parent(s): 6e16551
Files changed (1) hide show
  1. app.py +5 -0
app.py CHANGED
@@ -37,6 +37,7 @@ def scrape(product_id, max_reviews=LIMIT):
37
  reviews_df = pd.json_normalize(all_reviews)
38
  reviews_df.rename(columns={"message": "comment"}, inplace=True)
39
  reviews_df = reviews_df[["comment"]]
 
40
  return reviews_df
41
 
42
 
@@ -46,6 +47,7 @@ def get_product_id(URL):
46
  product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
47
  "basicInfo"
48
  ]["id"]
 
49
  return product_id
50
 
51
 
@@ -55,6 +57,7 @@ def clean(df):
55
  df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
56
  df["comment"] = df["comment"].apply(lambda x: clean_text(x)) # clean text
57
  df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
 
58
  return df
59
 
60
 
@@ -104,6 +107,7 @@ async def generate(URL, query):
104
  chunk_size=1000, chunk_overlap=50
105
  )
106
  docs = text_splitter.split_documents(documents)
 
107
  cache_URL = URL
108
  # Vector store
109
  db = FAISS.from_documents(docs, embeddings)
@@ -112,6 +116,7 @@ async def generate(URL, query):
112
  llm=llm, retriever=db.as_retriever()
113
  )
114
  res = await qa.ainvoke(query)
 
115
  # Process result
116
  return res["result"]
117
  except:
 
37
  reviews_df = pd.json_normalize(all_reviews)
38
  reviews_df.rename(columns={"message": "comment"}, inplace=True)
39
  reviews_df = reviews_df[["comment"]]
40
+ print(reviews_df.head())
41
  return reviews_df
42
 
43
 
 
47
  product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
48
  "basicInfo"
49
  ]["id"]
50
+ print(product_id)
51
  return product_id
52
 
53
 
 
57
  df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
58
  df["comment"] = df["comment"].apply(lambda x: clean_text(x)) # clean text
59
  df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
60
+ print("cleaned")
61
  return df
62
 
63
 
 
107
  chunk_size=1000, chunk_overlap=50
108
  )
109
  docs = text_splitter.split_documents(documents)
110
+ print("split")
111
  cache_URL = URL
112
  # Vector store
113
  db = FAISS.from_documents(docs, embeddings)
 
116
  llm=llm, retriever=db.as_retriever()
117
  )
118
  res = await qa.ainvoke(query)
119
+ print("generated")
120
  # Process result
121
  return res["result"]
122
  except: