kensvin committed
Commit
d050f83
1 Parent(s): 2d566ae
Files changed (1):
app.py +16 -5
app.py CHANGED
@@ -20,6 +20,17 @@ item_id = ""
 item = {}
 LIMIT = 1000  # Limit to 1000 reviews so that processing does not take too long

+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[logging.StreamHandler()],
+)
+
+logger = logging.getLogger(__name__)
+

 def scrape(product_id, max_reviews=LIMIT):
     all_reviews = []
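
Note on the new handler setup: logging.StreamHandler() writes to sys.stderr by default, while the print() calls it replaces wrote to sys.stdout, so anything scraping the app's output may need adjusting. A minimal standalone sketch of the same configuration:

import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()],  # stderr by default
)

logger = logging.getLogger(__name__)
logger.info("scraper ready")
# e.g. 2024-01-01 12:00:00,000 [INFO] scraper ready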
@@ -37,7 +48,7 @@ def scrape(product_id, max_reviews=LIMIT):
     reviews_df = pd.json_normalize(all_reviews)
     reviews_df.rename(columns={"message": "comment"}, inplace=True)
     reviews_df = reviews_df[["comment"]]
-    print(reviews_df.head())
+    logger.info(reviews_df.head())
     return reviews_df


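
For context on what gets logged here: scrape() collects raw review dicts, flattens them with pd.json_normalize, and keeps only the review text. A runnable sketch with made-up payloads (fields other than "message" are illustrative):

import logging
import pandas as pd

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Made-up review payloads; only "message" is used downstream
all_reviews = [
    {"message": "Great product", "rating": 5},
    {"message": "Fast shipping", "rating": 4},
]

reviews_df = pd.json_normalize(all_reviews)  # list of dicts -> DataFrame
reviews_df.rename(columns={"message": "comment"}, inplace=True)
reviews_df = reviews_df[["comment"]]         # drop everything but the text
logger.info("\n%s", reviews_df.head())       # lazy %-formatting

Passing the DataFrame through "%s" defers str() until the record is actually emitted, which matters once the level is raised above INFO.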
@@ -47,7 +58,7 @@ def get_product_id(URL):
     product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
         "basicInfo"
     ]["id"]
-    print(product_id)
+    logger.info(product_id)
     return product_id


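
The id is pulled from a deeply nested response, so a missing key anywhere along "data" → "pdpGetLayout" → "basicInfo" raises KeyError. A hedged alternative (the path is taken from the diff; this defensive helper is not part of the app):

def extract_product_id(payload: dict):
    # Walk the same path as the diff, but tolerate missing keys
    basic_info = (
        payload.get("data", {})
        .get("pdpGetLayout", {})
        .get("basicInfo", {})
    )
    return basic_info.get("id")  # None instead of KeyError

payload = {"data": {"pdpGetLayout": {"basicInfo": {"id": "123456"}}}}
print(extract_product_id(payload))  # 123456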
@@ -57,7 +68,7 @@ def clean(df):
     df = df[df["comment"] != ""].reset_index(drop=True)  # remove empty reviews
     df["comment"] = df["comment"].apply(lambda x: clean_text(x))  # clean text
     df = df[df["comment"] != ""].reset_index(drop=True)  # remove empty reviews
-    print("cleaned")
+    logger.info("cleaned")
     return df


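
clean() drops empty comments both before and after clean_text() because cleaning can reduce a non-empty review (emoji or punctuation only) to an empty string. clean_text itself is not shown in this diff; the sketch below substitutes a simple stand-in to make the double filter observable:

import re
import pandas as pd

def clean_text(text):
    # Stand-in for the app's clean_text: keep lowercase letters and spaces
    return re.sub(r"[^a-z\s]", "", text.lower()).strip()

df = pd.DataFrame({"comment": ["Nice!", "!!!", ""]})
df = df[df["comment"] != ""].reset_index(drop=True)           # drops ""
df["comment"] = df["comment"].apply(lambda x: clean_text(x))  # "!!!" -> ""
df = df[df["comment"] != ""].reset_index(drop=True)           # drops the new ""
# df now holds a single row: "nice"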
@@ -107,7 +118,7 @@ async def generate(URL, query):
             chunk_size=1000, chunk_overlap=50
         )
         docs = text_splitter.split_documents(documents)
-        print("split")
+        logger.info("split")
         cache_URL = URL
         # Vector store
         db = FAISS.from_documents(docs, embeddings)
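
Around this hunk the app follows the common LangChain retrieval pattern: split the review documents into overlapping chunks, then embed them into a FAISS index. A minimal sketch with the same chunk parameters; the import paths and the embedding model are assumptions (they vary across LangChain versions and are defined elsewhere in app.py):

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

documents = [Document(page_content="...concatenated review text...")]

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
docs = text_splitter.split_documents(documents)  # ~1000-char chunks, 50-char overlap
embeddings = HuggingFaceEmbeddings()             # assumed model; not in this diff
db = FAISS.from_documents(docs, embeddings)      # embed and index the chunks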
@@ -116,7 +127,7 @@
             llm=llm, retriever=db.as_retriever()
         )
         res = await qa.ainvoke(query)
-        print("generated")
+        logger.info("generated")
         # Process result
         return res["result"]
     except:
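
One loose end visible in the context: the try block still ends in a bare except:. Since the commit now configures logging, a natural follow-up (not part of this diff) would be to record the failure with logger.exception, which logs at ERROR level and appends the traceback. A sketch, with the qa chain passed in so it stands alone:

import logging

logger = logging.getLogger(__name__)

async def generate(URL, query, qa):
    # Sketch only: `qa` stands for the app's RetrievalQA chain
    try:
        res = await qa.ainvoke(query)
        logger.info("generated")
        return res["result"]
    except Exception:
        logger.exception("generation failed for %s", URL)  # traceback included
        return None  # placeholder; the app's real fallback is not in this hunk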