logging
app.py CHANGED
@@ -20,6 +20,17 @@ item_id = ""
 item = {}
 LIMIT = 1000 # Limit to 1000 reviews so that processing does not take too long
 
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[logging.StreamHandler()],
+)
+
+logger = logging.getLogger(__name__)
+
 
 def scrape(product_id, max_reviews=LIMIT):
     all_reviews = []
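The hunk above is the core of the commit: a module-level logging setup with a stream handler. A minimal standalone sketch of the same configuration, runnable on its own; the output line in the comment is illustrative, assuming logging's default asctime format:

import logging

# Same configuration as the commit: INFO threshold, timestamped
# format, and a StreamHandler writing to stderr.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()],
)

logger = logging.getLogger(__name__)
logger.info("scraper starting")
# Emits a record shaped like (timestamp illustrative):
# 2024-05-01 10:15:42,123 [INFO] scraper starting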
@@ -37,7 +48,7 @@ def scrape(product_id, max_reviews=LIMIT):
     reviews_df = pd.json_normalize(all_reviews)
     reviews_df.rename(columns={"message": "comment"}, inplace=True)
     reviews_df = reviews_df[["comment"]]
-
+    logger.info(reviews_df.head())
     return reviews_df
 
 
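This hunk hands logger.info the DataFrame itself; the logging module applies str() when the record is emitted, so the head() preview arrives as one multi-line record. A minimal sketch, assuming pandas is available; the sample rows are made up:

import logging

import pandas as pd

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Hypothetical data standing in for the scraped reviews.
reviews_df = pd.DataFrame({"comment": ["great product", "arrived late"]})

# str() is applied to the DataFrame when the record is formatted,
# so the head() preview is emitted as a single multi-line record.
logger.info(reviews_df.head())

# Lazy %-style formatting defers string building until the record is emitted.
logger.info("scraped %d reviews", len(reviews_df))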
@@ -47,7 +58,7 @@ def get_product_id(URL):
     product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
         "basicInfo"
     ]["id"]
-
+    logger.info(product_id)
     return product_id
 
 
@@ -57,7 +68,7 @@ def clean(df):
     df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
     df["comment"] = df["comment"].apply(lambda x: clean_text(x)) # clean text
     df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
-
+    logger.info("cleaned")
     return df
 
 
@@ -107,7 +118,7 @@ async def generate(URL, query):
             chunk_size=1000, chunk_overlap=50
         )
         docs = text_splitter.split_documents(documents)
-
+        logger.info("split")
         cache_URL = URL
         # Vector store
         db = FAISS.from_documents(docs, embeddings)
@@ -116,7 +127,7 @@ async def generate(URL, query):
             llm=llm, retriever=db.as_retriever()
         )
         res = await qa.ainvoke(query)
-
+        logger.info("generated")
         # Process result
         return res["result"]
     except:
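The last hunk stops at the bare except: shown above, so its body is not part of this diff. If the error path should reach the same handlers, the standard-library logger.exception records the message at ERROR level together with the active traceback; the wrapper below is a hypothetical illustration, not code from this commit:

import logging

logger = logging.getLogger(__name__)

async def generate_safely(qa, query):  # hypothetical wrapper, not in the diff
    try:
        res = await qa.ainvoke(query)
        return res["result"]
    except Exception:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("generation failed")
        return None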