debugging
app.py
CHANGED
@@ -37,6 +37,7 @@ def scrape(product_id, max_reviews=LIMIT):
     reviews_df = pd.json_normalize(all_reviews)
     reviews_df.rename(columns={"message": "comment"}, inplace=True)
     reviews_df = reviews_df[["comment"]]
+    print(reviews_df.head())
     return reviews_df
 
 
@@ -46,6 +47,7 @@ def get_product_id(URL):
     product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
         "basicInfo"
     ]["id"]
+    print(product_id)
     return product_id
 
 
@@ -55,6 +57,7 @@ def clean(df):
     df = df[df["comment"] != ""].reset_index(drop=True)  # remove empty reviews
     df["comment"] = df["comment"].apply(lambda x: clean_text(x))  # clean text
     df = df[df["comment"] != ""].reset_index(drop=True)  # remove empty reviews
+    print("cleaned")
     return df
 
 
@@ -104,6 +107,7 @@ async def generate(URL, query):
             chunk_size=1000, chunk_overlap=50
         )
         docs = text_splitter.split_documents(documents)
+        print("split")
         cache_URL = URL
         # Vector store
         db = FAISS.from_documents(docs, embeddings)
@@ -112,6 +116,7 @@ async def generate(URL, query):
             llm=llm, retriever=db.as_retriever()
        )
         res = await qa.ainvoke(query)
+        print("generated")
         # Process result
         return res["result"]
     except:
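For context, here is a minimal, self-contained sketch of the pandas path that the first three prints instrument. The sample payload is invented for illustration; in the real app the data comes from the shop's review API:

import pandas as pd

# Hypothetical payload standing in for the scraped review objects.
all_reviews = [{"message": "Great product!"}, {"message": ""}]

reviews_df = pd.json_normalize(all_reviews)      # flatten JSON into a DataFrame
reviews_df.rename(columns={"message": "comment"}, inplace=True)
reviews_df = reviews_df[["comment"]]             # keep only the review text
print(reviews_df.head())                         # debug print added by this commit

reviews_df = reviews_df[reviews_df["comment"] != ""].reset_index(drop=True)
print("cleaned")                                 # debug print added by this commit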
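Similarly, a rough sketch of the splitting step that print("split") marks. The diff only shows the chunk_size/chunk_overlap arguments, so the splitter class is an assumption (LangChain's RecursiveCharacterTextSplitter), and the Document content is made up:

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

# Made-up document standing in for the concatenated reviews.
documents = [Document(page_content="good quality, fast shipping. " * 100)]

# Assumed splitter class; the parameters match the diff.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
docs = text_splitter.split_documents(documents)
print("split")  # debug print added by this commit
print(len(docs), "chunks")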