kensvin committed on
Commit
2d566ae
1 Parent(s): 6e16551
Files changed (1) hide show
  1. app.py +5 -0
app.py CHANGED
@@ -37,6 +37,7 @@ def scrape(product_id, max_reviews=LIMIT):
37
  reviews_df = pd.json_normalize(all_reviews)
38
  reviews_df.rename(columns={"message": "comment"}, inplace=True)
39
  reviews_df = reviews_df[["comment"]]
 
40
  return reviews_df
41
 
42
 
@@ -46,6 +47,7 @@ def get_product_id(URL):
46
  product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
47
  "basicInfo"
48
  ]["id"]
 
49
  return product_id
50
 
51
 
@@ -55,6 +57,7 @@ def clean(df):
55
  df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
56
  df["comment"] = df["comment"].apply(lambda x: clean_text(x)) # clean text
57
  df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
 
58
  return df
59
 
60
 
@@ -104,6 +107,7 @@ async def generate(URL, query):
104
  chunk_size=1000, chunk_overlap=50
105
  )
106
  docs = text_splitter.split_documents(documents)
 
107
  cache_URL = URL
108
  # Vector store
109
  db = FAISS.from_documents(docs, embeddings)
@@ -112,6 +116,7 @@ async def generate(URL, query):
112
  llm=llm, retriever=db.as_retriever()
113
  )
114
  res = await qa.ainvoke(query)
 
115
  # Process result
116
  return res["result"]
117
  except:
 
37
  reviews_df = pd.json_normalize(all_reviews)
38
  reviews_df.rename(columns={"message": "comment"}, inplace=True)
39
  reviews_df = reviews_df[["comment"]]
40
+ print(reviews_df.head())
41
  return reviews_df
42
 
43
 
 
47
  product_id = request_product_id(SHOP, PRODUCT_KEY).json()["data"]["pdpGetLayout"][
48
  "basicInfo"
49
  ]["id"]
50
+ print(product_id)
51
  return product_id
52
 
53
 
 
57
  df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
58
  df["comment"] = df["comment"].apply(lambda x: clean_text(x)) # clean text
59
  df = df[df["comment"] != ""].reset_index(drop=True) # remove empty reviews
60
+ print("cleaned")
61
  return df
62
 
63
 
 
107
  chunk_size=1000, chunk_overlap=50
108
  )
109
  docs = text_splitter.split_documents(documents)
110
+ print("split")
111
  cache_URL = URL
112
  # Vector store
113
  db = FAISS.from_documents(docs, embeddings)
 
116
  llm=llm, retriever=db.as_retriever()
117
  )
118
  res = await qa.ainvoke(query)
119
+ print("generated")
120
  # Process result
121
  return res["result"]
122
  except: