Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -49,13 +49,16 @@ def load_summarize_pipeline(): # summarize_pipeline
|
|
49 |
return summarize_pipeline
|
50 |
|
51 |
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
54 |
summary = summarize_pipeline(texts, max_length=10, num_return_sequences=1)
|
55 |
-
st.write("redy for summary text...")
|
56 |
result = summary[0]["summary_text"]
|
57 |
# print("summarized...")
|
58 |
-
st.
|
|
|
59 |
return result
|
60 |
|
61 |
|
@@ -105,12 +108,13 @@ if st.button("Scrape & Summarize"):
|
|
105 |
progress_text = st.empty()
|
106 |
total_limit = 5000 # Maximum number of submissions to check
|
107 |
df = scrape_reddit_data(search_query, total_limit)
|
108 |
-
|
|
|
109 |
|
110 |
with st.spinner("Loading Summarizing Pipeline"):
|
111 |
summarize_pipeline = load_summarize_pipeline()
|
112 |
with st.spinner("Summarizing txt data..."):
|
113 |
-
df["Detail_Summary"] = df["Detail"].apply(lambda x: summarize_txt(summarize_pipeline, x) if x else None)
|
114 |
|
115 |
st.session_state["df"] = df
|
116 |
|
@@ -124,8 +128,8 @@ if st.button("Sentiment Analysis"):
|
|
124 |
|
125 |
with st.spinner("Doing Sentiment Analysis..."):
|
126 |
# title is short, so dont havwe to use batch processing
|
127 |
-
df['Title_Sentiment'] = df['Title'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x)) if x else None)
|
128 |
-
df['Detail_Sentiment'] = df['Detail_Summary'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x)) if x else None)
|
129 |
|
130 |
# # palarell procsssing for each row of detail
|
131 |
# with ThreadPoolExecutor() as executor:
|
|
|
49 |
return summarize_pipeline
|
50 |
|
51 |
|
52 |
+
@st.cache_resource
|
53 |
+
def summarize_txt(summarize_pipeline, texts, length):
|
54 |
+
if "count" not in st.session_state:
|
55 |
+
st.session_state.count = 0
|
56 |
+
|
57 |
summary = summarize_pipeline(texts, max_length=10, num_return_sequences=1)
|
|
|
58 |
result = summary[0]["summary_text"]
|
59 |
# print("summarized...")
|
60 |
+
st.session_state.count += 1
|
61 |
+
st.write(f"Phase: {st.session_state.count / length}")
|
62 |
return result
|
63 |
|
64 |
|
|
|
108 |
progress_text = st.empty()
|
109 |
total_limit = 5000 # Maximum number of submissions to check
|
110 |
df = scrape_reddit_data(search_query, total_limit)
|
111 |
+
length = len(df)
|
112 |
+
progress_text.text(f"Collected {length} valid posts.")
|
113 |
|
114 |
with st.spinner("Loading Summarizing Pipeline"):
|
115 |
summarize_pipeline = load_summarize_pipeline()
|
116 |
with st.spinner("Summarizing txt data..."):
|
117 |
+
df["Detail_Summary"] = df["Detail"].apply(lambda x: summarize_txt(summarize_pipeline, x, length) if x else None)
|
118 |
|
119 |
st.session_state["df"] = df
|
120 |
|
|
|
128 |
|
129 |
with st.spinner("Doing Sentiment Analysis..."):
|
130 |
# title is short, so we don't have to use batch processing
|
131 |
+
df['Title_Sentiment'] = df['Title'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x), length=length) if x else None)
|
132 |
+
df['Detail_Sentiment'] = df['Detail_Summary'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x), length=length) if x else None)
|
133 |
|
134 |
# # parallel processing for each row of detail
|
135 |
# with ThreadPoolExecutor() as executor:
|