kusa04 commited on
Commit
012fb93
·
verified ·
1 Parent(s): c2d6542

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -49,13 +49,16 @@ def load_summarize_pipeline(): # summarize_pipeline
49
  return summarize_pipeline
50
 
51
 
52
- def summarize_txt(summarize_pipeline, texts):
53
- st.write("start to summarize...")
 
 
 
54
  summary = summarize_pipeline(texts, max_length=10, num_return_sequences=1)
55
- st.write("redy for summary text...")
56
  result = summary[0]["summary_text"]
57
  # print("summarized...")
58
- st.write("summarized...")
 
59
  return result
60
 
61
 
@@ -105,12 +108,13 @@ if st.button("Scrape & Summarize"):
105
  progress_text = st.empty()
106
  total_limit = 5000 # Maximum number of submissions to check
107
  df = scrape_reddit_data(search_query, total_limit)
108
- progress_text.text(f"Collected {len(df)} valid posts.")
 
109
 
110
  with st.spinner("Loading Summarizing Pipeline"):
111
  summarize_pipeline = load_summarize_pipeline()
112
  with st.spinner("Summarizing txt data..."):
113
- df["Detail_Summary"] = df["Detail"].apply(lambda x: summarize_txt(summarize_pipeline, x) if x else None)
114
 
115
  st.session_state["df"] = df
116
 
@@ -124,8 +128,8 @@ if st.button("Sentiment Analysis"):
124
 
125
  with st.spinner("Doing Sentiment Analysis..."):
126
  # titles are short, so we don't have to use batch processing
127
- df['Title_Sentiment'] = df['Title'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x)) if x else None)
128
- df['Detail_Sentiment'] = df['Detail_Summary'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x)) if x else None)
129
 
130
  # # parallel processing for each row of detail
131
  # with ThreadPoolExecutor() as executor:
 
49
  return summarize_pipeline
50
 
51
 
52
def summarize_txt(summarize_pipeline, texts, length):
    """Summarize ``texts`` with the given pipeline and report progress in the UI.

    Parameters
    ----------
    summarize_pipeline :
        A transformers summarization pipeline (loaded by load_summarize_pipeline).
    texts : str
        The text to summarize.
    length : int
        Total number of rows being summarized; used only to compute the
        progress fraction written to the page.

    Returns
    -------
    str
        The ``summary_text`` of the first returned sequence.
    """
    # BUG FIX: the original decorated this function with @st.cache_resource.
    # cache_resource is meant for global, long-lived resources (models,
    # connections); applied here Streamlit would try to hash the pipeline
    # argument (raising UnhashableParamError) and, even if it could, caching
    # would skip the progress side effects below. The per-call summarization
    # must run every time, so no caching decorator.
    if "count" not in st.session_state:
        st.session_state.count = 0

    summary = summarize_pipeline(texts, max_length=10, num_return_sequences=1)
    result = summary[0]["summary_text"]
    # Progress indicator: fraction of rows summarized so far.
    st.session_state.count += 1
    st.write(f"Phase: {st.session_state.count / length}")
    return result
63
 
64
 
 
108
  progress_text = st.empty()
109
  total_limit = 5000 # Maximum number of submissions to check
110
  df = scrape_reddit_data(search_query, total_limit)
111
+ length = len(df)
112
+ progress_text.text(f"Collected {length} valid posts.")
113
 
114
  with st.spinner("Loading Summarizing Pipeline"):
115
  summarize_pipeline = load_summarize_pipeline()
116
  with st.spinner("Summarizing txt data..."):
117
+ df["Detail_Summary"] = df["Detail"].apply(lambda x: summarize_txt(summarize_pipeline, x, length) if x else None)
118
 
119
  st.session_state["df"] = df
120
 
 
128
 
129
  with st.spinner("Doing Sentiment Analysis..."):
130
  # titles are short, so we don't have to use batch processing
131
+ df['Title_Sentiment'] = df['Title'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x), length) if x else None)
132
+ df['Detail_Sentiment'] = df['Detail_Summary'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x), length) if x else None)
133
 
134
  # # parallel processing for each row of detail
135
  # with ThreadPoolExecutor() as executor: