Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -49,13 +49,16 @@ def load_summarize_pipeline(): # summarize_pipeline
|
|
49 |
return summarize_pipeline
|
50 |
|
51 |
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
54 |
summary = summarize_pipeline(texts, max_length=10, num_return_sequences=1)
|
55 |
-
st.write("redy for summary text...")
|
56 |
result = summary[0]["summary_text"]
|
57 |
# print("summarized...")
|
58 |
-
st.
|
|
|
59 |
return result
|
60 |
|
61 |
|
@@ -105,12 +108,13 @@ if st.button("Scrape & Summarize"):
|
|
105 |
progress_text = st.empty()
|
106 |
total_limit = 5000 # Maximum number of submissions to check
|
107 |
df = scrape_reddit_data(search_query, total_limit)
|
108 |
-
|
|
|
109 |
|
110 |
with st.spinner("Loading Summarizing Pipeline"):
|
111 |
summarize_pipeline = load_summarize_pipeline()
|
112 |
with st.spinner("Summarizing txt data..."):
|
113 |
-
df["Detail_Summary"] = df["Detail"].apply(lambda x: summarize_txt(summarize_pipeline, x) if x else None)
|
114 |
|
115 |
st.session_state["df"] = df
|
116 |
|
@@ -124,8 +128,8 @@ if st.button("Sentiment Analysis"):
|
|
124 |
|
125 |
with st.spinner("Doing Sentiment Analysis..."):
|
126 |
# title is short, so dont havwe to use batch processing
|
127 |
-
df['Title_Sentiment'] = df['Title'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x)) if x else None)
|
128 |
-
df['Detail_Sentiment'] = df['Detail_Summary'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x)) if x else None)
|
129 |
|
130 |
# # palarell procsssing for each row of detail
|
131 |
# with ThreadPoolExecutor() as executor:
|
|
|
49 |
return summarize_pipeline
|
50 |
|
51 |
|
52 |
+
@st.cache_resource
|
53 |
+
def summarize_txt(summarize_pipeline, texts, length):
|
54 |
+
if "count" not in st.session_state:
|
55 |
+
st.session_state.count = 0
|
56 |
+
|
57 |
summary = summarize_pipeline(texts, max_length=10, num_return_sequences=1)
|
|
|
58 |
result = summary[0]["summary_text"]
|
59 |
# print("summarized...")
|
60 |
+
st.session_state.count += 1
|
61 |
+
st.write(f"Phase: {st.session_state.count / length}")
|
62 |
return result
|
63 |
|
64 |
|
|
|
108 |
progress_text = st.empty()
|
109 |
total_limit = 5000 # Maximum number of submissions to check
|
110 |
df = scrape_reddit_data(search_query, total_limit)
|
111 |
+
length = len(df)
|
112 |
+
progress_text.text(f"Collected {length} valid posts.")
|
113 |
|
114 |
with st.spinner("Loading Summarizing Pipeline"):
|
115 |
summarize_pipeline = load_summarize_pipeline()
|
116 |
with st.spinner("Summarizing txt data..."):
|
117 |
+
df["Detail_Summary"] = df["Detail"].apply(lambda x: summarize_txt(summarize_pipeline, x, length) if x else None)
|
118 |
|
119 |
st.session_state["df"] = df
|
120 |
|
|
|
128 |
|
129 |
with st.spinner("Doing Sentiment Analysis..."):
|
130 |
# title is short, so we don't have to use batch processing
|
131 |
+
df['Title_Sentiment'] = df['Title'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x), length=length) if x else None)
|
132 |
+
df['Detail_Sentiment'] = df['Detail_Summary'].apply(lambda x: safe_sentiment(sentiment_pipeline, text=preprocess_text(x), length=length) if x else None)
|
133 |
|
134 |
# # parallel processing for each row of detail
|
135 |
# with ThreadPoolExecutor() as executor:
|