Spaces:
Build error
Build error
Update variables.py
Browse files- variables.py +35 -1
variables.py
CHANGED
@@ -24,6 +24,40 @@ from langchain.schema import (
|
|
24 |
SystemMessage
|
25 |
)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
@st.experimental_singleton(suppress_st_warning=True)
|
28 |
def get_latest_file():
|
29 |
'''Get the latest file from output folder'''
|
@@ -44,7 +78,7 @@ def get_latest_file():
|
|
44 |
return file_contents
|
45 |
|
46 |
@st.experimental_singleton(suppress_st_warning=True)
|
47 |
-
def
|
48 |
'''Process file with latest tweets'''
|
49 |
|
50 |
# Split tweets int chunks
|
|
|
24 |
SystemMessage
|
25 |
)
|
26 |
|
27 |
+
@st.experimental_singleton(suppress_st_warning=True)
def load_models():
    """Build the sentiment and topic classification pipelines.

    Both pipelines are created with ``pipeline`` for the same ``task``;
    each one uses its own model id as both the model and the tokenizer.
    ``task``, ``sent_model_id`` and ``topic_model_id`` are defined outside
    this function.

    Returns:
        A ``(sentiment_pipeline, topic_pipeline)`` tuple.
    """
    return (
        pipeline(task, model=sent_model_id, tokenizer=sent_model_id),
        pipeline(task, model=topic_model_id, tokenizer=topic_model_id),
    )
|
34 |
+
|
35 |
+
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def process_tweets(df, df_users):
    """Join tweets with their authors and label each tweet.

    The tweets are merged with ``df_users`` on ``author``, every tweet text
    is passed to ``sentiment_classifier`` and ``topic_classifier`` (both
    defined outside this function), and the resulting labels and scores are
    attached column-wise to the merged rows.

    Args:
        df: Tweets; must provide an ``author`` column castable to int64.
            The merged frame must end up with ``creation_time`` and
            ``tweet`` columns.
        df_users: User table; must provide ``author``, and the merged
            frame must end up with a ``username`` column.

    Returns:
        A DataFrame with the columns ``creation_time``, ``username``,
        ``tweet``, ``sentiment``, ``topic``, ``sentiment_confidence`` and
        ``topic_confidence``, sorted by ``creation_time`` descending.
        The confidence columns hold the classifier scores multiplied by 100
        and rounded to whole numbers (presumably percentages, assuming the
        scores lie in [0, 1] -- TODO confirm against the models).
    """
    confidence_cols = ['sentiment_confidence', 'topic_confidence']
    output_cols = [
        'creation_time', 'username', 'tweet',
        'sentiment', 'topic',
        'sentiment_confidence', 'topic_confidence',
    ]

    # Cast on a new frame instead of overwriting the caller's column: with
    # caching, the body only runs on some calls, so an in-place change to
    # the argument would be an unreliable side effect.
    tweets = df.assign(author=df['author'].astype(np.int64))
    df_merged = tweets.merge(df_users, on='author')

    tweet_list = df_merged['tweet'].tolist()

    # Each classifier result is expected to carry 'score' and 'label'
    # fields, which are renamed to classifier-specific column names.
    sentiment = pd.DataFrame(sentiment_classifier(tweet_list)).rename(
        columns={'score': 'sentiment_confidence', 'label': 'sentiment'}
    )
    topic = pd.DataFrame(topic_classifier(tweet_list)).rename(
        columns={'score': 'topic_confidence', 'label': 'topic'}
    )

    # merge() yields a fresh 0..n-1 index, so the positional concat lines
    # up row-for-row with the classifier outputs.
    df_group = pd.concat([df_merged, sentiment, topic], axis=1)

    # Scale first, then round: rounding before multiplying leaves binary
    # floating-point artifacts such as 28.999999999999996.
    df_group[confidence_cols] = df_group[confidence_cols].mul(100).round()

    return df_group[output_cols].sort_values(
        by=['creation_time'], ascending=False
    )
|
60 |
+
|
61 |
@st.experimental_singleton(suppress_st_warning=True)
|
62 |
def get_latest_file():
|
63 |
'''Get the latest file from output folder'''
|
|
|
78 |
return file_contents
|
79 |
|
80 |
@st.experimental_singleton(suppress_st_warning=True)
|
81 |
+
def embed_tweets(file,model,query):
|
82 |
'''Process file with latest tweets'''
|
83 |
|
84 |
# Split tweets int chunks
|