Spaces:

nickmuchi
/

fintweet-GPT-Search

Build error

App Files Files Community

nickmuchi committed on Mar 12, 2023

Commit

b628185

•

1 Parent(s): 978b2e2

Update variables.py

Browse files

Files changed (1) hide show

variables.py +35 -1

variables.py CHANGED Viewed

@@ -24,6 +24,40 @@ from langchain.schema import (
     SystemMessage
 )
 @st.experimental_singleton(suppress_st_warning=True)
 def get_latest_file():
     '''Get the latest file from output folder'''
@@ -44,7 +78,7 @@ def get_latest_file():
     return file_contents
 @st.experimental_singleton(suppress_st_warning=True)
-def process_tweets(file,model,query):
     '''Process file with latest tweets'''
     # Split tweets into chunks

     SystemMessage
 )
+@st.experimental_singleton(suppress_st_warning=True)
+def load_models():
+    '''Load the sentiment and topic classification models.
+
+    Calls `pipeline` once per model id, sentiment first and topic second,
+    passing the module-level `task` and using the same id for both the
+    model and its tokenizer.
+
+    Returns:
+        tuple: (sent_pipe, topic_pipe).
+    '''
+    # `task`, `sent_model_id` and `topic_model_id` are read from module scope;
+    # their definitions are outside the visible part of this file.
+    sent_pipe, topic_pipe = (
+        pipeline(task, model=model_id, tokenizer=model_id)
+        for model_id in (sent_model_id, topic_model_id)
+    )
+    return sent_pipe, topic_pipe
+@st.cache(allow_output_mutation=True, suppress_st_warning=True)
+def process_tweets(df,df_users):
+    '''Merge tweets with their authors and label each tweet with sentiment and topic.
+
+    Args:
+        df: tweets dataframe. Needs an 'author' column that can be cast to
+            int64. It is no longer modified in place.
+        df_users: users dataframe, joined to `df` on 'author'.
+            The merged result must contain the 'tweet', 'creation_time'
+            and 'username' columns.
+
+    Returns:
+        DataFrame with columns creation_time, username, tweet, sentiment,
+        topic, sentiment_confidence and topic_confidence, sorted by
+        creation_time descending. Confidences are whole-number percentages.
+    '''
+    # Cast on a copy: assigning into df['author'] would silently rewrite the
+    # caller's dataframe.
+    df = df.assign(author=df['author'].astype(np.int64))
+    df_merged = df.merge(df_users, on='author')
+    tweet_list = df_merged['tweet'].tolist()
+
+    # Both classifiers are expected to yield one record per tweet with
+    # 'score' and 'label' fields, which are renamed so the two outputs
+    # do not collide once concatenated.
+    sentiment = pd.DataFrame(sentiment_classifier(tweet_list))
+    topic = pd.DataFrame(topic_classifier(tweet_list))
+    sentiment = sentiment.rename(columns={'score':'sentiment_confidence','label':'sentiment'})
+    topic = topic.rename(columns={'score':'topic_confidence','label':'topic'})
+
+    # A column-on-column merge returns a fresh 0..n-1 index, the same index
+    # the classifier frames get, so this column-wise concat lines up row for row.
+    df_group = pd.concat([df_merged,sentiment,topic],axis=1)
+
+    # Convert scores to percentages. The trailing round() removes binary-float
+    # residue left by the multiplication (e.g. 0.57 * 100 -> 56.99999999999999).
+    confidence_cols = ['sentiment_confidence','topic_confidence']
+    df_group[confidence_cols] = df_group[confidence_cols].round(2).mul(100).round()
+
+    df_tweets = df_group[['creation_time','username','tweet','sentiment','topic','sentiment_confidence','topic_confidence']]
+    return df_tweets.sort_values(by=['creation_time'],ascending=False)
 @st.experimental_singleton(suppress_st_warning=True)
 def get_latest_file():
     '''Get the latest file from output folder'''
     return file_contents
 @st.experimental_singleton(suppress_st_warning=True)
+def embed_tweets(file,model,query):
     '''Process file with latest tweets'''
     # Split tweets into chunks