## Variables

```python
import os
import pathlib

import pandas as pd
import streamlit as st
from langchain import VectorDBQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS


@st.experimental_singleton(suppress_st_warning=True)
def get_latest_file():
    '''Get the latest file from the output folder'''

    # set the directory path
    directory_path = "output/"

    # create a list of all text files in the directory, sorted by modification time
    text_files = sorted(
        pathlib.Path(directory_path).glob("*.txt"),
        key=lambda f: f.stat().st_mtime,
    )

    # get the most recently modified file
    latest_file = text_files[-1]

    # open the file and read its contents
    with open(latest_file, "r") as f:
        file_contents = f.read()

    return file_contents


@st.experimental_singleton(suppress_st_warning=True)
def process_tweets(file, embed_model, query):
    '''Process file with latest tweets'''

    # split tweets into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_text(file)

    # `bi_enc_dict` maps the UI model label to its Hugging Face model id;
    # it is defined elsewhere in the app, as is `prompt` used below
    model = bi_enc_dict[embed_model]

    if model == "hkunlp/instructor-large":
        emb = HuggingFaceInstructEmbeddings(
            model_name=model,
            query_instruction='Represent the Financial question for retrieving supporting documents: ',
            embed_instruction='Represent the Financial document for retrieval: ',
        )
    elif model == "sentence-transformers/all-mpnet-base-v2":
        emb = HuggingFaceEmbeddings(model_name=model)

    # index the chunks in an in-memory FAISS vector store
    docsearch = FAISS.from_texts(texts, emb)

    # answer the query with a retrieval QA chain over the indexed tweets
    chain_type_kwargs = {"prompt": prompt}
    chain = VectorDBQA.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        vectorstore=docsearch,
        chain_type_kwargs=chain_type_kwargs,
    )

    result = chain({"query": query})

    return result


CONFIG = {
    "bearer_token": os.environ.get("bearer_token"),
}

sent_model_id = 'nickmuchi/optimum-finbert-tone-finetuned-fintwitter-classification'
topic_model_id = 'nickmuchi/optimum-finbert-tone-finetuned-finance-topic-classification'
task = 'text-classification'

sentiments = {"0": "Bearish", "1": "Bullish", "2": "Neutral"}

topics = {
    "0": "Analyst Update",
    "1": "Fed | Central Banks",
    "2": "Company | Product News",
    "3": "Treasuries | Corporate Debt",
    "4": "Dividend",
    "5": "Earnings",
    "6": "Energy | Oil",
    "7": "Financials",
    "8": "Currencies",
    "9": "General News | Opinion",
    "10": "Gold | Metals | Materials",
    "11": "IPO",
    "12": "Legal | Regulation",
    "13": "M&A | Investments",
    "14": "Macro",
    "15": "Markets",
    "16": "Politics",
    "17": "Personnel Change",
    "18": "Stock Commentary",
    "19": "Stock Movement",
}

user_name = [
    "Investing.com", "(((The Daily Shot)))", "Bloomberg Markets", "FirstSquawk",
    "MarketWatch", "markets", "FinancialTimes", "CNBC", "ReutersBiz",
    "BreakingNews", "LiveSquawk", "NYSE", "WSJmarkets", "FT", "TheStreet",
    "ftfinancenews", "BloombergTV", "Nasdaq", "NYSE", "federalreserve",
    "NewYorkFed", "sffed", "WSJCentralBanks", "RichmondFed", "ecb",
    "stlouisfed", "WorldBank", "MarketCurrents", "OpenOutcrier",
    "BullTradeFinder", "WallStChatter", "Briefingcom", "SeekingAlpha",
    "realDonaldTrump", "AswathDamodaran", "ukarlewitz", "alphatrends",
    "Investor666", "ACInvestorBlog", "ZorTrades", "ScottNations",
    "TradersCorner", "TraderGoalieOne", "option_snipper", "jasonleavitt",
    "LMT978", "OptionsHawk", "andrewbtodd", "Terri1618", "SunriseTrader",
    "traderstewie", "TMLTrader", "IncredibleTrade", "NYFedResearch",
    "YahooFinance", "business", "economics", "IMFNews", "Market_Screener",
    "QuickTake", "NewsFromBW", "BNCommodities",
]

user_id = [
    "988955288", "423769635", "69620713", "59393368", "3295423333", "624413",
    "69620713", "4898091", "20402945", "15110357", "6017542", "21323268",
    "28164923", "18949452", "15281391", "11014272", "35002876", "18639734",
    "21323268", "26538229", "15072071", "117237387", "327484803", "16532451",
    "83466368", "71567590", "27860681", "15296897", "2334614718",
    "2222635612", "3382363841", "72928001", "23059499", "25073877",
    "33216611", "37284991", "15246621", "293458690", "55561590", "18560146",
    "244978426", "85523269", "276714687", "2806294664", "16205561",
    "1064700308", "61342056", "184126162", "405820375",
    "787439438964068352", "52166809", "2715646770", "47247213", "374672240",
    "19546277", "34713362", "144274618", "25098482", "102325185",
    "252751061", "976297820532518914", "804556370",
]


def convert_user_names(user_name: list):
    '''Convert user names to the Twitter search query (tweepy) format'''
    users = []
    for user in user_name:
        users.append(f"from:{user}")
    return " OR ".join(users)
```