quantamentalfinance committed on
Commit
5448bd5
1 Parent(s): fca03df

added code

Files changed (2)
  1. app.py +103 -1
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1 +1,103 @@
- app.
+ # for setting/extracting environment variables such as API keys
+ import os
+
+ ### 1. For Web Scraping
+ # for querying Financial Modelling Prep API
+ from urllib.request import urlopen
+ import json
+
+ ### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
+ # for tokenizing texts and splitting them into chunks of documents
+ from transformers import GPT2TokenizerFast
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ # for turning documents into embeddings before putting them in the vector store
+ from langchain.embeddings import HuggingFaceEmbeddings
+ # for the vector store of documents
+ from langchain.vectorstores import Chroma
+
+ ### 3. For Querying LLM
+ # for loading HuggingFace LLM models from the hub
+ from langchain.llms import HuggingFaceHub
+ # for querying the LLM conveniently using the context
+ from langchain.chains.question_answering import load_qa_chain
+
+ ### 4. For Gradio App UI
+ import gradio as gr
+
+ os.environ['FMP_API_KEY'] = 'your_api_key'
+ fmp_api_key = os.environ['FMP_API_KEY']
+
+ # remove any previously downloaded/extracted copies before fetching a fresh one
+ if os.path.exists("chromadb_earnings_transcripts_extracted"):
+     os.system("rm -r chromadb_earnings_transcripts_extracted")
+ if os.path.exists("earnings_transcripts_chromadb.zip"):
+     os.system("rm earnings_transcripts_chromadb.zip")
+
+ # download and unzip the pre-built Chroma vector database of earnings call transcripts
+ os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
+ os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcripts_extracted")
+
+ # initialize the default model for embedding the tokenized texts; the transcript chunks are stored in this embedded form in the vector database
+ hf_embeddings = HuggingFaceEmbeddings()
+
+ # load the persisted Chroma vector store with the same embedding function used to build it
+ chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts', embedding_function=hf_embeddings)
+
+ # Load the HuggingFace inference endpoint of an LLM model.
+ # Name of the LLM model we are using, feel free to try others!
+ model = "mistralai/Mistral-7B-Instruct-v0.1"
+
+ # This is an inference endpoint API from HuggingFace; the model is not run locally, it is run on HuggingFace
+ hf_llm = HuggingFaceHub(repo_id=model, model_kwargs={'temperature': 0.5, "max_new_tokens": 300})
+
+ def source_question_answer(query, vectorstore: Chroma = chroma_db, llm: HuggingFaceHub = hf_llm):
+     """
+     Return the answer to the query, along with the source chunks and titles it was drawn from.
+     """
+     # retrieve the 4 most similar transcript chunks from the vector store as context
+     input_docs = vectorstore.similarity_search(query, k=4)
+     # "stuff" chain simply stuffs all retrieved chunks into the prompt
+     qa_chain = load_qa_chain(llm, chain_type="stuff")
+     # wrap the question in Mistral's instruction tags
+     query = f"[INST]According to the earnings calls transcripts earlier, {query}[/INST]"
+
+     response = qa_chain.run(input_documents=input_docs, question=query)
+     source_docs_1 = input_docs[0].page_content
+     source_docs_2 = input_docs[1].page_content
+     source_docs_3 = input_docs[2].page_content
+     source_docs_4 = input_docs[3].page_content
+
+     source_title_1 = input_docs[0].metadata['title']
+     source_title_2 = input_docs[1].metadata['title']
+     source_title_3 = input_docs[2].metadata['title']
+     source_title_4 = input_docs[3].metadata['title']
+
+     return response, source_docs_1, source_docs_2, source_docs_3, source_docs_4, source_title_1, source_title_2, source_title_3, source_title_4
+
+ with gr.Blocks() as app:
+     with gr.Row():
+         gr.HTML("<h1>Chat with Tesla 2023 Earnings Calls Transcripts</h1>")
+
+     with gr.Row():
+         query = gr.Textbox("Is Elon happy about Tesla?", placeholder="Enter question here...", label="Enter question")
+         btn = gr.Button("Ask Question")
+
+     with gr.Row():
+         gr.HTML("<h3>Answer</h3>")
+
+     with gr.Row():
+         answer = gr.Textbox(label="Answer")
+
+     with gr.Row():
+         gr.HTML("<h3>Sources Referenced from Tesla 2023 Earnings Calls Transcripts</h3>")
+
+     with gr.Row():
+         with gr.Column():
+             source_title_1 = gr.Markdown()
+             source1 = gr.Textbox(label="Source Text 1")
+         with gr.Column():
+             source_title_2 = gr.Markdown()
+             source2 = gr.Textbox(label="Source Text 2")
+
+     with gr.Row():
+         with gr.Column():
+             source_title_3 = gr.Markdown()
+             source3 = gr.Textbox(label="Source Text 3")
+         with gr.Column():
+             source_title_4 = gr.Markdown()
+             source4 = gr.Textbox(label="Source Text 4")
+
+ app.launch()
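Note that this revision defines the "Ask Question" button but never connects it to source_question_answer, so clicking it does nothing yet; the wiring presumably lands in a later commit. A minimal sketch of the missing hook, assuming the component names defined above and placed inside the gr.Blocks() context before app.launch(), would be:

    # hypothetical wiring (not part of this commit): run the QA function on click and
    # map its nine return values onto the answer, source text and source title widgets
    btn.click(fn=source_question_answer,
              inputs=[query],
              outputs=[answer, source1, source2, source3, source4,
                       source_title_1, source_title_2, source_title_3, source_title_4])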
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio==3.45.2
+ chromadb==0.4.13
+ langchain==0.0.305
+ transformers==4.33.3
+ sentence-transformers==2.2.2