quantamentalfinance committed on
Commit
3be625e
1 Parent(s): d194aec
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -1,5 +1,6 @@
1
- # for setting/extracting environment variables such as API keysimport os
2
  import os
 
3
  ### 1. For Web Scraping
4
  # for querying Financial Modelling Prep API
5
  from urllib.request import urlopen
@@ -7,7 +8,6 @@ import json
7
 
8
  ### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
9
  # for tokenizing texts and splitting them into chunks of documents
10
- from transformers import GPT2TokenizerFast
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
12
  # for turning documents into embeddings before putting them in vector store
13
  from langchain.embeddings import HuggingFaceEmbeddings
@@ -25,17 +25,16 @@ import gradio as gr
25
 
26
  fmp_api_key = os.environ['FMP_API_KEY']
27
 
 
 
 
28
  if os.path.exists("chromadb_earnings_transcripts_extracted"):
29
  os.system("rm -r chromadb_earnings_transcripts_extracted")
30
  if os.path.exists("earnings_transcripts_chromadb.zip"):
31
  os.system("rm earnings_transcripts_chromadb.zip")
32
-
33
  os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
34
  os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcripts_extracted")
35
 
36
- # initialize the default model for embedding the tokenized texts, the articles are stored in this embedded form in the vector database
37
- hf_embeddings = HuggingFaceEmbeddings()
38
-
39
  chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts',embedding_function=hf_embeddings)
40
 
41
  # Load the huggingface inference endpoint of an LLM model
@@ -45,7 +44,8 @@ model = "mistralai/Mistral-7B-Instruct-v0.1"
45
  # This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
46
  hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
47
 
48
- def source_question_answer(query,vectorstore:Chroma=chroma_db,llm:HuggingFaceHub=hf_llm):
 
49
  """
50
  Return answer to the query
51
  """
@@ -64,14 +64,16 @@ def source_question_answer(query,vectorstore:Chroma=chroma_db,llm:HuggingFaceHub
64
  source_title_3 = input_docs[2].metadata['title']
65
  source_title_4 = input_docs[3].metadata['title']
66
 
67
- return response, source_docs_1, source_docs_2, source_docs_3, source_docs_4, source_title_1, source_title_2, source_title_3, source_title_4
 
68
 
69
  with gr.Blocks() as app:
 
70
  with gr.Row():
71
  gr.HTML("<h1>Chat with Tesla 2023 Earnings Calls Transcripts</h1>")
72
 
73
  with gr.Row():
74
- query = gr.Textbox("Is Elon happy about Tesla?", placeholder="Enter question here...", label="Enter question")
75
  btn = gr.Button("Ask Question")
76
 
77
  with gr.Row():
@@ -98,5 +100,11 @@ with gr.Blocks() as app:
98
  with gr.Column():
99
  source_title_4 = gr.Markdown()
100
  source4 = gr.Textbox(label="Source Text 4")
 
 
 
 
 
 
101
 
102
  app.launch()
 
1
+ # for setting/extracting environment variables such as API keys
2
  import os
3
+
4
  ### 1. For Web Scraping
5
  # for querying Financial Modelling Prep API
6
  from urllib.request import urlopen
 
8
 
9
  ### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
10
  # for tokenizing texts and splitting them into chunks of documents
 
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
12
  # for turning documents into embeddings before putting them in vector store
13
  from langchain.embeddings import HuggingFaceEmbeddings
 
25
 
26
  fmp_api_key = os.environ['FMP_API_KEY']
27
 
28
+ # initialize the default model for embedding the tokenized texts, the articles are stored in this embedded form in the vector database
29
+ hf_embeddings = HuggingFaceEmbeddings()
30
+
31
  if os.path.exists("chromadb_earnings_transcripts_extracted"):
32
  os.system("rm -r chromadb_earnings_transcripts_extracted")
33
  if os.path.exists("earnings_transcripts_chromadb.zip"):
34
  os.system("rm earnings_transcripts_chromadb.zip")
 
35
  os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
36
  os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcripts_extracted")
37
 
 
 
 
38
  chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts',embedding_function=hf_embeddings)
39
 
40
  # Load the huggingface inference endpoint of an LLM model
 
44
  # This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
45
  hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
46
 
47
+
48
+ def source_question_answer(query:str,vectorstore:Chroma=chroma_db,llm:HuggingFaceHub=hf_llm):
49
  """
50
  Return answer to the query
51
  """
 
64
  source_title_3 = input_docs[2].metadata['title']
65
  source_title_4 = input_docs[3].metadata['title']
66
 
67
+ return response,source_docs_1 ,source_docs_2,source_docs_3,source_docs_4, source_title_1, source_title_2, source_title_3, source_title_4
68
+
69
 
70
  with gr.Blocks() as app:
71
+
72
  with gr.Row():
73
  gr.HTML("<h1>Chat with Tesla 2023 Earnings Calls Transcripts</h1>")
74
 
75
  with gr.Row():
76
+ query = gr.Textbox("How is Tesla planning to expand?", placeholder="Enter question here...", label="Enter question")
77
  btn = gr.Button("Ask Question")
78
 
79
  with gr.Row():
 
100
  with gr.Column():
101
  source_title_4 = gr.Markdown()
102
  source4 = gr.Textbox(label="Source Text 4")
103
+
104
+ query.submit(fn=source_question_answer, inputs=[query],
105
+ outputs=[answer, source1, source2, source3, source4, source_title_1, source_title_2, source_title_3, source_title_4])
106
+
107
+ btn.click(fn=source_question_answer, inputs=[query],
108
+ outputs=[answer, source1, source2, source3, source4, source_title_1, source_title_2, source_title_3, source_title_4])
109
 
110
  app.launch()