Spaces:
Runtime error
Runtime error
quantamentalfinance
committed on
Commit
•
3be625e
1
Parent(s):
d194aec
updated
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
-
# for setting/extracting environment variables such as API
|
2 |
import os
|
|
|
3 |
### 1. For Web Scraping
|
4 |
# for querying the Financial Modeling Prep API
|
5 |
from urllib.request import urlopen
|
@@ -7,7 +8,6 @@ import json
|
|
7 |
|
8 |
### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
|
9 |
# for tokenizing texts and splitting them into chunks of documents
|
10 |
-
from transformers import GPT2TokenizerFast
|
11 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
12 |
# for turning documents into embeddings before putting them in vector store
|
13 |
from langchain.embeddings import HuggingFaceEmbeddings
|
@@ -25,17 +25,16 @@ import gradio as gr
|
|
25 |
|
26 |
fmp_api_key = os.environ['FMP_API_KEY']
|
27 |
|
|
|
|
|
|
|
28 |
if os.path.exists("chromadb_earnings_transcripts_extracted"):
|
29 |
os.system("rm -r chromadb_earnings_transcripts_extracted")
|
30 |
if os.path.exists("earnings_transcripts_chromadb.zip"):
|
31 |
os.system("rm earnings_transcripts_chromadb.zip")
|
32 |
-
|
33 |
os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
|
34 |
os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcripts_extracted")
|
35 |
|
36 |
-
# initialize the default model for embedding the tokenized texts; the articles are stored in this embedded form in the vector database
|
37 |
-
hf_embeddings = HuggingFaceEmbeddings()
|
38 |
-
|
39 |
chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts',embedding_function=hf_embeddings)
|
40 |
|
41 |
# Load the huggingface inference endpoint of an LLM model
|
@@ -45,7 +44,8 @@ model = "mistralai/Mistral-7B-Instruct-v0.1"
|
|
45 |
# This is an inference endpoint API from Hugging Face: the model is not run locally; it runs on Hugging Face
|
46 |
hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
|
47 |
|
48 |
-
|
|
|
49 |
"""
|
50 |
Return answer to the query
|
51 |
"""
|
@@ -64,14 +64,16 @@ def source_question_answer(query,vectorstore:Chroma=chroma_db,llm:HuggingFaceHub
|
|
64 |
source_title_3 = input_docs[2].metadata['title']
|
65 |
source_title_4 = input_docs[3].metadata['title']
|
66 |
|
67 |
-
return response,
|
|
|
68 |
|
69 |
with gr.Blocks() as app:
|
|
|
70 |
with gr.Row():
|
71 |
gr.HTML("<h1>Chat with Tesla 2023 Earnings Calls Transcripts</h1>")
|
72 |
|
73 |
with gr.Row():
|
74 |
-
query = gr.Textbox("
|
75 |
btn = gr.Button("Ask Question")
|
76 |
|
77 |
with gr.Row():
|
@@ -98,5 +100,11 @@ with gr.Blocks() as app:
|
|
98 |
with gr.Column():
|
99 |
source_title_4 = gr.Markdown()
|
100 |
source4 = gr.Textbox(label="Source Text 4")
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
app.launch()
|
|
|
1 |
+
# for setting/extracting environment variables such as API keys
|
2 |
import os
|
3 |
+
|
4 |
### 1. For Web Scraping
|
5 |
# for querying the Financial Modeling Prep API
|
6 |
from urllib.request import urlopen
|
|
|
8 |
|
9 |
### 2. For Converting Scraped Text Into a Vector Store of Chunked Documents
|
10 |
# for tokenizing texts and splitting them into chunks of documents
|
|
|
11 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
12 |
# for turning documents into embeddings before putting them in vector store
|
13 |
from langchain.embeddings import HuggingFaceEmbeddings
|
|
|
25 |
|
26 |
fmp_api_key = os.environ['FMP_API_KEY']
|
27 |
|
28 |
+
# initialize the default model for embedding the tokenized texts; the articles are stored in this embedded form in the vector database
|
29 |
+
hf_embeddings = HuggingFaceEmbeddings()
|
30 |
+
|
31 |
if os.path.exists("chromadb_earnings_transcripts_extracted"):
|
32 |
os.system("rm -r chromadb_earnings_transcripts_extracted")
|
33 |
if os.path.exists("earnings_transcripts_chromadb.zip"):
|
34 |
os.system("rm earnings_transcripts_chromadb.zip")
|
|
|
35 |
os.system("wget https://github.com/damianboh/test_earnings_calls/raw/main/earnings_transcripts_chromadb.zip")
|
36 |
os.system("unzip earnings_transcripts_chromadb.zip -d chromadb_earnings_transcripts_extracted")
|
37 |
|
|
|
|
|
|
|
38 |
chroma_db = Chroma(persist_directory='chromadb_earnings_transcripts_extracted/chromadb_earnings_transcripts',embedding_function=hf_embeddings)
|
39 |
|
40 |
# Load the huggingface inference endpoint of an LLM model
|
|
|
44 |
# This is an inference endpoint API from Hugging Face: the model is not run locally; it runs on Hugging Face
|
45 |
hf_llm = HuggingFaceHub(repo_id=model,model_kwargs={'temperature':0.5,"max_new_tokens":300})
|
46 |
|
47 |
+
|
48 |
+
def source_question_answer(query:str,vectorstore:Chroma=chroma_db,llm:HuggingFaceHub=hf_llm):
|
49 |
"""
|
50 |
Return answer to the query
|
51 |
"""
|
|
|
64 |
source_title_3 = input_docs[2].metadata['title']
|
65 |
source_title_4 = input_docs[3].metadata['title']
|
66 |
|
67 |
+
return response,source_docs_1 ,source_docs_2,source_docs_3,source_docs_4, source_title_1, source_title_2, source_title_3, source_title_4
|
68 |
+
|
69 |
|
70 |
with gr.Blocks() as app:
|
71 |
+
|
72 |
with gr.Row():
|
73 |
gr.HTML("<h1>Chat with Tesla 2023 Earnings Calls Transcripts</h1>")
|
74 |
|
75 |
with gr.Row():
|
76 |
+
query = gr.Textbox("How is Tesla planning to expand?", placeholder="Enter question here...", label="Enter question")
|
77 |
btn = gr.Button("Ask Question")
|
78 |
|
79 |
with gr.Row():
|
|
|
100 |
with gr.Column():
|
101 |
source_title_4 = gr.Markdown()
|
102 |
source4 = gr.Textbox(label="Source Text 4")
|
103 |
+
|
104 |
+
query.submit(fn=source_question_answer, inputs=[query],
|
105 |
+
outputs=[answer, source1, source2, source3, source4, source_title_1, source_title_2, source_title_3, source_title_4])
|
106 |
+
|
107 |
+
btn.click(fn=source_question_answer, inputs=[query],
|
108 |
+
outputs=[answer, source1, source2, source3, source4, source_title_1, source_title_2, source_title_3, source_title_4])
|
109 |
|
110 |
app.launch()
|