KrishGoyani committed on
Commit
b33a445
·
verified ·
1 Parent(s): 7f7c409

Update sec_tools.py

Browse files
Files changed (1) hide show
  1. sec_tools.py +107 -107
sec_tools.py CHANGED
@@ -1,108 +1,108 @@
1
- import os
2
-
3
- import requests
4
-
5
- from langchain.tools import tool
6
- from langchain.text_splitter import CharacterTextSplitter
7
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
8
- from langchain_community.vectorstores import FAISS
9
-
10
- from sec_api import QueryApi
11
- from unstructured.partition.html import partition_html
12
-
13
- class SECTools():
14
- @tool("Search 10-Q form")
15
- def search_10q(data):
16
- """
17
- Useful to search information from the latest 10-Q form for a
18
- given stock.
19
- The input to this tool should be a pipe (|) separated text of
20
- length two, representing the stock ticker you are interested, what
21
- question you have from it.
22
- For example, `AAPL|what was last quarter's revenue`.
23
- """
24
- stock, ask = data.split("|")
25
- queryApi = QueryApi(api_key=os.environ['SEC_API_API_KEY'])
26
- query = {
27
- "query": {
28
- "query_string": {
29
- "query": f"ticker:{stock} AND formType:\"10-Q\""
30
- }
31
- },
32
- "from": "0",
33
- "size": "1",
34
- "sort": [{ "filedAt": { "order": "desc" }}]
35
- }
36
-
37
- fillings = queryApi.get_filings(query)['filings']
38
- link = fillings[0]['linkToFilingDetails']
39
- answer = SECTools.__embedding_search(link, ask)
40
- return answer
41
-
42
- @tool("Search 10-K form")
43
- def search_10k(data):
44
- """
45
- Useful to search information from the latest 10-K form for a
46
- given stock.
47
- The input to this tool should be a pipe (|) separated text of
48
- length two, representing the stock ticker you are interested, what
49
- question you have from it.
50
- For example, `AAPL|what was last year's revenue`.
51
- """
52
- stock, ask = data.split("|")
53
- queryApi = QueryApi(api_key=os.environ['SEC_API_API_KEY'])
54
- query = {
55
- "query": {
56
- "query_string": {
57
- "query": f"ticker:{stock} AND formType:\"10-K\""
58
- }
59
- },
60
- "from": "0",
61
- "size": "1",
62
- "sort": [{ "filedAt": { "order": "desc" }}]
63
- }
64
-
65
- fillings = queryApi.get_filings(query)['filings']
66
- link = fillings[0]['linkToFilingDetails']
67
- answer = SECTools.__embedding_search(link, ask)
68
- return answer
69
-
70
- def __embedding_search(url, ask):
71
- text = SECTools.__download_form_html(url)
72
- elements = partition_html(text=text)
73
- content = "\n".join([str(el) for el in elements])
74
- text_splitter = CharacterTextSplitter(
75
- separator = "\n",
76
- chunk_size = 1000,
77
- chunk_overlap = 150,
78
- length_function = len,
79
- is_separator_regex = False,
80
- )
81
- docs = text_splitter.create_documents([content])
82
- retriever = FAISS.from_documents(
83
- docs, GoogleGenerativeAIEmbeddings(model="models/embedding-001", api_key = os.getenv("GOOGLE_API_KEY"))
84
- ).as_retriever()
85
- answers = retriever.get_relevant_documents(ask, top_k=4)
86
- answers = "\n\n".join([a.page_content for a in answers])
87
- return answers
88
-
89
- def __download_form_html(url):
90
- headers = {
91
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
92
- 'Accept-Encoding': 'gzip, deflate, br',
93
- 'Accept-Language': 'en-US,en;q=0.9,pt-BR;q=0.8,pt;q=0.7',
94
- 'Cache-Control': 'max-age=0',
95
- 'Dnt': '1',
96
- 'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120"',
97
- 'Sec-Ch-Ua-Mobile': '?0',
98
- 'Sec-Ch-Ua-Platform': '"macOS"',
99
- 'Sec-Fetch-Dest': 'document',
100
- 'Sec-Fetch-Mode': 'navigate',
101
- 'Sec-Fetch-Site': 'none',
102
- 'Sec-Fetch-User': '?1',
103
- 'Upgrade-Insecure-Requests': '1',
104
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
105
- }
106
-
107
- response = requests.get(url, headers=headers)
108
  return response.text
 
1
+ import os
2
+
3
+ import requests
4
+
5
+ from langchain.tools import tool
6
+ from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
8
+ from langchain_community.vectorstores import FAISS
9
+
10
+ from sec_api import QueryApi
11
+ from unstructured.partition.html import partition_html
12
+
13
class SECTools():
    """Agent tools that answer questions from a company's latest SEC filings.

    Each public tool takes a single ``"TICKER|question"`` string, looks up the
    most recently filed form of the requested type through ``QueryApi``,
    downloads the filing page, and returns the text chunks that an embedding
    similarity search ranks as most relevant to the question.

    All members are called through the class (``SECTools.name(...)``); none
    of them takes ``self``.
    """

    @tool("Search 10-Q form")
    def search_10q(data):
        """
        Useful to search information from the latest 10-Q form for a
        given stock.
        The input to this tool should be a pipe (|) separated text of
        length two, representing the stock ticker you are interested, what
        question you have from it.
        For example, `AAPL|what was last quarter's revenue`.
        """
        # NOTE(review): the docstring above is presumably consumed by the
        # `tool` decorator as the tool description, so its wording is kept
        # unchanged.
        return SECTools.__search_latest_filing("10-Q", data)

    @tool("Search 10-K form")
    def search_10k(data):
        """
        Useful to search information from the latest 10-K form for a
        given stock.
        The input to this tool should be a pipe (|) separated text of
        length two, representing the stock ticker you are interested, what
        question you have from it.
        For example, `AAPL|what was last year's revenue`.
        """
        # Shares the lookup path with search_10q so both tools read the same
        # API key and handle bad input the same way.
        return SECTools.__search_latest_filing("10-K", data)

    @staticmethod
    def __sec_api_key():
        """Return the sec-api key from the environment.

        ``SEC_API_KEY`` is preferred; ``SEC_API_API_KEY`` is still accepted so
        environments configured with either name used by this file keep
        working.

        Raises:
            KeyError: if neither variable is set to a non-empty value.
        """
        for name in ("SEC_API_KEY", "SEC_API_API_KEY"):
            key = os.environ.get(name)
            if key:
                return key
        raise KeyError("SEC_API_KEY")

    @staticmethod
    def __search_latest_filing(form_type, data):
        """Answer a question from the newest filing of ``form_type``.

        Args:
            form_type: SEC form type to search for, e.g. ``"10-Q"``.
            data: ``"TICKER|question"`` text as received by the tools.

        Returns:
            The retrieved passages joined by blank lines, or a short message
            when the filing query returns no results.

        Raises:
            ValueError: if ``data`` contains no ``|`` separator.
            KeyError: if no sec-api key is configured.
        """
        # maxsplit=1: a question that itself contains "|" stays in one piece.
        # Surrounding whitespace is dropped so "AAPL | ..." still matches.
        stock, ask = (part.strip() for part in data.split("|", 1))

        query_api = QueryApi(api_key=SECTools.__sec_api_key())
        query = {
            "query": {
                "query_string": {
                    "query": f"ticker:{stock} AND formType:\"{form_type}\""
                }
            },
            "from": "0",
            "size": "1",
            # Newest first, so the single requested hit is the latest filing.
            "sort": [{"filedAt": {"order": "desc"}}]
        }

        filings = query_api.get_filings(query)['filings']
        if not filings:
            # Unknown ticker or no filing of this type: report it instead of
            # failing with IndexError on filings[0].
            return f"No {form_type} filing found for ticker {stock}."

        link = filings[0]['linkToFilingDetails']
        return SECTools.__embedding_search(link, ask)

    @staticmethod
    def __embedding_search(url, ask):
        """Return the passages of the page at ``url`` most similar to ``ask``.

        The page is downloaded, partitioned into text elements, split into
        overlapping chunks, embedded into an in-memory FAISS index, and
        queried with ``ask``.

        Args:
            url: Address of the filing page to download.
            ask: Natural-language question used as the search query.

        Returns:
            The ``page_content`` of the retrieved chunks, separated by blank
            lines.
        """
        text = SECTools.__download_form_html(url)
        elements = partition_html(text=text)
        content = "\n".join(str(el) for el in elements)

        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=150,
            length_function=len,
            is_separator_regex=False,
        )
        docs = text_splitter.create_documents([content])

        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            api_key=os.getenv("GOOGLE_API_KEY"),
        )
        # The number of results is a retriever setting (search_kwargs["k"]),
        # not an argument of the retrieval call.
        retriever = FAISS.from_documents(docs, embeddings).as_retriever(
            search_kwargs={"k": 4}
        )
        matches = retriever.get_relevant_documents(ask)
        return "\n\n".join(doc.page_content for doc in matches)

    @staticmethod
    def __download_form_html(url):
        """Download ``url`` with browser-like headers and return its text.

        Args:
            url: Address of the page to fetch.

        Returns:
            The decoded response body.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
            requests.Timeout: if the server does not respond in time.
        """
        # NOTE(review): 'br' is advertised in Accept-Encoding; confirm the
        # runtime can decode brotli responses.
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9,pt-BR;q=0.8,pt;q=0.7',
            'Cache-Control': 'max-age=0',
            'Dnt': '1',
            'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"macOS"',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        }

        # A timeout keeps the tool from hanging on an unresponsive server.
        response = requests.get(url, headers=headers, timeout=30)
        # Fail loudly on an HTTP error rather than embedding an error page.
        response.raise_for_status()
        return response.text