Upload 2 files
Browse files- app.py +176 -0
- interaction_log.jsonl +8 -0
app.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import gradio as gr
|
3 |
+
from sentence_transformers import SentenceTransformer
|
4 |
+
import chromadb
|
5 |
+
import pandas as pd
|
6 |
+
import os
|
7 |
+
import json
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
# Sentence-embedding model used to turn user queries into vectors for retrieval.
model = SentenceTransformer("all-MiniLM-L6-v2")

# ChromaDB client that owns the document collection queried by the app.
chroma_client = chromadb.Client()
|
15 |
+
|
16 |
+
# Function to build the database from CSV
|
17 |
+
def build_database():
    """Rebuild the Chroma collection from ``collection_data.csv`` and return it.

    The CSV must provide ``documents``, ``ids``, ``metadatas`` and
    ``embeddings`` columns. The last two are stored as Python-literal text and
    are parsed back into objects before being added to the collection.

    Returns:
        The newly created collection, populated with every row of the CSV.
    """
    import ast  # local import: only this function needs it

    df = pd.read_csv('collection_data.csv')
    collection_name = 'Dataset-10k-companies'

    # Drop the collection only when it is actually present: deleting a missing
    # collection raises, which would abort startup on a client that has never
    # held it. list_collections() yields Collection objects or plain names
    # depending on the chromadb release, so normalise both to names.
    existing_names = {
        getattr(entry, 'name', entry)
        for entry in chroma_client.list_collections()
    }
    if collection_name in existing_names:
        chroma_client.delete_collection(name=collection_name)

    collection = chroma_client.create_collection(name=collection_name)

    # literal_eval accepts only Python literals, so a crafted CSV cell cannot
    # execute arbitrary code the way eval() would.
    metadatas = df['metadatas'].apply(ast.literal_eval).tolist()
    # Collapse doubled commas (presumably an export artefact) so each cell
    # parses as a list.
    embeddings = [
        ast.literal_eval(raw.replace(',,', ','))
        for raw in df['embeddings']
    ]

    collection.add(
        documents=df['documents'].tolist(),
        ids=df['ids'].tolist(),
        metadatas=metadatas,
        embeddings=embeddings,
    )

    return collection
|
39 |
+
|
40 |
+
# Build the collection once at import time; predict() reads this module-level
# object on every request.
collection = build_database()

# The API key was referenced below but never defined anywhere in this file,
# which raised NameError at import. Read it from the environment instead.
# NOTE(review): ANYSCALE_API_KEY is the assumed variable name - confirm.
anyscale_api_key = os.environ.get("ANYSCALE_API_KEY")

# NOTE(review): `Anyscale` is not imported anywhere in this file; add the
# import for the client library that provides it (get_llm_response calls
# `client.complete(...)` and reads `.text` from the result).
client = Anyscale(api_key=anyscale_api_key, model="meta-llama/Llama-2-70b-chat-hf")
|
45 |
+
|
46 |
+
# Function to get relevant chunks
|
47 |
+
def get_relevant_chunks(query, collection, top_n=3):
    """Return the stored chunks closest to ``query``.

    Args:
        query: Text to embed with the module-level sentence-transformer model.
        collection: Collection to search; must expose ``query``.
        top_n: Number of results requested from the collection.

    Returns:
        A list of ``(chunk, source, page)`` tuples in the order the collection
        returned them.
    """
    hits = collection.query(
        query_embeddings=[model.encode(query).tolist()],
        n_results=top_n,
    )

    # Results are grouped per query embedding; only one was sent, so index 0
    # holds everything we asked for.
    return [
        (
            text,
            hits['metadatas'][0][pos]['source'],
            hits['metadatas'][0][pos]['page'],
        )
        for pos, text in enumerate(hits['documents'][0])
    ]
|
59 |
+
|
60 |
+
# System prompt: fixes the assistant's role, the ###Context / ###Source /
# ###Question markers it should expect, and the Answer/Source reply layout.
qna_system_message = """
You are an assistant to Finsights analysts. Your task is to provide relevant information about the financial performance of the companies followed by Finsights.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
The context contains references to specific portions of documents relevant to the user's query, along with source links.
The source for a context will begin with the token: ###Source.

When crafting your response:
1. Select only the context relevant to answer the question.
2. Include the source links in your response.
3. User questions will begin with the token: ###Question.
4. If the question is irrelevant to Finsights, respond with: "I am an assistant for Finsight Docs. I can only help you with questions related to Finsights."

Adhere to the following guidelines:
- Your response should only address the question asked and nothing else.
- Answer only using the context provided.
- Do not mention anything about the context in your final answer.
- If the answer is not found in the context, respond with: "I don't know."
- Always quote the source when you use the context. Cite the relevant source at the end of your response under the section - Source:
- Do not make up sources. Use only the links provided in the sources section of the context. You are prohibited from providing other links/sources.

Here is an example of how to structure your response:

Answer:
[Answer]

Source:
[Source]
"""
|
90 |
+
# Per-request user prompt; filled via str.format with the retrieved
# ``context`` and the analyst's ``question``.
qna_user_message_template = """
###Context
Here are some documents and their source links that are relevant to the question mentioned below.
{context}

###Question
{question}
"""
|
99 |
+
|
100 |
+
# Function to get LLM response
|
101 |
+
def get_llm_response(prompt, max_attempts=3):
    """Ask the module-level LLM client for a completion, retrying as needed.

    Each attempt calls ``client.complete``. A reply that does not end in
    sentence punctuation is treated as truncated: its text is kept and the
    next attempt asks the model to continue from the reply's last 100
    characters. Failed calls are reported on stdout and use up one attempt.

    Args:
        prompt: Full prompt text for the first attempt.
        max_attempts: Maximum number of calls made to the client.

    Returns:
        The concatenated reply text; an empty string if no attempt produced
        any text.
    """
    pieces = []
    for attempt in range(max_attempts):
        try:
            response = client.complete(prompt, max_tokens=1000)  # Increase max_tokens if possible
            chunk = response.text.strip()
        except Exception as e:
            print(f"Attempt {attempt + 1} failed with error: {e}")
            continue

        if not chunk:
            # Nothing came back: retry with the same prompt instead of
            # replacing it with a "continue" request that has no text.
            continue

        pieces.append(chunk)
        # NOTE(review): a reply that ends with a source citation rather than
        # punctuation is also treated as truncated by this heuristic.
        if chunk.endswith((".", "!", "?")):
            break
        # Use the last 100 chars as context for the continuation request.
        prompt = "Please continue from where you left off:\n" + chunk[-100:]

    return "".join(pieces)
|
115 |
+
|
116 |
+
# Prediction function
|
117 |
+
def predict(company, user_query):
    """Answer ``user_query`` about ``company`` from the indexed documents.

    Retrieves the most relevant chunks, builds the LLM prompt from the system
    message and user template, asks the LLM, logs the exchange and returns
    the answer.

    Args:
        company: Company label selected in the UI; appended to the query for
            retrieval only.
        user_query: The question as typed by the user.

    Returns:
        The generated answer, or ``"An error occurred: ..."`` if retrieval or
        generation raised.
    """
    try:
        # Retrieval uses the company-qualified query; the LLM prompt carries
        # the question exactly as the user typed it.
        modified_query = f"{user_query} for {company}"
        relevant_chunks = get_relevant_chunks(modified_query, collection)

        # Each chunk is followed by its ###Source line, as the system message
        # tells the model to expect.
        context = "".join(
            f"{chunk}\n###Source {source}, Page {page}\n"
            for chunk, source, page in relevant_chunks
        )

        user_message = qna_user_message_template.format(context=context, question=user_query)
        prompt = f"{qna_system_message}\n\n{user_message}"

        answer = get_llm_response(prompt)

        # A failed log write must not discard an answer that was already
        # generated, so report it and carry on.
        try:
            log_interaction(company, user_query, context, answer)
        except OSError as e:
            print(f"Failed to log interaction: {e}")

        return answer
    except Exception as e:
        return f"An error occurred: {str(e)}"
|
149 |
+
|
150 |
+
# Function to log interactions
|
151 |
+
def log_interaction(company, user_query, context, answer):
    """Append one question/answer exchange to ``interaction_log.jsonl``.

    The record is written as a single JSON object followed by a newline, so
    the file stays one record per line.

    Args:
        company: Company label the question was asked about.
        user_query: The question as typed by the user.
        context: Retrieved context text that was sent to the LLM.
        answer: Text returned to the user.
    """
    record = {
        'company': company,
        'user_query': user_query,
        'context': context,
        'answer': answer,
    }
    with Path("interaction_log.jsonl").open("a") as sink:
        json.dump(record, sink)
        sink.write("\n")
|
161 |
+
|
162 |
+
# Companies offered in the UI's company selector.
company_list = ["MSFT", "AWS", "Meta", "Google", "IBM"]

# Widgets are built first so the Interface call reads as plain wiring. The
# input order matches predict(company, user_query).
_company_input = gr.Radio(company_list, label="Select Company")
_query_input = gr.Textbox(lines=2, placeholder="Enter your query here...", label="User Query")
_answer_output = gr.Textbox(label="Generated Answer")

iface = gr.Interface(
    fn=predict,
    inputs=[_company_input, _query_input],
    outputs=_answer_output,
    title="Company Reports Q&A",
    description="Query the vector database and get an LLM response based on the documents in the collection.",
)

# Start serving the UI.
iface.launch()
|
interaction_log.jsonl
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"company": "MSFT", "user_query": "What is the company revenue?", "context": "headcount -related expenses associated with sales and marketing personnel, and the costs of advertising, promotions, trade \nshows, seminars, and othe r programs. \nFiscal Year 2023 Compared with Fiscal Year 2022 \nSales and marketing expenses increased $934 million or 4% driven by 3 points of growth from the Nuance and Xandr \nacquisitions and investments in commercial sales, offset in part by a decline in Windows advertising. Sales and marketing \nincluded a favorable foreign currency impact of 2%. \nGeneral and Administrative \n \n(In millions, except percentages) 2023 2022 Percentage \nChange \n \nGeneral and administrative $ 7,575\u2004 $ 5,900\u2004 28% \nAs a percent of revenue 4%\u2009 3%\u2009 1ppt \nGeneral and administrative expenses include payroll, employee benefits, stock -based compensation expense, employee \nseverance expense incurred as part of a corporate program, and other headcount -related expenses associated with finance, \nlegal, facilities, c ertain human resources and other administrative personnel, certain taxes, and legal and other \nadministrative fees.\n###Source /content/dataset/msft-10-k-2023.pdf, Page 33\n75 Revenue and costs are generally directly attributed to our segments. However, due to the integrated structure of our \nbusiness, certain revenue recognized and costs incurred by one segment may benefit other segments. Revenue from \ncertain contracts is allocated amon g the segments based on the relative value of the underlying products and services, \nwhich can include allocation based on actual prices charged, prices when sold separately, or estimated costs plus a profit \nmargin. Cost of revenue is allocated in certain c ases based on a relative revenue methodology. 
Operating expenses that \nare allocated primarily include those relating to marketing of products and services from which multiple segments benefit \nand are generally allocated based on relative gross margin. \nIn addition, certain costs are incurred at a corporate level and allocated to our segments. These allocated costs generally \ninclude legal, including settlements and fines, information technology, human resources, finance, excise taxes, field selling , \nshared f acilities services, customer service and support, and severance incurred as part of a corporate program. Each \nallocation is measured differently based on the specific facts and circumstances of the costs being allocated and is generall y \nbased on relative g ross margin or relative headcount. \nSegment revenue and operating income were as follows during the periods presented: \n \n(In millions) \n \nYear Ended June 30, 2023 2022 2021 \n \nRevenue \n \nProductivity and Business Processes $ 69,274 $ 63,364 $ 53,915 \nIntelligent Cloud 87,907 74,965 59,728 \nMore Personal Computing 54,734 59,941 54,445 \nTotal $ 211,915 $ 198,270 $ 168,088 \n \nOperating Income \n \nProductivity and Business Processes $ 34,189 $ 29,690 $ 24,351 \nIntelligent Cloud 37,884 33,203 26,471 \nMore Personal Computing 16,450 20,490 19,094 \nTotal $ 88,523 $ 83,383 $ 69,916\n###Source /content/dataset/msft-10-k-2023.pdf, Page 75\n32 \u2022 Office Consumer products and cloud services revenue increased $140 million or 2%. Microsoft 365 Consumer \nsubscribers grew 12% to 67.0 million. \n\u2022 LinkedIn revenue increased $1.3 billion or 10% driven by Talent Solutions. \n\u2022 Dynamics products and cloud serv ices revenue increased $750 million or 16% driven by Dynamics 365 growth \nof 24%. \nOperating income increased $4.5 billion or 15%. \n\u2022 Gross margin increased $5.8 billion or 12% driven by growth in Office 365 Commercial and LinkedIn, as well \nas the change in accounting estimate. Gross margin percentage increased. 
Excluding the impact of the change \nin accounting estimate, gross margin percentage increased slightly driven by improvement in Office 365 \nCommercial, offset in part by sales mix shift to cloud offeri ngs. \n \n\u2022 Operating expenses increased $1.3 billion or 7% driven by investment in LinkedIn and employee severance \nexpenses. \nRevenue, gross margin, and operating income included an unfavorable foreign currency impact of 5%, 5%, and 8%, \nrespectively. \nIntelligent Cloud \nRevenue increased $12.9 billion or 17%. \n\u2022 Server products and cloud services revenue increased $12.6 billion or 19% driven by Azure and other cloud \nservices. Azure and other cloud services revenue grew 29% driven by growth in our consumption -based \nservices. Server products revenue decreased 1%. \n\u2022 Enterprise Services revenue increased $315 million or 4% driven by growth in Enterprise Support Services, \noffset in part by a decline in Industry Solutions (formerly Microsoft Consulting Services). \nOperating income increased $4.7 billion or 14%. \n\u2022 Gross margin increased $8.9 billion or 17% driven by growth in Azure and other cloud services and the change \nin accounting estimate. Gross margin percentage decreased slightly. Excluding the impact of the change in \naccounting estimate, gross margin percentage decreased 3 points driven by sales mix shift to Azure and other \ncloud services and a decline in Azure and other cloud services. \n\u2022 Operating expenses increased $4.2 billion or 21% driven by investment s in Azure, 4 points of growth from the\n###Source /content/dataset/msft-10-k-2023.pdf, Page 32\n", "answer": ""}
|
2 |
+
{"company": "MSFT", "user_query": "What is the company revenue?", "context": "headcount -related expenses associated with sales and marketing personnel, and the costs of advertising, promotions, trade \nshows, seminars, and othe r programs. \nFiscal Year 2023 Compared with Fiscal Year 2022 \nSales and marketing expenses increased $934 million or 4% driven by 3 points of growth from the Nuance and Xandr \nacquisitions and investments in commercial sales, offset in part by a decline in Windows advertising. Sales and marketing \nincluded a favorable foreign currency impact of 2%. \nGeneral and Administrative \n \n(In millions, except percentages) 2023 2022 Percentage \nChange \n \nGeneral and administrative $ 7,575\u2004 $ 5,900\u2004 28% \nAs a percent of revenue 4%\u2009 3%\u2009 1ppt \nGeneral and administrative expenses include payroll, employee benefits, stock -based compensation expense, employee \nseverance expense incurred as part of a corporate program, and other headcount -related expenses associated with finance, \nlegal, facilities, c ertain human resources and other administrative personnel, certain taxes, and legal and other \nadministrative fees.\n###Source /content/dataset/msft-10-k-2023.pdf, Page 33\n75 Revenue and costs are generally directly attributed to our segments. However, due to the integrated structure of our \nbusiness, certain revenue recognized and costs incurred by one segment may benefit other segments. Revenue from \ncertain contracts is allocated amon g the segments based on the relative value of the underlying products and services, \nwhich can include allocation based on actual prices charged, prices when sold separately, or estimated costs plus a profit \nmargin. Cost of revenue is allocated in certain c ases based on a relative revenue methodology. 
Operating expenses that \nare allocated primarily include those relating to marketing of products and services from which multiple segments benefit \nand are generally allocated based on relative gross margin. \nIn addition, certain costs are incurred at a corporate level and allocated to our segments. These allocated costs generally \ninclude legal, including settlements and fines, information technology, human resources, finance, excise taxes, field selling , \nshared f acilities services, customer service and support, and severance incurred as part of a corporate program. Each \nallocation is measured differently based on the specific facts and circumstances of the costs being allocated and is generall y \nbased on relative g ross margin or relative headcount. \nSegment revenue and operating income were as follows during the periods presented: \n \n(In millions) \n \nYear Ended June 30, 2023 2022 2021 \n \nRevenue \n \nProductivity and Business Processes $ 69,274 $ 63,364 $ 53,915 \nIntelligent Cloud 87,907 74,965 59,728 \nMore Personal Computing 54,734 59,941 54,445 \nTotal $ 211,915 $ 198,270 $ 168,088 \n \nOperating Income \n \nProductivity and Business Processes $ 34,189 $ 29,690 $ 24,351 \nIntelligent Cloud 37,884 33,203 26,471 \nMore Personal Computing 16,450 20,490 19,094 \nTotal $ 88,523 $ 83,383 $ 69,916\n###Source /content/dataset/msft-10-k-2023.pdf, Page 75\n32 \u2022 Office Consumer products and cloud services revenue increased $140 million or 2%. Microsoft 365 Consumer \nsubscribers grew 12% to 67.0 million. \n\u2022 LinkedIn revenue increased $1.3 billion or 10% driven by Talent Solutions. \n\u2022 Dynamics products and cloud serv ices revenue increased $750 million or 16% driven by Dynamics 365 growth \nof 24%. \nOperating income increased $4.5 billion or 15%. \n\u2022 Gross margin increased $5.8 billion or 12% driven by growth in Office 365 Commercial and LinkedIn, as well \nas the change in accounting estimate. Gross margin percentage increased. 
Excluding the impact of the change \nin accounting estimate, gross margin percentage increased slightly driven by improvement in Office 365 \nCommercial, offset in part by sales mix shift to cloud offeri ngs. \n \n\u2022 Operating expenses increased $1.3 billion or 7% driven by investment in LinkedIn and employee severance \nexpenses. \nRevenue, gross margin, and operating income included an unfavorable foreign currency impact of 5%, 5%, and 8%, \nrespectively. \nIntelligent Cloud \nRevenue increased $12.9 billion or 17%. \n\u2022 Server products and cloud services revenue increased $12.6 billion or 19% driven by Azure and other cloud \nservices. Azure and other cloud services revenue grew 29% driven by growth in our consumption -based \nservices. Server products revenue decreased 1%. \n\u2022 Enterprise Services revenue increased $315 million or 4% driven by growth in Enterprise Support Services, \noffset in part by a decline in Industry Solutions (formerly Microsoft Consulting Services). \nOperating income increased $4.7 billion or 14%. \n\u2022 Gross margin increased $8.9 billion or 17% driven by growth in Azure and other cloud services and the change \nin accounting estimate. Gross margin percentage decreased slightly. Excluding the impact of the change in \naccounting estimate, gross margin percentage decreased 3 points driven by sales mix shift to Azure and other \ncloud services and a decline in Azure and other cloud services. \n\u2022 Operating expenses increased $4.2 billion or 21% driven by investment s in Azure, 4 points of growth from the\n###Source /content/dataset/msft-10-k-2023.pdf, Page 32\n", "answer": ""}
|
3 |
+
{"company": "MSFT", "user_query": "What is the company revenue?", "context": "headcount -related expenses associated with sales and marketing personnel, and the costs of advertising, promotions, trade \nshows, seminars, and othe r programs. \nFiscal Year 2023 Compared with Fiscal Year 2022 \nSales and marketing expenses increased $934 million or 4% driven by 3 points of growth from the Nuance and Xandr \nacquisitions and investments in commercial sales, offset in part by a decline in Windows advertising. Sales and marketing \nincluded a favorable foreign currency impact of 2%. \nGeneral and Administrative \n \n(In millions, except percentages) 2023 2022 Percentage \nChange \n \nGeneral and administrative $ 7,575\u2004 $ 5,900\u2004 28% \nAs a percent of revenue 4%\u2009 3%\u2009 1ppt \nGeneral and administrative expenses include payroll, employee benefits, stock -based compensation expense, employee \nseverance expense incurred as part of a corporate program, and other headcount -related expenses associated with finance, \nlegal, facilities, c ertain human resources and other administrative personnel, certain taxes, and legal and other \nadministrative fees.\n###Source /content/dataset/msft-10-k-2023.pdf, Page 33\n75 Revenue and costs are generally directly attributed to our segments. However, due to the integrated structure of our \nbusiness, certain revenue recognized and costs incurred by one segment may benefit other segments. Revenue from \ncertain contracts is allocated amon g the segments based on the relative value of the underlying products and services, \nwhich can include allocation based on actual prices charged, prices when sold separately, or estimated costs plus a profit \nmargin. Cost of revenue is allocated in certain c ases based on a relative revenue methodology. 
Operating expenses that \nare allocated primarily include those relating to marketing of products and services from which multiple segments benefit \nand are generally allocated based on relative gross margin. \nIn addition, certain costs are incurred at a corporate level and allocated to our segments. These allocated costs generally \ninclude legal, including settlements and fines, information technology, human resources, finance, excise taxes, field selling , \nshared f acilities services, customer service and support, and severance incurred as part of a corporate program. Each \nallocation is measured differently based on the specific facts and circumstances of the costs being allocated and is generall y \nbased on relative g ross margin or relative headcount. \nSegment revenue and operating income were as follows during the periods presented: \n \n(In millions) \n \nYear Ended June 30, 2023 2022 2021 \n \nRevenue \n \nProductivity and Business Processes $ 69,274 $ 63,364 $ 53,915 \nIntelligent Cloud 87,907 74,965 59,728 \nMore Personal Computing 54,734 59,941 54,445 \nTotal $ 211,915 $ 198,270 $ 168,088 \n \nOperating Income \n \nProductivity and Business Processes $ 34,189 $ 29,690 $ 24,351 \nIntelligent Cloud 37,884 33,203 26,471 \nMore Personal Computing 16,450 20,490 19,094 \nTotal $ 88,523 $ 83,383 $ 69,916\n###Source /content/dataset/msft-10-k-2023.pdf, Page 75\n32 \u2022 Office Consumer products and cloud services revenue increased $140 million or 2%. Microsoft 365 Consumer \nsubscribers grew 12% to 67.0 million. \n\u2022 LinkedIn revenue increased $1.3 billion or 10% driven by Talent Solutions. \n\u2022 Dynamics products and cloud serv ices revenue increased $750 million or 16% driven by Dynamics 365 growth \nof 24%. \nOperating income increased $4.5 billion or 15%. \n\u2022 Gross margin increased $5.8 billion or 12% driven by growth in Office 365 Commercial and LinkedIn, as well \nas the change in accounting estimate. Gross margin percentage increased. 
Excluding the impact of the change \nin accounting estimate, gross margin percentage increased slightly driven by improvement in Office 365 \nCommercial, offset in part by sales mix shift to cloud offeri ngs. \n \n\u2022 Operating expenses increased $1.3 billion or 7% driven by investment in LinkedIn and employee severance \nexpenses. \nRevenue, gross margin, and operating income included an unfavorable foreign currency impact of 5%, 5%, and 8%, \nrespectively. \nIntelligent Cloud \nRevenue increased $12.9 billion or 17%. \n\u2022 Server products and cloud services revenue increased $12.6 billion or 19% driven by Azure and other cloud \nservices. Azure and other cloud services revenue grew 29% driven by growth in our consumption -based \nservices. Server products revenue decreased 1%. \n\u2022 Enterprise Services revenue increased $315 million or 4% driven by growth in Enterprise Support Services, \noffset in part by a decline in Industry Solutions (formerly Microsoft Consulting Services). \nOperating income increased $4.7 billion or 14%. \n\u2022 Gross margin increased $8.9 billion or 17% driven by growth in Azure and other cloud services and the change \nin accounting estimate. Gross margin percentage decreased slightly. Excluding the impact of the change in \naccounting estimate, gross margin percentage decreased 3 points driven by sales mix shift to Azure and other \ncloud services and a decline in Azure and other cloud services. \n\u2022 Operating expenses increased $4.2 billion or 21% driven by investment s in Azure, 4 points of growth from the\n###Source /content/dataset/msft-10-k-2023.pdf, Page 32\n", "answer": ""}
|
4 |
+
{"company": "AWS", "user_query": "What is the company revenue?", "context": "AWS\nNet sales $ 45,370 $ 62,202 $ 80,096 \nOperating expenses 31,839 43,670 57,255 \nOperating income $ 13,531 $ 18,532 $ 22,841 \nConsolidated\nNet sales $ 386,064 $ 469,822 $ 513,983 \nOperating expenses 363,165 444,943 501,735 \nOperating income 22,899 24,879 12,248 \nTotal non-operating income (expense) 1,279 13,272 (18,184)\nBenefit (provision) for income taxes (2,863) (4,791) 3,217 \nEquity-method investment activity, net of tax 16 4 (3)\nNet income (loss) $ 21,331 $ 33,364 $ (2,722)\n66\n###Source /content/dataset/aws-10-k-2023.pdf, Page 65\n2021 was $14.0 billion, of which $11.3 billion was recognized as revenue during the year ended December 31, 2022 and our total unearned revenue as of\nDecember 31, 2022 was $16.1 billion. Included in \u201cOther long-term liabilities\u201d on our consolidated balance sheets was $2.2 billion and $2.9 billion of unearned\nrevenue as of December 31, 2021 and 2022.\nAdditionally, we have performance obligations, primarily related to AWS, associated with commitments in customer contracts for future services that\nhave not yet been recognized in our financial statements. For contracts with original terms that exceed one year, those commitments not yet recognized were\n$110.4 billion as of December 31, 2022. The weighted average remaining life of our long-term contracts is 3.7 years. However, the amount and timing of\nrevenue recognition is largely driven by customer usage, which can extend beyond the original contractual term.\n49\n###Source /content/dataset/aws-10-k-2023.pdf, Page 48\nTable of Contents\nTechnology and Content\nTechnology and content costs include payroll and related expenses for employees involved in the research and development of new and existing products\nand services, development, design, and maintenance of our stores, curation and display of products and services made available in our online stores, and\ninfrastructure costs. 
Infrastructure costs include servers, networking equipment, and data center related depreciation and amortization, rent, utilities, and other\nexpenses necessary to support AWS and other Amazon businesses. Collectively, these costs reflect the investments we make in order to offer a wide variety of\nproducts and services to our customers, including expenditures related to initiatives to build and deploy innovative and efficient software and electronic devices\nand the development of a satellite network for global broadband service and autonomous vehicles for ride-hailing services. Technology and content costs are\ngenerally expensed as incurred.\nSales and Marketing\nSales and marketing costs include advertising and payroll and related expenses for personnel engaged in marketing and selling activities, including sales\ncommissions related to AWS. We pay commissions to third parties when their customer referrals result in sales. We also participate in cooperative advertising\narrangements with certain of our vendors, and other third parties.\nAdvertising and other promotional costs to market our products and services are expensed as incurred and were $10.9 billion, $16.9 billion, and $20.6\nbillion in 2020, 2021, and 2022.\nGeneral and Administrative\nGeneral and administrative expenses primarily consist of costs for corporate functions, including payroll and related expenses; facilities and equipment\nexpenses, such as depreciation and amortization expense and rent; and professional fees.\nStock-Based Compensation\nCompensation cost for all equity-classified stock awards expected to vest is measured at fair value on the date of grant and recognized over the service\nperiod. The fair value of restricted stock units is determined based on the number of shares granted and the quoted price of our common stock. Such value is\nrecognized as expense over the service period, net of estimated forfeitures, using the accelerated method. 
The estimated number of stock awards that will\nultimately vest requires judgment, and to the extent actual results or updated estimates differ from our current estimates, such amounts will be recorded as a\ncumulative adjustment in the period estimates are revised. We consider many factors when estimating expected forfeitures, including historical forfeiture\nexperience by grant year and employee level. Additionally, stock-based compensation includes stock appreciation rights that are expected to settle in cash.\n###Source /content/dataset/aws-10-k-2023.pdf, Page 43\n", "answer": ""}
|
5 |
+
{"company": "MSFT", "user_query": "what is the company revenue?", "context": "headcount -related expenses associated with sales and marketing personnel, and the costs of advertising, promotions, trade \nshows, seminars, and othe r programs. \nFiscal Year 2023 Compared with Fiscal Year 2022 \nSales and marketing expenses increased $934 million or 4% driven by 3 points of growth from the Nuance and Xandr \nacquisitions and investments in commercial sales, offset in part by a decline in Windows advertising. Sales and marketing \nincluded a favorable foreign currency impact of 2%. \nGeneral and Administrative \n \n(In millions, except percentages) 2023 2022 Percentage \nChange \n \nGeneral and administrative $ 7,575\u2004 $ 5,900\u2004 28% \nAs a percent of revenue 4%\u2009 3%\u2009 1ppt \nGeneral and administrative expenses include payroll, employee benefits, stock -based compensation expense, employee \nseverance expense incurred as part of a corporate program, and other headcount -related expenses associated with finance, \nlegal, facilities, c ertain human resources and other administrative personnel, certain taxes, and legal and other \nadministrative fees.\n###Source /content/dataset/msft-10-k-2023.pdf, Page 33\n75 Revenue and costs are generally directly attributed to our segments. However, due to the integrated structure of our \nbusiness, certain revenue recognized and costs incurred by one segment may benefit other segments. Revenue from \ncertain contracts is allocated amon g the segments based on the relative value of the underlying products and services, \nwhich can include allocation based on actual prices charged, prices when sold separately, or estimated costs plus a profit \nmargin. Cost of revenue is allocated in certain c ases based on a relative revenue methodology. 
Operating expenses that \nare allocated primarily include those relating to marketing of products and services from which multiple segments benefit \nand are generally allocated based on relative gross margin. \nIn addition, certain costs are incurred at a corporate level and allocated to our segments. These allocated costs generally \ninclude legal, including settlements and fines, information technology, human resources, finance, excise taxes, field selling , \nshared f acilities services, customer service and support, and severance incurred as part of a corporate program. Each \nallocation is measured differently based on the specific facts and circumstances of the costs being allocated and is generall y \nbased on relative g ross margin or relative headcount. \nSegment revenue and operating income were as follows during the periods presented: \n \n(In millions) \n \nYear Ended June 30, 2023 2022 2021 \n \nRevenue \n \nProductivity and Business Processes $ 69,274 $ 63,364 $ 53,915 \nIntelligent Cloud 87,907 74,965 59,728 \nMore Personal Computing 54,734 59,941 54,445 \nTotal $ 211,915 $ 198,270 $ 168,088 \n \nOperating Income \n \nProductivity and Business Processes $ 34,189 $ 29,690 $ 24,351 \nIntelligent Cloud 37,884 33,203 26,471 \nMore Personal Computing 16,450 20,490 19,094 \nTotal $ 88,523 $ 83,383 $ 69,916\n###Source /content/dataset/msft-10-k-2023.pdf, Page 75\n32 \u2022 Office Consumer products and cloud services revenue increased $140 million or 2%. Microsoft 365 Consumer \nsubscribers grew 12% to 67.0 million. \n\u2022 LinkedIn revenue increased $1.3 billion or 10% driven by Talent Solutions. \n\u2022 Dynamics products and cloud serv ices revenue increased $750 million or 16% driven by Dynamics 365 growth \nof 24%. \nOperating income increased $4.5 billion or 15%. \n\u2022 Gross margin increased $5.8 billion or 12% driven by growth in Office 365 Commercial and LinkedIn, as well \nas the change in accounting estimate. Gross margin percentage increased. 
Excluding the impact of the change \nin accounting estimate, gross margin percentage increased slightly driven by improvement in Office 365 \nCommercial, offset in part by sales mix shift to cloud offeri ngs. \n \n\u2022 Operating expenses increased $1.3 billion or 7% driven by investment in LinkedIn and employee severance \nexpenses. \nRevenue, gross margin, and operating income included an unfavorable foreign currency impact of 5%, 5%, and 8%, \nrespectively. \nIntelligent Cloud \nRevenue increased $12.9 billion or 17%. \n\u2022 Server products and cloud services revenue increased $12.6 billion or 19% driven by Azure and other cloud \nservices. Azure and other cloud services revenue grew 29% driven by growth in our consumption -based \nservices. Server products revenue decreased 1%. \n\u2022 Enterprise Services revenue increased $315 million or 4% driven by growth in Enterprise Support Services, \noffset in part by a decline in Industry Solutions (formerly Microsoft Consulting Services). \nOperating income increased $4.7 billion or 14%. \n\u2022 Gross margin increased $8.9 billion or 17% driven by growth in Azure and other cloud services and the change \nin accounting estimate. Gross margin percentage decreased slightly. Excluding the impact of the change in \naccounting estimate, gross margin percentage decreased 3 points driven by sales mix shift to Azure and other \ncloud services and a decline in Azure and other cloud services. \n\u2022 Operating expenses increased $4.2 billion or 21% driven by investment s in Azure, 4 points of growth from the\n###Source /content/dataset/msft-10-k-2023.pdf, Page 32\n", "answer": "Answer:\nThe company's total revenue is $211,915 million.\n\nSource:\n/content/dataset/msft-10-k-2023.pdf, Page 75I apologize, but I didn't leave off anywhere as this is the beginning of our conversation. It seems you provided some information about Microsoft's total revenue from a 10-K filing. 
If you'd like to discuss or ask questions about this information, I'm here to help!"}
|
6 |
+
{"company": "Google", "user_query": "what is the company revenue?", "context": "placed in service during the year ended December 31, 2023, the effect of this change in estimate was a reduction in \ndepreciation expense of $3.9 billion and an increase in net income of $3.0 billion , or $0.24 per basic and $0.24 per \ndiluted share, for the year ended December 31, 2023.\nRevenue Recognition\nRevenues are recognized when control of the promised goods or services is transferred to our customers, and \nthe collectibility of an amount that we expect in exchange for those goods or services is probable. Sales and other \nsimilar taxes are excluded from revenues. \nAdvertising Revenues\nWe generate advertising revenues primarily by delivering advertising on:\n\u2022Google Search and other properties, including revenues from traffic generated by search distribution partners \nwho use Google.com as their default search in browsers, toolbars, etc. and other Google owned and operated \nproperties like Gmail, Google Maps, and Google Play;\n\u2022YouTube properties; and\n\u2022Google Network properties, including revenues from Google Network properties participating in AdMob, \nAdSense, and Google Ad Manager.\nOur customers generally purchase advertising inventory through Google Ads, Google Ad Manager, Google \nDisplay & Video 360, and Google Marketing Platform, among others.\nWe offer advertising by delivering both performance and brand advertising. We recognize revenues for \nperformance advertising when a user engages with the advertisement . For brand advertising, we recognize revenues \nwhen the ad is displayed, or a user views the ad.\nFor ads placed on Google Network properties, we evaluate whether we are the principal (i.e., report revenues on \na gross basis) or agent (i.e., report revenues on a net basis). 
Generally, we report advertising revenues for ads placed \non Google Network properties on a gross basis, that is, the amounts billed to our customers are recorded as revenues, Table of Contents Alphabet Inc.\n56.\n###Source /content/dataset/google-10-k-2023.pdf, Page 56\nNote 2. Revenues \nDisaggregated Revenues\nThe following table presents revenues disaggregated by type (in millions):\nYear Ended December 31,\n2021 2022 2023\nGoogle Search & other $ 148,951 $ 162,450 $ 175,033 \nYouTube ads 28,845 29,243 31,510 \nGoogle Network 31,701 32,780 31,312 \nGoogle advertising 209,497 224,473 237,855 \nGoogle subscriptions, platforms, and devices 28,032 29,055 34,688 \nGoogle Services total 237,529 253,528 272,543 \nGoogle Cloud 19,206 26,280 33,088 \nOther Bets 753 1,068 1,527 \nHedging gains (losses) 149 1,960 236 \nTotal revenues $ 257,637 $ 282,836 $ 307,394 \nNo individual customer or groups of affiliated customers represented more than 10% of our revenues in 2021 , \n2022 , or 2023 . \nThe following table presents revenues disaggregated by geography, based on the addresses of our customers (in \nmillions):\nYear Ended December 31,\n 2021 2022 2023\nUnited States $ 117,854 46 % $ 134,814 48 % $ 146,286 47 %\nEMEA(1) 79,107 31 82,062 29 91,038 30 \nAPAC(1) 46,123 18 47,024 16 51,514 17 \nOther Americas(1) 14,404 5 16,976 6 18,320 6 \nHedging gains (losses) 149 0 1,960 1 236 0 \nTotal revenues $ 257,637 100 % $ 282,836 100 % $ 307,394 100 %\n###Source /content/dataset/google-10-k-2023.pdf, Page 63\nGoogle Services\nGoogle Services operating income increased $13.2 billion from 2022 to 2023 . The increase in operating income \nwas primarily driven by an increase in revenues, partially offset by an increase in content acquisition costs and \ncompensation expenses including an increase in SBC expense . 
Additionally, operating income benefited from a \nreduction in costs driven by the change in the estimated useful lives of our servers and certain network equipment.\nGoogle Cloud\nGoogle Cloud operating income of $1.7 billion for 2023 compared to an operating loss of $1.9 billion for 2022 \nrepresents an increase of $3.6 billion . The increase in operating income was primarily driven by an increase in \nrevenues, partially offset by an increase in compensation expenses l argely driven by headcount growth . Additionally, \noperating income benefited from a reduction in costs driven by the change in the estimated useful lives of our servers \nand certain network equipment.\nOther Bets\nOther Bets operating loss decreased $541 million from 2022 to 2023 primarily due to growth in revenues as well \nas a reduction in valuation-based compensation liabilities related to Other Bet companies .\nOther Income (Expense), Net\nThe following table presents OI&E, (in millions):Table of Contents Alphabet Inc.\n39.\n###Source /content/dataset/google-10-k-2023.pdf, Page 39\n", "answer": "Answer:\nThe company's total revenue is $307,394 million for the year ended December 31, 2023.\n\nSource:\n/content/dataset/google-10-k-2023.pdf, Page 63It seems like we were in the middle of discussing Google's financial report. Specifically, we were looking at the 10-K filing for 2023, and we were on page 63.\n\nTo continue, on page 63, we can see the consolidated statement of income for Google's parent company, Alphabet Inc. The statement shows the company's revenues, operating income, net income, and earnings per share for the year ended December 31, 2023.\n\nWould you like me to highlight any specific information from this page or move on to another section of the report?"}
|
7 |
+
{"company": "AWS", "user_query": "what is the company revenue?", "context": "AWS\nNet sales $ 45,370 $ 62,202 $ 80,096 \nOperating expenses 31,839 43,670 57,255 \nOperating income $ 13,531 $ 18,532 $ 22,841 \nConsolidated\nNet sales $ 386,064 $ 469,822 $ 513,983 \nOperating expenses 363,165 444,943 501,735 \nOperating income 22,899 24,879 12,248 \nTotal non-operating income (expense) 1,279 13,272 (18,184)\nBenefit (provision) for income taxes (2,863) (4,791) 3,217 \nEquity-method investment activity, net of tax 16 4 (3)\nNet income (loss) $ 21,331 $ 33,364 $ (2,722)\n66\n###Source /content/dataset/aws-10-k-2023.pdf, Page 65\n2021 was $14.0 billion, of which $11.3 billion was recognized as revenue during the year ended December 31, 2022 and our total unearned revenue as of\nDecember 31, 2022 was $16.1 billion. Included in \u201cOther long-term liabilities\u201d on our consolidated balance sheets was $2.2 billion and $2.9 billion of unearned\nrevenue as of December 31, 2021 and 2022.\nAdditionally, we have performance obligations, primarily related to AWS, associated with commitments in customer contracts for future services that\nhave not yet been recognized in our financial statements. For contracts with original terms that exceed one year, those commitments not yet recognized were\n$110.4 billion as of December 31, 2022. The weighted average remaining life of our long-term contracts is 3.7 years. However, the amount and timing of\nrevenue recognition is largely driven by customer usage, which can extend beyond the original contractual term.\n49\n###Source /content/dataset/aws-10-k-2023.pdf, Page 48\nTable of Contents\nTechnology and Content\nTechnology and content costs include payroll and related expenses for employees involved in the research and development of new and existing products\nand services, development, design, and maintenance of our stores, curation and display of products and services made available in our online stores, and\ninfrastructure costs. 
Infrastructure costs include servers, networking equipment, and data center related depreciation and amortization, rent, utilities, and other\nexpenses necessary to support AWS and other Amazon businesses. Collectively, these costs reflect the investments we make in order to offer a wide variety of\nproducts and services to our customers, including expenditures related to initiatives to build and deploy innovative and efficient software and electronic devices\nand the development of a satellite network for global broadband service and autonomous vehicles for ride-hailing services. Technology and content costs are\ngenerally expensed as incurred.\nSales and Marketing\nSales and marketing costs include advertising and payroll and related expenses for personnel engaged in marketing and selling activities, including sales\ncommissions related to AWS. We pay commissions to third parties when their customer referrals result in sales. We also participate in cooperative advertising\narrangements with certain of our vendors, and other third parties.\nAdvertising and other promotional costs to market our products and services are expensed as incurred and were $10.9 billion, $16.9 billion, and $20.6\nbillion in 2020, 2021, and 2022.\nGeneral and Administrative\nGeneral and administrative expenses primarily consist of costs for corporate functions, including payroll and related expenses; facilities and equipment\nexpenses, such as depreciation and amortization expense and rent; and professional fees.\nStock-Based Compensation\nCompensation cost for all equity-classified stock awards expected to vest is measured at fair value on the date of grant and recognized over the service\nperiod. The fair value of restricted stock units is determined based on the number of shares granted and the quoted price of our common stock. Such value is\nrecognized as expense over the service period, net of estimated forfeitures, using the accelerated method. 
The estimated number of stock awards that will\nultimately vest requires judgment, and to the extent actual results or updated estimates differ from our current estimates, such amounts will be recorded as a\ncumulative adjustment in the period estimates are revised. We consider many factors when estimating expected forfeitures, including historical forfeiture\nexperience by grant year and employee level. Additionally, stock-based compensation includes stock appreciation rights that are expected to settle in cash.\n###Source /content/dataset/aws-10-k-2023.pdf, Page 43\n", "answer": "Answer:\nThe company's revenue is $513,983 million.\n\nSource:\n/content/dataset/aws-10-k-2023.pdf, Page 65It seems like we were discussing Amazon Web Services (AWS) and its financial performance. Specifically, we were looking at the company's revenue, which was $513,983 million according to the 2023 10-K filing.\n\nWould you like to explore more financial metrics, such as AWS's operating income, net income, or cash flow? Or perhaps you'd like to discuss the company's growth strategy, competitive landscape, or recent developments in the cloud computing space?"}
|
8 |
+
{"company": "IBM", "user_query": "what is the company revenue?", "context": "In addition, there can be no assurance that the company\u2019s efforts to manage its currency and financing risks will be \nsuccessful.\nThe Company\u2019s Financial Performance Could Be Impacted by Changes in Market Liquidity Conditions and by \nCustomer Credit Risk on Receivables: The company\u2019s financial performance is exposed to a wide variety of industry sector \ndynamics worldwide, including sudden shifts in regional or global economic activity. The company\u2019s earnings and cash \nflows, as well as its access to funding, could be negatively impacted by changes in market liquidity conditions. IBM\u2019s 2023 \nAnnual Report to Stockholders includes information about the company\u2019s liquidity position. The company\u2019s client base \nincludes many enterprises worldwide, from small and medium businesses to the world\u2019s largest organizations and \ngovernments, with a significant portion of the company\u2019s revenue coming from global clients across many sectors. 
Most of \n8\n###Source /content/dataset/IBM-10-k-2023.pdf, Page 9\nDocuments incorporated by reference:\nPortions of IBM\u2019s Annual Report to Stockholders for the year ended December 31, 2023 are incorporated by reference into Parts I, II and IV of this Form 10-K.\nPortions of IBM\u2019s definitive Proxy Statement to be filed with the Securities and Exchange Commission and delivered to stockholders in connection with the Annual Meeting of \nStockholders to be held April 30, 2024 are incorporated by reference into Part III of this Form 10-K.\n###Source /content/dataset/IBM-10-k-2023.pdf, Page 0\nEXHIBIT 21\nINTERNATIONAL BUSINESS MACHINES CORPORATION SUBSIDIARIES\nSubsidiaries\u2014as of December 31, 2023\nIBM Argentina Sociedad de Responsabilidad Limitada Argentina 100 \nIBM Australia Limited Australia 100 \nIBM Global Financing Australia Limited Australia 100 \nIBM Oesterreich Internationale Bueromaschinen Gesellschaft m.b.H. Austria 100 \nRed Hat Austria GmbH Austria 100 \nIBM Bahamas Limited Bahamas 100 \nIBM Belgium Financial Services Company BV/SRL Belgium 100 \nInternational Business Machines of Belgium BV/SRL Belgium 100 \nWTC Insurance Corporation, Ltd. Bermuda 100 \nIBM Brasil\u2014Industria, Maquinas e Servicos Limitada Brazil 100 \nBanco IBM S.A. Brazil 100 \nIBM Bulgaria Ltd. Bulgaria 100 \nIBM Canada Limited\u2014IBM Canada Limitee Canada 100 \nIBM Global Financing Canada Corporation Canada 100 \nIBM de Chile S.A.C. Chile 100 \nIBM Global Financing de Chile SpA Chile 100 \nIBM (China) Investment Company Limited China (P.R.C.) 100 \nIBM (China) Co., Ltd. China (P.R.C.) 100 \nIBM de Colombia S.A.S. Colombia 100 \nIBM Business Transformation Center, S.r.l. Costa Rica 100 \nIBM Croatia Ltd./IBM Hrvatska d.o.o. Croatia 100 \nIBM Ceska Republika spol. s.r.o. Czech Republic 100 \nIBM Danmark ApS Denmark 100 \nIBM Global Financing Danmark ApS Denmark 100 \nRed Hat APS Denmark 100 \nIBM del Ecuador, C.A. 
Ecuador 100 \nIBM Egypt Business Support Services Egypt 100 \nIBM Eesti Osauhing (IBM Estonia Ou) Estonia 100 \nIBM Global Financing Finland Oy Finland 100 \nOy IBM Finland AB Finland 100 \nCompagnie IBM France, S.A.S. France 100 \nIBM France Financement, SAS France 100 \nRED HAT FRANCE France 100 \nIBM Deutschland GmbH Germany 100 \nIBM Deutschland Kreditbank GmbH Germany 100\n###Source /content/dataset/IBM-10-k-2023.pdf, Page 29\n", "answer": "I don't know.\n\nSource:\n/content/dataset/IBM-10-k-2023.pdfI apologize, but I didn't leave off anywhere as this is the beginning of our conversation. It seems you provided a source, which is a PDF file of IBM's 10-K report for 2023. If you'd like to discuss something specific about the report or have a question, I'm here to help!"}
|