Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,35 +4,40 @@ import os
|
|
4 |
import uuid
|
5 |
import json
|
6 |
from pathlib import Path
|
7 |
-
# Install required libraries if not already installed
|
8 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
|
9 |
-
# Install langchain_community if it is not already installed
|
10 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "langchain_community"])
|
11 |
-
# Install sentence-transformers if it is not already installed
|
12 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "sentence-transformers"])
|
13 |
-
# Install sentence-transformers if it is not already installed
|
14 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "chromadb"])
|
15 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "python-dotenv"])
|
16 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
from huggingface_hub import login, CommitScheduler
|
|
|
18 |
import gradio as gr
|
19 |
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
|
20 |
from langchain_community.vectorstores import Chroma
|
21 |
-
|
22 |
# Load environment variables from .env file
|
23 |
load_dotenv()
|
24 |
|
25 |
-
#
|
26 |
-
hf_token = os.getenv("
|
27 |
-
if not hf_token:
|
28 |
-
raise ValueError("Hugging Face token not found in environment variables. Set HF_TOKEN in your .env file.")
|
29 |
-
login(hf_token)
|
30 |
-
|
31 |
-
# Set OpenAI API key from environment variables
|
32 |
openai.api_key = os.getenv("OPENAI_API_KEY") # Ensure OPENAI_API_KEY is in your .env file
|
33 |
-
client = openai
|
34 |
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
embeddings = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
|
37 |
collection_name = 'report-10k-2024'
|
38 |
|
@@ -47,25 +52,11 @@ retriever = vectorstore_persisted.as_retriever(
|
|
47 |
search_kwargs={'k': 5}
|
48 |
)
|
49 |
|
50 |
-
# Define
|
51 |
-
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
|
52 |
-
log_folder = log_file.parent
|
53 |
-
|
54 |
-
scheduler = CommitScheduler(
|
55 |
-
repo_id="RAGREPORTS-log",
|
56 |
-
repo_type="dataset",
|
57 |
-
folder_path=log_folder,
|
58 |
-
path_in_repo="data",
|
59 |
-
every=2
|
60 |
-
)
|
61 |
-
|
62 |
-
# Define the Q&A system message
|
63 |
qna_system_message = """
|
64 |
-
You are an AI assistant
|
65 |
-
Your
|
66 |
-
|
67 |
-
Answer only based on the provided context.
|
68 |
-
If the answer is not found in the context, respond with "I don't know."
|
69 |
"""
|
70 |
|
71 |
qna_user_message_template = """
|
@@ -93,29 +84,42 @@ def predict(user_input, company):
|
|
93 |
|
94 |
# Get response from the LLM
|
95 |
try:
|
96 |
-
response =
|
97 |
model='mistralai/Mixtral-8x7B-Instruct-v0.1',
|
98 |
messages=prompt,
|
99 |
temperature=0
|
100 |
)
|
101 |
prediction = response.choices[0].message.content
|
|
|
102 |
except Exception as e:
|
103 |
prediction = str(e)
|
104 |
|
105 |
# Log inputs and outputs to a local log file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
with scheduler.lock:
|
107 |
with log_file.open("a") as f:
|
108 |
-
f.write(json.dumps(
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
113 |
f.write("\n")
|
114 |
|
115 |
return prediction
|
116 |
|
117 |
-
# Define the prediction interface function
|
118 |
def get_predict(question, company):
|
|
|
119 |
company_map = {
|
120 |
"AWS": "aws",
|
121 |
"IBM": "IBM",
|
@@ -126,10 +130,10 @@ def get_predict(question, company):
|
|
126 |
selected_company = company_map.get(company)
|
127 |
if not selected_company:
|
128 |
return "Invalid company selected"
|
129 |
-
|
130 |
return predict(question, selected_company)
|
131 |
|
132 |
-
# Set
|
133 |
with gr.Blocks(theme="gradio/seafoam@>=0.0.1,<0.1.0") as demo:
|
134 |
with gr.Row():
|
135 |
company = gr.Radio(["AWS", "IBM", "Google", "Meta", "Microsoft"], label="Select a company")
|
@@ -147,4 +151,9 @@ with gr.Blocks(theme="gradio/seafoam@>=0.0.1,<0.1.0") as demo:
|
|
147 |
demo.queue()
|
148 |
demo.launch()
|
149 |
|
|
|
|
|
|
|
|
|
|
|
150 |
|
|
|
4 |
import uuid
|
5 |
import json
|
6 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from dotenv import load_dotenv
|
8 |
+
|
9 |
+
# Install dependencies if not already installed
def install_packages():
    """Install runtime dependencies with a single pip invocation.

    Batching all packages into one ``pip install`` call runs the dependency
    resolver once instead of six times, which is faster and lets pip resolve
    the packages' mutual constraints together.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    packages = [
        "openai",
        "langchain_community",
        "sentence-transformers",
        "chromadb",
        "huggingface_hub",
        "python-dotenv",
    ]
    # sys.executable guarantees we install into the interpreter actually
    # running this app, not whatever "pip" happens to be on PATH.
    subprocess.check_call([sys.executable, "-m", "pip", "install", *packages])

install_packages()
|
19 |
+
|
20 |
+
# Import installed modules
|
21 |
from huggingface_hub import login, CommitScheduler
|
22 |
+
import openai
|
23 |
import gradio as gr
|
24 |
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
|
25 |
from langchain_community.vectorstores import Chroma
|
26 |
+
|
27 |
# Load environment variables from .env file
|
28 |
load_dotenv()
|
29 |
|
30 |
+
# Get API tokens from environment variables
|
31 |
+
hf_token = os.getenv("HUGGINGFACE_TOKEN")
|
|
|
|
|
|
|
|
|
|
|
32 |
openai.api_key = os.getenv("OPENAI_API_KEY") # Ensure OPENAI_API_KEY is in your .env file
|
|
|
33 |
|
34 |
+
if hf_token is None:
|
35 |
+
raise ValueError("Hugging Face token is missing. Please check your .env file.")
|
36 |
+
|
37 |
+
# Log in to Hugging Face
|
38 |
+
login(hf_token)
|
39 |
+
|
40 |
+
# Set up embeddings and vector store
|
41 |
embeddings = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
|
42 |
collection_name = 'report-10k-2024'
|
43 |
|
|
|
52 |
search_kwargs={'k': 5}
|
53 |
)
|
54 |
|
55 |
+
# Define Q&A system message.
# System prompt for the RAG pipeline: pins the assistant's role and instructs
# it to answer strictly from the retrieved context (with a fixed "I don't
# know." fallback) to limit hallucination.
qna_system_message = """
You are an AI assistant for Finsights Grey Inc., helping automate extraction, summarization, and analysis of 10-K reports.
Your responses should be based solely on the context provided.
If an answer is not found in the context, respond with "I don't know."
"""
|
61 |
|
62 |
qna_user_message_template = """
|
|
|
84 |
|
85 |
# Get response from the LLM
|
86 |
try:
|
87 |
+
response = openai.ChatCompletion.create(
|
88 |
model='mistralai/Mixtral-8x7B-Instruct-v0.1',
|
89 |
messages=prompt,
|
90 |
temperature=0
|
91 |
)
|
92 |
prediction = response.choices[0].message.content
|
93 |
+
|
94 |
except Exception as e:
|
95 |
prediction = str(e)
|
96 |
|
97 |
# Log inputs and outputs to a local log file
|
98 |
+
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
|
99 |
+
log_folder = log_file.parent
|
100 |
+
scheduler = CommitScheduler(
|
101 |
+
repo_id="RAGREPORTS-log",
|
102 |
+
repo_type="dataset",
|
103 |
+
folder_path=log_folder,
|
104 |
+
path_in_repo="data",
|
105 |
+
every=2
|
106 |
+
)
|
107 |
+
|
108 |
with scheduler.lock:
|
109 |
with log_file.open("a") as f:
|
110 |
+
f.write(json.dumps(
|
111 |
+
{
|
112 |
+
'user_input': user_input,
|
113 |
+
'retrieved_context': context_for_query,
|
114 |
+
'model_response': prediction
|
115 |
+
}
|
116 |
+
))
|
117 |
f.write("\n")
|
118 |
|
119 |
return prediction
|
120 |
|
|
|
121 |
def get_predict(question, company):
|
122 |
+
# Map user selection to company name
|
123 |
company_map = {
|
124 |
"AWS": "aws",
|
125 |
"IBM": "IBM",
|
|
|
130 |
selected_company = company_map.get(company)
|
131 |
if not selected_company:
|
132 |
return "Invalid company selected"
|
133 |
+
|
134 |
return predict(question, selected_company)
|
135 |
|
136 |
+
# Set-up the Gradio UI
|
137 |
with gr.Blocks(theme="gradio/seafoam@>=0.0.1,<0.1.0") as demo:
|
138 |
with gr.Row():
|
139 |
company = gr.Radio(["AWS", "IBM", "Google", "Meta", "Microsoft"], label="Select a company")
|
|
|
151 |
demo.queue()
|
152 |
demo.launch()
|
153 |
|
154 |
+
|
155 |
+
|
156 |
+
|
157 |
+
|
158 |
+
|
159 |
|