kajila committed on
Commit
e082d15
·
verified ·
1 Parent(s): ed03664

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -46
app.py CHANGED
@@ -4,35 +4,40 @@ import os
4
  import uuid
5
  import json
6
  from pathlib import Path
7
- # Install required libraries if not already installed
8
- subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
9
- # Install langchain_community if it is not already installed
10
- subprocess.check_call([sys.executable, "-m", "pip", "install", "langchain_community"])
11
- # Install sentence-transformers if it is not already installed
12
- subprocess.check_call([sys.executable, "-m", "pip", "install", "sentence-transformers"])
13
- # Install chromadb if it is not already installed
14
- subprocess.check_call([sys.executable, "-m", "pip", "install", "chromadb"])
15
- subprocess.check_call([sys.executable, "-m", "pip", "install", "python-dotenv"])
16
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  from huggingface_hub import login, CommitScheduler
 
18
  import gradio as gr
19
  from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
20
  from langchain_community.vectorstores import Chroma
21
- import openai
22
  # Load environment variables from .env file
23
  load_dotenv()
24
 
25
- # Login to Hugging Face using token from environment variables
26
- hf_token = os.getenv("HF_TOKEN")
27
- if not hf_token:
28
- raise ValueError("Hugging Face token not found in environment variables. Set HF_TOKEN in your .env file.")
29
- login(hf_token)
30
-
31
- # Set OpenAI API key from environment variables
32
  openai.api_key = os.getenv("OPENAI_API_KEY") # Ensure OPENAI_API_KEY is in your .env file
33
- client = openai
34
 
35
- # Set up embeddings and vectorstore
 
 
 
 
 
 
36
  embeddings = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
37
  collection_name = 'report-10k-2024'
38
 
@@ -47,25 +52,11 @@ retriever = vectorstore_persisted.as_retriever(
47
  search_kwargs={'k': 5}
48
  )
49
 
50
- # Define logging configuration
51
- log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
52
- log_folder = log_file.parent
53
-
54
- scheduler = CommitScheduler(
55
- repo_id="RAGREPORTS-log",
56
- repo_type="dataset",
57
- folder_path=log_folder,
58
- path_in_repo="data",
59
- every=2
60
- )
61
-
62
- # Define the Q&A system message
63
  qna_system_message = """
64
- You are an AI assistant helping Finsights Grey Inc., a financial technology firm, develop a Retrieval-Augmented Generation (RAG) system to automate extraction, summarization, and analysis of 10-K reports.
65
- Your knowledge base was last updated in August 2023.
66
- User questions will start with the token: ###Question.
67
- Answer only based on the provided context.
68
- If the answer is not found in the context, respond with "I don't know."
69
  """
70
 
71
  qna_user_message_template = """
@@ -93,29 +84,42 @@ def predict(user_input, company):
93
 
94
  # Get response from the LLM
95
  try:
96
- response = client.chat.completions.create(
97
  model='mistralai/Mixtral-8x7B-Instruct-v0.1',
98
  messages=prompt,
99
  temperature=0
100
  )
101
  prediction = response.choices[0].message.content
 
102
  except Exception as e:
103
  prediction = str(e)
104
 
105
  # Log inputs and outputs to a local log file
 
 
 
 
 
 
 
 
 
 
106
  with scheduler.lock:
107
  with log_file.open("a") as f:
108
- f.write(json.dumps({
109
- 'user_input': user_input,
110
- 'retrieved_context': context_for_query,
111
- 'model_response': prediction
112
- }))
 
 
113
  f.write("\n")
114
 
115
  return prediction
116
 
117
- # Define the prediction interface function
118
  def get_predict(question, company):
 
119
  company_map = {
120
  "AWS": "aws",
121
  "IBM": "IBM",
@@ -126,10 +130,10 @@ def get_predict(question, company):
126
  selected_company = company_map.get(company)
127
  if not selected_company:
128
  return "Invalid company selected"
129
-
130
  return predict(question, selected_company)
131
 
132
- # Set up the Gradio UI
133
  with gr.Blocks(theme="gradio/seafoam@>=0.0.1,<0.1.0") as demo:
134
  with gr.Row():
135
  company = gr.Radio(["AWS", "IBM", "Google", "Meta", "Microsoft"], label="Select a company")
@@ -147,4 +151,9 @@ with gr.Blocks(theme="gradio/seafoam@>=0.0.1,<0.1.0") as demo:
147
  demo.queue()
148
  demo.launch()
149
 
 
 
 
 
 
150
 
 
4
  import uuid
5
  import json
6
  from pathlib import Path
 
 
 
 
 
 
 
 
 
7
  from dotenv import load_dotenv
8
+
9
+ # Install dependencies if not already installed
10
def install_packages():
    """Install the app's third-party dependencies with pip.

    Runs ``<current interpreter> -m pip install <package>`` once per package,
    in a fixed order. ``subprocess.check_call`` raises ``CalledProcessError``
    when a pip invocation exits with a non-zero status, so packages listed
    after a failing one are not attempted.
    """
    # Shared command prefix; using sys.executable targets the interpreter
    # that is running this script rather than whatever "pip" is on PATH.
    pip_install = [sys.executable, "-m", "pip", "install"]
    required = (
        "openai",
        "langchain_community",
        "sentence-transformers",
        "chromadb",
        "huggingface_hub",
        "python-dotenv",
    )
    for package in required:
        subprocess.check_call([*pip_install, package])
17
+
18
+ install_packages()
19
+
20
+ # Import installed modules
21
  from huggingface_hub import login, CommitScheduler
22
+ import openai
23
  import gradio as gr
24
  from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
25
  from langchain_community.vectorstores import Chroma
26
+
27
  # Load environment variables from .env file
28
  load_dotenv()
29
 
30
+ # Get API tokens from environment variables
31
+ hf_token = os.getenv("HUGGINGFACE_TOKEN")
 
 
 
 
 
32
  openai.api_key = os.getenv("OPENAI_API_KEY") # Ensure OPENAI_API_KEY is in your .env file
 
33
 
34
+ if hf_token is None:
35
+ raise ValueError("Hugging Face token is missing. Please check your .env file.")
36
+
37
+ # Log in to Hugging Face
38
+ login(hf_token)
39
+
40
+ # Set up embeddings and vector store
41
  embeddings = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
42
  collection_name = 'report-10k-2024'
43
 
 
52
  search_kwargs={'k': 5}
53
  )
54
 
55
+ # Define Q&A system message
 
 
 
 
 
 
 
 
 
 
 
 
56
  qna_system_message = """
57
+ You are an AI assistant for Finsights Grey Inc., helping automate extraction, summarization, and analysis of 10-K reports.
58
+ Your responses should be based solely on the context provided.
59
+ If an answer is not found in the context, respond with "I don't know."
 
 
60
  """
61
 
62
  qna_user_message_template = """
 
84
 
85
  # Get response from the LLM
86
  try:
87
+ response = openai.ChatCompletion.create(
88
  model='mistralai/Mixtral-8x7B-Instruct-v0.1',
89
  messages=prompt,
90
  temperature=0
91
  )
92
  prediction = response.choices[0].message.content
93
+
94
  except Exception as e:
95
  prediction = str(e)
96
 
97
  # Log inputs and outputs to a local log file
98
+ log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
99
+ log_folder = log_file.parent
100
+ scheduler = CommitScheduler(
101
+ repo_id="RAGREPORTS-log",
102
+ repo_type="dataset",
103
+ folder_path=log_folder,
104
+ path_in_repo="data",
105
+ every=2
106
+ )
107
+
108
  with scheduler.lock:
109
  with log_file.open("a") as f:
110
+ f.write(json.dumps(
111
+ {
112
+ 'user_input': user_input,
113
+ 'retrieved_context': context_for_query,
114
+ 'model_response': prediction
115
+ }
116
+ ))
117
  f.write("\n")
118
 
119
  return prediction
120
 
 
121
  def get_predict(question, company):
122
+ # Map user selection to company name
123
  company_map = {
124
  "AWS": "aws",
125
  "IBM": "IBM",
 
130
  selected_company = company_map.get(company)
131
  if not selected_company:
132
  return "Invalid company selected"
133
+
134
  return predict(question, selected_company)
135
 
136
+ # Set up the Gradio UI
137
  with gr.Blocks(theme="gradio/seafoam@>=0.0.1,<0.1.0") as demo:
138
  with gr.Row():
139
  company = gr.Radio(["AWS", "IBM", "Google", "Meta", "Microsoft"], label="Select a company")
 
151
  demo.queue()
152
  demo.launch()
153
 
154
+
155
+
156
+
157
+
158
+
159