Koshti10 committed on
Commit
6ffd7f9
·
verified ·
1 Parent(s): 0a13de5

Upload 11 files

Browse files
app_drive.py CHANGED
@@ -6,14 +6,15 @@ import datetime
6
  import os
7
 
8
  from lc_base.chain import openai_chain
 
9
  from driveapi.drive import upload_chat_to_drive
10
  from driveapi.drive_database import create_chroma_db
11
 
12
  # global time_diff, model_name, search_type
13
  time_diff = 0
14
- # model_name="gpt-3.5-turbo-1106"
15
  # model_name = "gpt-4-1106-preview"
16
- model_name = "gpt-4-0125-preview"
17
  search_type = "stuff"
18
  input_question = ""
19
  model_response = ""
@@ -23,20 +24,28 @@ dir = ""
23
  title = """<h1 align="center">ResearchBuddy</h1>"""
24
  description = """<br><br><h3 align="center">This is a GPT based Research Buddy to assist in navigating new research topics.</h3>"""
25
 
26
- def save_api_key(api_key):
27
- os.environ['OPENAI_API_KEY'] = str(api_key)
28
- return f"API Key saved in the environment: {api_key}"
29
 
30
  def save_drive_link(drive_link):
 
31
  os.environ['DRIVE_LINK'] = str(drive_link)
32
- print(f"API Key saved in the environment: {drive_link}")
33
  return None
34
 
35
- def create_data_from_drive():
36
  global db
 
 
 
 
 
37
  db = create_chroma_db()
38
  return "Processing Completed - You can start the chat now!"
39
 
 
 
 
 
 
40
  def user(user_message, history):
41
  return "", history + [[user_message, None]]
42
 
@@ -48,20 +57,14 @@ def respond(message, chat_history):
48
  print(type(db))
49
  question = str(message)
50
  chain = openai_chain(inp_dir=dir)
51
- # prompt = '''You are an AI assistant equipped with advanced analytical capabilities.
52
- # You have been provided with a carefully curated set of documents relevant to a specific question.
53
- # Your task is to meticulously analyze these documents and provide a comprehensive answer to the following question.
54
- # Ensure that your response is detailed, accurate, and maintains a formal, academic tone.
55
- # The information required to answer this question is contained within the documents.
56
- # Please proceed with a thorough examination to deliver a well-informed response. Question: '''
57
-
58
- # query = prompt + question
59
  query = question
60
 
61
  start_time = time.time()
62
 
63
  output = chain.get_response_from_drive(query=query, database=db, k=10, model_name=model_name, type=search_type)
64
  print(output)
 
65
 
66
  # Update global variables to log
67
  time_diff = time.time() - start_time
@@ -72,7 +75,7 @@ def respond(message, chat_history):
72
  chat_history.append((message, bot_message))
73
 
74
  time.sleep(2)
75
- return " ", chat_history
76
 
77
  def save_feedback(feedback):
78
  global user_feedback
@@ -85,9 +88,10 @@ def save_feedback(feedback):
85
  [input_question, model_response, model_name, time_diff, user_feedback]
86
  ]
87
 
88
- if user_feedback[0] != "None":
89
  upload_chat_to_drive(log_data, file_name)
90
 
 
91
  def default_feedback():
92
  return "None"
93
 
@@ -112,25 +116,24 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate"))
112
 
113
  global db
114
 
115
- with gr.Row():
116
  with gr.Column():
117
- api_key_input = gr.Textbox(lines=1, label="Enter your OpenAI API Key, then press Enter...")
 
118
 
119
- with gr.Column():
120
- drive_link_input = gr.Textbox(lines=1, label="Enter your shared drive link, then press Enter...")
121
 
122
- with gr.Row():
123
- process_files_input = gr.Button(value="Process files")
 
 
 
 
124
 
125
- with gr.Row():
126
- status_message = gr.Text(label="Status", value="Click - Process Files")
127
 
128
-
129
-
130
- api_key_input.submit(save_api_key, [api_key_input])
131
- drive_link_input.submit(fn=save_drive_link, inputs=[drive_link_input])
132
- drive_link_check = os.environ.get("DRIVE_LINK")
133
- process_files_input.click(fn=create_data_from_drive, outputs=status_message)
134
 
135
  chatbot = gr.Chatbot(height=750)
136
  msg = gr.Textbox(label="Send a message", placeholder="Send a message",
@@ -151,9 +154,8 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate"))
151
  feedback_radio = gr.Radio(
152
  choices=["1", "2", "3", "4", "5", "6", "None"],
153
  value=["None"],
154
- label="How would you rate the current response?",
155
- info="Choosing a number sends the following diagnostic data to the developer - Question, Response, Time Taken. Let it be [None] to not send any data.",
156
- )
157
 
158
  with gr.Column():
159
  feedback_text = gr.Textbox(lines=1, label="Additional comments on the current response...")
@@ -161,7 +163,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate"))
161
 
162
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
163
  msg.submit(default_feedback, outputs=[feedback_radio])
164
- msg.submit(default_text, outputs=[feedback_text])
165
 
166
  feedback_radio.change(
167
  fn=save_feedback,
@@ -173,8 +175,13 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate"))
173
  inputs=[feedback_text]
174
  )
175
 
 
 
 
 
 
176
  gr.HTML(description)
177
 
178
 
179
  chat.queue()
180
- chat.launch()
 
6
  import os
7
 
8
  from lc_base.chain import openai_chain
9
+ from lc_base.dnd_database import create_dnd_database
10
  from driveapi.drive import upload_chat_to_drive
11
  from driveapi.drive_database import create_chroma_db
12
 
13
  # global time_diff, model_name, search_type
14
  time_diff = 0
15
+ model_name="gpt-3.5-turbo-1106" # FOR TESTING
16
  # model_name = "gpt-4-1106-preview"
17
+ # model_name = "gpt-4-0125-preview"
18
  search_type = "stuff"
19
  input_question = ""
20
  model_response = ""
 
24
  title = """<h1 align="center">ResearchBuddy</h1>"""
25
  description = """<br><br><h3 align="center">This is a GPT based Research Buddy to assist in navigating new research topics.</h3>"""
26
 
 
 
 
27
 
28
  def save_drive_link(drive_link):
29
+ drive_link += "?usp=sharing"
30
  os.environ['DRIVE_LINK'] = str(drive_link)
31
+ print("Drive link saved in the environment")
32
  return None
33
 
34
def create_data_from_drive(drive_link):
    """Save the shared drive link and build the vector database from it.

    The link is stripped of surrounding whitespace and is guaranteed to end
    with exactly one ``?usp=sharing`` suffix before it is written to the
    ``DRIVE_LINK`` environment variable. The database returned by
    ``create_chroma_db()`` is stored in the module-level ``db``.

    Args:
        drive_link: Link text submitted by the user. ``None`` or a blank
            string is rejected without touching the environment or ``db``.

    Returns:
        A status message for the UI.
    """
    global db

    suffix = "?usp=sharing"
    link = "" if drive_link is None else str(drive_link).strip()
    if not link:
        # Nothing to process: do not store a bare "?usp=sharing" or start a
        # database build that cannot succeed.
        return "Please enter a shared drive link containing only PDFs."
    if not link.endswith(suffix):
        link += suffix

    # NOTE(review): create_chroma_db() takes no arguments, so it presumably
    # reads DRIVE_LINK from the environment - confirm.
    os.environ['DRIVE_LINK'] = link
    print("Drive link saved in the environment! Creating Database...")

    db = create_chroma_db()
    return "Processing Completed - You can start the chat now!"
43
 
44
def check_pdfs(pdf_files):
    """Build the vector database from the uploaded PDF files.

    Wired to the file component's change event, which also fires when the
    upload is cleared; in that case ``pdf_files`` is empty and no database
    is built. Otherwise the database returned by ``create_dnd_database`` is
    stored in the module-level ``db``.

    Args:
        pdf_files: Uploaded files passed in by the file component, or
            ``None`` / an empty list when nothing is uploaded.

    Returns:
        A status message for the UI.
    """
    global db

    if not pdf_files:
        # Upload was cleared: leave any existing database untouched.
        return "No PDF files uploaded - please upload PDF files to start."

    db = create_dnd_database(pdf_files)
    return "Processing Completed - You can start the chat now!"
48
+
49
  def user(user_message, history):
50
  return "", history + [[user_message, None]]
51
 
 
57
  print(type(db))
58
  question = str(message)
59
  chain = openai_chain(inp_dir=dir)
60
+
 
 
 
 
 
 
 
61
  query = question
62
 
63
  start_time = time.time()
64
 
65
  output = chain.get_response_from_drive(query=query, database=db, k=10, model_name=model_name, type=search_type)
66
  print(output)
67
+
68
 
69
  # Update global variables to log
70
  time_diff = time.time() - start_time
 
75
  chat_history.append((message, bot_message))
76
 
77
  time.sleep(2)
78
+ return " ", chat_history
79
 
80
  def save_feedback(feedback):
81
  global user_feedback
 
88
  [input_question, model_response, model_name, time_diff, user_feedback]
89
  ]
90
 
91
+ if model_response and user_feedback[0] != "None":
92
  upload_chat_to_drive(log_data, file_name)
93
 
94
+
95
  def default_feedback():
96
  return "None"
97
 
 
116
 
117
  global db
118
 
119
+ with gr.Row(equal_height=True):
120
  with gr.Column():
121
+ with gr.Row():
122
+ pdf_files_dnd = gr.File(file_count='multiple', height=250, label="Upload PDF Files")
123
 
 
 
124
 
125
+ with gr.Column():
126
+ with gr.Row():
127
+ drive_link_input = gr.Textbox(lines=1, label="Enter your shared drive link, then press Enter...")
128
+ with gr.Row():
129
+ status_message = gr.Text(label="Status", value="⬆️Submit a (shared) drive link containing only PDFs \n-or- \n⬅️Upload PDF files", text_align='center')
130
+
131
 
132
+
133
+
134
 
135
+ drive_link_input.submit(fn=create_data_from_drive, inputs=[drive_link_input], outputs=[status_message])
136
+ pdf_files_dnd.change(fn=check_pdfs, inputs=[pdf_files_dnd], outputs=[status_message])
 
 
 
 
137
 
138
  chatbot = gr.Chatbot(height=750)
139
  msg = gr.Textbox(label="Send a message", placeholder="Send a message",
 
154
  feedback_radio = gr.Radio(
155
  choices=["1", "2", "3", "4", "5", "6", "None"],
156
  value=["None"],
157
+ label="On a scale from 1 (very unsatisfied) to 6 (very satisfied), how would you rate the current response?",
158
+ )
 
159
 
160
  with gr.Column():
161
  feedback_text = gr.Textbox(lines=1, label="Additional comments on the current response...")
 
163
 
164
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
165
  msg.submit(default_feedback, outputs=[feedback_radio])
166
+ chatbot.change(save_feedback, inputs=[feedback_radio])
167
 
168
  feedback_radio.change(
169
  fn=save_feedback,
 
175
  inputs=[feedback_text]
176
  )
177
 
178
+ feedback_text.submit(
179
+ fn=default_text,
180
+ outputs=[feedback_text]
181
+ )
182
+
183
  gr.HTML(description)
184
 
185
 
186
  chat.queue()
187
+ chat.launch()
driveapi/drive_database.py CHANGED
@@ -28,4 +28,4 @@ def create_chroma_db():
28
  print('Length of text: ' + str(len(raw_text)))
29
  db = FAISS.from_texts(texts, embedding)
30
 
31
- return db
 
28
  print('Length of text: ' + str(len(raw_text)))
29
  db = FAISS.from_texts(texts, embedding)
30
 
31
+ return db
driveapi/service.py CHANGED
@@ -26,6 +26,3 @@ def get_shared_folder_id(drive_shared_link):
26
  shared_folder_id = str(drive_shared_link[start:end])
27
 
28
  return shared_folder_id
29
-
30
-
31
-
 
26
  shared_folder_id = str(drive_shared_link[start:end])
27
 
28
  return shared_folder_id
 
 
 
lc_base/dnd_database.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from driveapi.drive import process_pdf
2
+
3
+ from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.vectorstores import FAISS
6
+
7
def create_dnd_database(file_list):
    """Build a FAISS vector store from a list of uploaded PDF files.

    Each file is passed to ``process_pdf``; the returned texts are
    concatenated, split into overlapping chunks and embedded with
    ``OpenAIEmbeddings``.

    Args:
        file_list: Iterable of PDF files accepted by ``process_pdf``.
            ``None`` is treated as an empty list.

    Returns:
        The FAISS vector store built from the text chunks.

    Raises:
        ValueError: If no text could be extracted (no files given, or the
            files contain no extractable text).
    """
    # Join once instead of growing a string with += inside the loop.
    raw_text = ''.join(process_pdf(pdf) for pdf in (file_list or []))

    # Validate before creating the embeddings client so empty input fails
    # with a clear message rather than deep inside the vector store build.
    if not raw_text.strip():
        raise ValueError("No text could be extracted from the uploaded PDF files.")

    embedding = OpenAIEmbeddings()

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    print('Length of text: ' + str(len(raw_text)))
    db = FAISS.from_texts(texts, embedding)

    return db
requirements.txt CHANGED
@@ -1,10 +1,11 @@
1
  faiss-gpu==1.7.2
2
- gradio==3.45.0
 
3
  tiktoken==0.5.1
4
  PyPDF2==3.0.1
5
  pandas==2.0.3
6
  openai==0.28.1
7
  langchain==0.0.331
8
- google-auth
9
- google-auth-httplib2
10
- google-api-python-client
 
1
  faiss-gpu==1.7.2
2
+ gradio==4.20.0
3
+ pycryptodome==3.20.0
4
  tiktoken==0.5.1
5
  PyPDF2==3.0.1
6
  pandas==2.0.3
7
  openai==0.28.1
8
  langchain==0.0.331
9
+ google-auth==2.28.1
10
+ google-auth-httplib2==0.2.0
11
+ google-api-python-client==2.120.0