Karthikeyan committed on
Commit
e8c317c
·
1 Parent(s): 077dd97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -128
app.py CHANGED
@@ -18,7 +18,12 @@ import tempfile
18
  import pandas as pd
19
  import re
20
 
 
 
 
 
21
  class ChemicalIdentifier:
 
22
  def __init__(self):
23
 
24
  openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -30,17 +35,19 @@ class ChemicalIdentifier:
30
  console_handler.setFormatter(formatter)
31
  self.logger.addHandler(console_handler)
32
 
 
33
 
34
- def upload_via_url(self,url:str)->List:
 
 
 
 
35
  """
36
  Uploads a file from a given URL and returns the loaded document.
37
-
38
  Args:
39
  url (str): The URL of the file to be uploaded.
40
-
41
  Returns:
42
  Document: The loaded document.
43
-
44
  Raises:
45
  ValueError: If the URL is not valid or the file cannot be fetched.
46
  """
@@ -69,22 +76,19 @@ class ChemicalIdentifier:
69
  raise ValueError("Error occurred while uploading the file") from e
70
 
71
 
72
- def find_chemicals(self,text:str)->str:
73
  """
74
  Extracts chemical names from the given text.
75
-
76
  Args:
77
  text (str): The text to extract chemical names from.
78
-
79
  Returns:
80
  str: The extracted chemical names in bullet form.
81
-
82
  Raises:
83
  ValueError: If an error occurs during the extraction process.
84
  """
85
 
86
  try:
87
- prompt = f"List out only all the Chemicals Names in the give text in bullet form.{text}"
88
  response = openai.Completion.create(
89
  model="text-davinci-003",
90
  prompt=prompt,
@@ -104,7 +108,7 @@ class ChemicalIdentifier:
104
  raise ValueError("Error occurred while finding chemicals") from e
105
 
106
 
107
- def get_chemicals(self,urls:str)->str:
108
  """
109
  Retrieves chemicals from the provided URLs.
110
 
@@ -121,9 +125,9 @@ class ChemicalIdentifier:
121
  try:
122
  total_chemical=[]
123
  for url in urls.split(','):
124
- webpage_text = self.upload_via_url(url)
125
- chemicals = self.find_chemicals(webpage_text)
126
- total_chemical.append(chemicals)
127
  list_of_chemicals = "".join(total_chemical)
128
  return list_of_chemicals
129
 
@@ -131,12 +135,6 @@ class ChemicalIdentifier:
131
  self.logger.error("Error occurred while getting chemicals from URLs: %s", str(e))
132
  raise ValueError("Error occurred while getting chemicals from URLs") from e
133
 
134
- def get_empty_state(self):
135
-
136
- """ Create empty Knowledge base"""
137
-
138
- return {"knowledge_base": None}
139
-
140
  def create_knowledge_base(self,docs):
141
 
142
  """Create a knowledge base from the given documents.
@@ -165,44 +163,11 @@ class ChemicalIdentifier:
165
  # Return the resulting knowledge base
166
  return knowledge_base
167
 
 
 
 
168
 
169
- def upload_file(self,file_paths):
170
- """Upload a file and create a knowledge base from its contents.
171
- Args:
172
- file_paths : The files to uploaded.
173
- Returns:
174
- tuple: A tuple containing the file name and the knowledge base.
175
- """
176
-
177
- file_paths = [single_file_path.name for single_file_path in file_paths]
178
-
179
- loaders = [UnstructuredFileLoader(file_obj, strategy="fast") for file_obj in file_paths]
180
-
181
- # Load the contents of the file using the loader
182
- docs = []
183
- for loader in loaders:
184
- docs.extend(loader.load())
185
-
186
- # Create a knowledge base from the loaded documents using the create_knowledge_base() method
187
- knowledge_base = self.create_knowledge_base(docs)
188
-
189
-
190
- # Return a tuple containing the file name and the knowledge base
191
- return file_paths, {"knowledge_base": knowledge_base}
192
-
193
-
194
-
195
- def answer_question(self,urls, state):
196
- """Answer a question based on the current knowledge base.
197
- Args:
198
- state (dict): The current state containing the knowledge base.
199
- Returns:
200
- str: The answer to the question.
201
- """
202
-
203
- result = self.get_chemicals(urls)
204
- # Retrieve the knowledge base from the state dictionary
205
- knowledge_base = state["knowledge_base"]
206
 
207
  # Set the question for which we want to find the answer
208
  question = "Identify the Chemical Capabilities Only"
@@ -229,90 +194,95 @@ class ChemicalIdentifier:
229
  # Run the question-answering chain on the input documents and question
230
  response = chain.run(input_documents=docs, question=question)
231
 
232
- Answer = response+"\n"+result
233
-
234
  # Return the response as the answer to the question
235
- return Answer
236
-
237
-
238
- def extract_excel_data(self,file_path):
239
- # Read the Excel file
240
- df = pd.read_excel(file_path)
241
-
242
- # Flatten the data to a single list
243
- data_list = []
244
- for _, row in df.iterrows():
245
- data_list.extend(row.tolist())
246
-
247
- return data_list
248
-
249
- def comparing_chemicals(self,urls,state):
250
- chemicals = self.answer_question(urls,state)
251
- excel_file_path = "Capability.xlsx"
252
- chemistry_capability = self.extract_excel_data(excel_file_path)
253
- response = openai.Completion.create(
254
- engine="text-davinci-003",
255
- prompt= f"""Analyse the following text delimited by triple backticks to return the comman chemicals.
256
- text : ```{chemicals} {chemistry_capability}```.
257
- result should be in bullet points format.
258
- """,
259
- max_tokens=300,
260
- n=1,
261
- stop=None,
262
- temperature=0,
263
- top_p=1.0,
264
- frequency_penalty=0.0,
265
- presence_penalty=0.0
266
- )
267
 
268
- result = response.choices[0].text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
- return result
271
 
272
  def gradio_interface(self)->None:
273
  """
274
  Starts the Gradio interface for chemical identification.
275
  """
276
 
277
- try:
278
- with gr.Blocks(css="style.css",theme='karthikeyan-adople/hudsonhayes-dark1') as demo:
279
- gr.HTML("""<center><img src="https://hudsonandhayes.co.uk/wp-content/uploads/2023/01/Group-479.svg" height="110px" width="280px"></center>""")
280
- state = gr.State(self.get_empty_state())
281
- gr.HTML("""<center><h1 style="color:#fff">Chemical Identifier</h1></center>""")
282
-
283
- with gr.Column(elem_id="col-container"):
284
- with gr.Row(elem_id="row-flex"):
285
- url = gr.Textbox(label="URL")
286
- with gr.Row(elem_id="row-flex"):
287
- with gr.Accordion("Upload Files", open = False):
288
- with gr.Row():
289
- with gr.Column(scale=0.90, min_width=160):
290
- file_output = gr.File()
291
- with gr.Column(scale=0.10, min_width=160):
292
- upload_button = gr.UploadButton(
293
- "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"],
294
- file_count = "multiple",variant="primary")
295
- load_pdf = gr.Button("Load PDF")
296
- status = gr.Textbox(label="Status", placeholder="", interactive=False)
297
-
298
- with gr.Row():
299
- with gr.Column(scale=1, min_width=0):
300
- compare_btn = gr.Button(value="Analyse",variant="primary")
301
- with gr.Row():
302
- with gr.Column(scale=1, min_width=0):
303
- compared_result = gr.Textbox(value="",label='Chemicals :',show_label=True, placeholder="",lines=10)
304
-
305
-
306
- upload_button.upload(self.upload_file, upload_button, [file_output,state])
307
-
308
- compare_btn.click(self.comparing_chemicals,[url,state],compared_result)
309
- demo.launch()
310
 
311
- except Exception as e:
312
- self.logger.error("Error occurred while launching Gradio interface: %s", str(e))
313
- raise ValueError("Error occurred while launching Gradio interface") from e
314
 
315
  if __name__ == "__main__":
 
316
  logging.basicConfig(level=logging.DEBUG)
317
  chemical_identifier = ChemicalIdentifier()
318
  chemical_identifier.gradio_interface()
 
18
  import pandas as pd
19
  import re
20
 
21
+
22
+ # Create and declare the global variable "results"
23
+ results = ''
24
+
25
  class ChemicalIdentifier:
26
+
27
  def __init__(self):
28
 
29
  openai.api_key = os.getenv("OPENAI_API_KEY")
 
35
  console_handler.setFormatter(formatter)
36
  self.logger.addHandler(console_handler)
37
 
38
+ def get_empty_state(self):
39
 
40
+ """ Create empty Knowledge base"""
41
+
42
+ return {"knowledge_base": None}
43
+
44
+ def get_content_from_url(self,url:str)->List:
45
  """
46
  Uploads a file from a given URL and returns the loaded document.
 
47
  Args:
48
  url (str): The URL of the file to be uploaded.
 
49
  Returns:
50
  Document: The loaded document.
 
51
  Raises:
52
  ValueError: If the URL is not valid or the file cannot be fetched.
53
  """
 
76
  raise ValueError("Error occurred while uploading the file") from e
77
 
78
 
79
+ def extract_chemical_names(self,text:str)->str:
80
  """
81
  Extracts chemical names from the given text.
 
82
  Args:
83
  text (str): The text to extract chemical names from.
 
84
  Returns:
85
  str: The extracted chemical names in bullet form.
 
86
  Raises:
87
  ValueError: If an error occurs during the extraction process.
88
  """
89
 
90
  try:
91
+ prompt = f"Identify the Chemical Names Only give text in bullet form {text}. Don't Generate any extra chemicals apart from given text"
92
  response = openai.Completion.create(
93
  model="text-davinci-003",
94
  prompt=prompt,
 
108
  raise ValueError("Error occurred while finding chemicals") from e
109
 
110
 
111
+ def get_chemicals_for_url(self,urls:str)->str:
112
  """
113
  Retrieves chemicals from the provided URLs.
114
 
 
125
  try:
126
  total_chemical=[]
127
  for url in urls.split(','):
128
+ webpage_text = self.get_content_from_url(url)
129
+ chemicals = self.extract_chemical_names(webpage_text)
130
+ total_chemical.append(str(url)+"\n"+chemicals+"\n\n")
131
  list_of_chemicals = "".join(total_chemical)
132
  return list_of_chemicals
133
 
 
135
  self.logger.error("Error occurred while getting chemicals from URLs: %s", str(e))
136
  raise ValueError("Error occurred while getting chemicals from URLs") from e
137
 
 
 
 
 
 
 
138
  def create_knowledge_base(self,docs):
139
 
140
  """Create a knowledge base from the given documents.
 
163
  # Return the resulting knowledge base
164
  return knowledge_base
165
 
166
+ def file_path_show(self,file_paths):
167
+ file_paths = [single_file_path.name for single_file_path in file_paths]
168
+ return file_paths
169
 
170
+ def get_chemicals_for_file(self,state,knowledge_base):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  # Set the question for which we want to find the answer
173
  question = "Identify the Chemical Capabilities Only"
 
194
  # Run the question-answering chain on the input documents and question
195
  response = chain.run(input_documents=docs, question=question)
196
 
 
 
197
  # Return the response as the answer to the question
198
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
+ def identify_chemicals_in_files(self,file_paths,state):
201
+ """Upload a file and create a knowledge base from its contents.
202
+ Args:
203
+ file_paths : The files to be uploaded.
204
+ Returns:
205
+ str: The accumulated results text, with each file name followed by the answer found for that file.
206
+ """
207
+
208
+
209
+ file_paths = [single_file_path.name for single_file_path in file_paths]
210
+
211
+ for file_obj in file_paths:
212
+
213
+ loader = UnstructuredFileLoader(file_obj, strategy="fast")
214
+
215
+ # Load the contents of the file using the loader
216
+ docs =loader.load()
217
+
218
+ # Create a knowledge base from the loaded documents using the create_knowledge_base() method
219
+ knowledge_base = self.create_knowledge_base(docs)
220
+
221
+ pdf_name = os.path.basename(file_obj)
222
+ global results
223
+ final_ans = self.get_chemicals_for_file(state,knowledge_base)
224
+ results += pdf_name+"\n"+final_ans+"\n\n"
225
+
226
+ # Return the accumulated results string (file names with their answers)
227
+ return results
228
+
229
+ def get_final_result(self,urls,file_paths,state):
230
+
231
+ if urls:
232
+ if file_paths:
233
+ urls_chemicals = self.get_chemicals_for_url(urls)
234
+ file_chemicals = self.identify_chemicals_in_files(file_paths,state)
235
+ chemicals = urls_chemicals + file_chemicals
236
+
237
+ return chemicals
238
+ else:
239
+ urls_chemicals = self.get_chemicals_for_url(urls)
240
+ return urls_chemicals
241
+ elif file_paths:
242
+ file_chemicals = self.identify_chemicals_in_files(file_paths,state)
243
+ return file_chemicals
244
+ else:
245
+ return "No Files Uploaded"
246
 
 
247
 
248
  def gradio_interface(self)->None:
249
  """
250
  Starts the Gradio interface for chemical identification.
251
  """
252
 
253
+ with gr.Blocks(css="style.css",theme='karthikeyan-adople/hudsonhayes-dark1') as demo:
254
+ gr.HTML("""<center><img src="https://hudsonandhayes.co.uk/wp-content/uploads/2023/01/Group-479.svg" height="110px" width="280px"></center>""")
255
+ state = gr.State(self.get_empty_state())
256
+ gr.HTML("""<center><h1 style="color:#fff">Chemical Identifier</h1></center>""")
257
+
258
+ with gr.Column(elem_id="col-container"):
259
+ with gr.Row(elem_id="row-flex"):
260
+ url = gr.Textbox(label="URL")
261
+ with gr.Row(elem_id="row-flex"):
262
+ with gr.Accordion("Upload Files", open = False):
263
+ with gr.Row():
264
+ with gr.Column(scale=0.90, min_width=160):
265
+ file_output = gr.File()
266
+ with gr.Column(scale=0.10, min_width=160):
267
+ upload_button = gr.UploadButton(
268
+ "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"],
269
+ file_count = "multiple",variant="primary")
270
+ with gr.Row():
271
+ with gr.Column(scale=1, min_width=0):
272
+ compare_btn = gr.Button(value="Analyse",variant="primary")
273
+ with gr.Row():
274
+ with gr.Column(scale=1, min_width=0):
275
+ compared_result = gr.Textbox(value="",label='Chemicals :',show_label=True, placeholder="",lines=10)
276
+
277
+ upload_button.upload(self.file_path_show, upload_button, [file_output])
278
+
279
+ compare_btn.click(self.get_final_result,[url,upload_button,state],compared_result)
280
+
281
+ demo.launch()
 
 
 
 
282
 
 
 
 
283
 
284
  if __name__ == "__main__":
285
+
286
  logging.basicConfig(level=logging.DEBUG)
287
  chemical_identifier = ChemicalIdentifier()
288
  chemical_identifier.gradio_interface()