timeki committed on
Commit
0c4d82b
·
2 Parent(s): 7335378 7d9ec3d

Merge branch 'main' into feature/graph_recommandation

Browse files
.gitignore CHANGED
@@ -10,3 +10,4 @@ notebooks/
10
  **/.flashrank_cache/
11
 
12
  data/
 
 
10
  **/.flashrank_cache/
11
 
12
  data/
13
+ sandbox/
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🌍
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 4.19.1
8
  app_file: app.py
9
  fullWidth: true
10
  pinned: false
 
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.0.2
8
  app_file: app.py
9
  fullWidth: true
10
  pinned: false
app.py CHANGED
@@ -33,7 +33,7 @@ from collections import defaultdict
33
  # ClimateQ&A imports
34
  from climateqa.engine.llm import get_llm
35
  from climateqa.engine.vectorstore import get_pinecone_vectorstore
36
- from climateqa.knowledge.retriever import ClimateQARetriever
37
  from climateqa.engine.reranker import get_reranker
38
  from climateqa.engine.embeddings import get_embeddings_function
39
  from climateqa.engine.chains.prompts import audience_prompts
@@ -47,6 +47,8 @@ from climateqa.engine.embeddings import get_embeddings_function
47
 
48
  from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox,generate_html_graphs
49
 
 
 
50
  # Load environment variables in local mode
51
  try:
52
  from dotenv import load_dotenv
@@ -88,13 +90,12 @@ share_client = service.get_share_client(file_share_name)
88
  user_id = create_user_id()
89
 
90
 
91
- embeddings_function = get_embeddings_function()
92
- llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
93
- reranker = get_reranker("nano")
94
 
95
  # Create vectorstore and retriever
96
  vectorstore = get_pinecone_vectorstore(embeddings_function)
97
- vectorstore_graphs = Chroma(persist_directory="/home/tim/ai4s/climate_qa/climate-question-answering/data/vectorstore_owid", embedding_function=embeddings_function)
 
 
98
 
99
  # agent = make_graph_agent(llm,vectorstore,reranker)
100
  agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
@@ -140,6 +141,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
140
  gallery = []
141
  updates = []
142
  start_streaming = False
 
143
 
144
  steps_display = {
145
  "categorize_intent":("🔄️ Analyzing user message",True),
@@ -151,11 +153,6 @@ async def chat(query,history,audience,sources,reports,current_graphs):
151
  answer_message_content = ""
152
  try:
153
  async for event in result:
154
-
155
- # if event["event"] == "on_chat_model_stream" and event["metadata"]["langgraph_node"] in ["answer_rag", "answer_rag_no_docs", "answer_chitchat", "answer_ai_impact"]:
156
- # if start_streaming == False:
157
- # start_streaming = True
158
- # history[-1] = (query,"")
159
  if "langgraph_node" in event["metadata"]:
160
  node = event["metadata"]["langgraph_node"]
161
 
@@ -163,10 +160,12 @@ async def chat(query,history,audience,sources,reports,current_graphs):
163
  try:
164
  docs = event["data"]["output"]["documents"]
165
  docs_html = []
166
- for i, d in enumerate(docs, 1):
167
- docs_html.append(make_html_source(d, i))
 
 
168
 
169
- used_documents = used_documents + [d.metadata["name"] for d in docs]
170
  history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
171
 
172
  docs_html = "".join(docs_html)
@@ -180,15 +179,15 @@ async def chat(query,history,audience,sources,reports,current_graphs):
180
  if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
181
  history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
182
 
183
- elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search"]:# if streaming answer
184
  if start_streaming == False:
185
  start_streaming = True
186
  history.append(ChatMessage(role="assistant", content = ""))
187
  answer_message_content += event["data"]["chunk"].content
188
  answer_message_content = parse_output_llm_with_sources(answer_message_content)
189
  history[-1] = ChatMessage(role="assistant", content = answer_message_content)
190
-
191
-
192
  elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
193
  try:
194
  recommended_content = event["data"]["output"]["recommended_content"]
@@ -239,116 +238,9 @@ async def chat(query,history,audience,sources,reports,current_graphs):
239
 
240
  except Exception as e:
241
  print(f"Error getting graphs: {e}")
242
-
243
-
244
- # history.append(ChatMessage(role="assistant", content = new_message_content))
245
-
246
- # if docs_used is True and event["metadata"]["langgraph_node"] in ["answer_rag_no_docs", "answer_chitchat", "answer_ai_impact"]:
247
- # docs_used = False
248
-
249
- # elif docs_used is True and event["name"] == "retrieve_documents" and event["event"] == "on_chain_end":
250
- # try:
251
- # docs = event["data"]["output"]["documents"]
252
- # docs_html = []
253
- # for i, d in enumerate(docs, 1):
254
- # docs_html.append(make_html_source(d, i))
255
- # docs_html = "".join(docs_html)
256
-
257
- # except Exception as e:
258
- # print(f"Error getting documents: {e}")
259
- # print(event)
260
-
261
- # # elif event["name"] == "retrieve_documents" and event["event"] == "on_chain_start":
262
- # # print(event)
263
- # # questions = event["data"]["input"]["questions"]
264
- # # questions = "\n".join([f"{i+1}. {q['question']} ({q['source']})" for i,q in enumerate(questions)])
265
- # # answer_yet = "🔄️ Searching in the knowledge base\n{questions}"
266
- # # history[-1] = (query,answer_yet)
267
-
268
- # elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
269
- # try:
270
- # recommended_content = event["data"]["output"]["recommended_content"]
271
- # # graphs = [
272
- # # {
273
- # # "embedding": x.metadata["returned_content"],
274
- # # "metadata": {
275
- # # "source": x.metadata["source"],
276
- # # "category": x.metadata["category"]
277
- # # }
278
- # # } for x in recommended_content if x.metadata["source"] == "OWID"
279
- # # ]
280
-
281
- # unique_graphs = []
282
- # seen_embeddings = set()
283
-
284
- # for x in recommended_content:
285
- # embedding = x.metadata["returned_content"]
286
-
287
- # # Check if the embedding has already been seen
288
- # if embedding not in seen_embeddings:
289
- # unique_graphs.append({
290
- # "embedding": embedding,
291
- # "metadata": {
292
- # "source": x.metadata["source"],
293
- # "category": x.metadata["category"]
294
- # }
295
- # })
296
- # # Add the embedding to the seen set
297
- # seen_embeddings.add(embedding)
298
-
299
-
300
- # categories = {}
301
- # for graph in unique_graphs:
302
- # category = graph['metadata']['category']
303
- # if category not in categories:
304
- # categories[category] = []
305
- # categories[category].append(graph['embedding'])
306
-
307
- # # graphs_html = ""
308
- # for category, embeddings in categories.items():
309
- # # graphs_html += f"<h3>{category}</h3>"
310
- # # current_graphs.append(f"<h3>{category}</h3>")
311
- # for embedding in embeddings:
312
- # current_graphs.append([embedding, category])
313
- # # graphs_html += f"<div>{embedding}</div>"
314
-
315
- # except Exception as e:
316
- # print(f"Error getting graphs: {e}")
317
-
318
- # for event_name,(event_description,display_output) in steps_display.items():
319
- # if event["name"] == event_name:
320
- # if event["event"] == "on_chain_start":
321
- # # answer_yet = f"<p><span class='loader'></span>{event_description}</p>"
322
- # # answer_yet = make_toolbox(event_description, "", checked = False)
323
- # answer_yet = event_description
324
-
325
- # history[-1] = (query,answer_yet)
326
- # # elif event["event"] == "on_chain_end":
327
- # # answer_yet = ""
328
- # # history[-1] = (query,answer_yet)
329
- # # if display_output:
330
- # # print(event["data"]["output"])
331
-
332
- # # if op['path'] == path_reformulation: # reforulated question
333
- # # try:
334
- # # output_language = op['value']["language"] # str
335
- # # output_query = op["value"]["question"]
336
- # # except Exception as e:
337
- # # raise gr.Error(f"ClimateQ&A Error: {e} - The error has been noted, try another question and if the error remains, you can contact us :)")
338
-
339
- # # if op["path"] == path_keywords:
340
- # # try:
341
- # # output_keywords = op['value']["keywords"] # str
342
- # # output_keywords = " AND ".join(output_keywords)
343
- # # except Exception as e:
344
- # # pass
345
 
346
 
347
 
348
- # history = [tuple(x) for x in history]
349
- # yield history,docs_html,output_query,output_language,gallery,current_graphs #,output_query,output_keywords
350
-
351
-
352
  if event["name"] == "transform_query" and event["event"] =="on_chain_end":
353
  if hasattr(history[-1],"content"):
354
  history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
@@ -356,7 +248,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
356
  if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
357
  print("X")
358
 
359
- yield history,docs_html,output_query,output_language,gallery, current_graphs #,output_query,output_keywords
360
 
361
  except Exception as e:
362
  print(event, "has failed")
@@ -368,7 +260,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
368
  if os.getenv("GRADIO_ENV") != "local":
369
  timestamp = str(datetime.now().timestamp())
370
  file = timestamp + ".json"
371
- prompt = history[-1][0]
372
  logs = {
373
  "user_id": str(user_id),
374
  "prompt": prompt,
@@ -376,7 +268,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
376
  "question":output_query,
377
  "sources":sources,
378
  "docs":serialize_docs(docs),
379
- "answer": history[-1][1],
380
  "time": timestamp,
381
  }
382
  log_on_azure(file, logs, share_client)
@@ -384,12 +276,49 @@ async def chat(query,history,audience,sources,reports,current_graphs):
384
  print(f"Error logging on Azure Blob Storage: {e}")
385
  raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
386
 
387
- image_dict = {}
388
- for i,doc in enumerate(docs):
 
 
 
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  if doc.metadata["chunk_type"] == "image":
391
  try:
392
  key = f"Image {i+1}"
 
393
  image_path = doc.metadata["image_path"].split("documents/")[1]
394
  img = get_image_from_azure_blob_storage(image_path)
395
 
@@ -397,45 +326,18 @@ async def chat(query,history,audience,sources,reports,current_graphs):
397
  buffered = BytesIO()
398
  img.save(buffered, format="PNG")
399
  img_str = base64.b64encode(buffered.getvalue()).decode()
 
 
 
 
400
 
401
- # Embedding the base64 string in Markdown
402
- markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
403
- image_dict[key] = {"img":img,"md":markdown_image,"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"]}
404
  except Exception as e:
405
  print(f"Skipped adding image {i} because of {e}")
 
 
 
406
 
407
- if len(image_dict) > 0:
408
-
409
- gallery = [x["img"] for x in list(image_dict.values())]
410
- img = list(image_dict.values())[0]
411
- img_md = img["md"]
412
- img_caption = img["caption"]
413
- img_code = img["figure_code"]
414
- if img_code != "N/A":
415
- img_name = f"{img['key']} - {img['figure_code']}"
416
- else:
417
- img_name = f"{img['key']}"
418
-
419
- history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
420
-
421
- # print(f"\n\nImages:\n{gallery}")
422
-
423
- # # gallery = [x.metadata["image_path"] for x in docs if (len(x.metadata["image_path"]) > 0 and "IAS" in x.metadata["image_path"])]
424
- # # if len(gallery) > 0:
425
- # # gallery = list(set("|".join(gallery).split("|")))
426
- # # gallery = [get_image_from_azure_blob_storage(x) for x in gallery]
427
-
428
- # yield history,docs_html,output_query,output_language,gallery,current_graphs #,output_query,output_keywords
429
-
430
-
431
-
432
- # # else:
433
- # # docs_string = "No relevant passages found in the climate science reports (IPCC and IPBES)"
434
- # # complete_response = "**No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate issues).**"
435
- # # messages.append({"role": "assistant", "content": complete_response})
436
- # # gradio_format = make_pairs([a["content"] for a in messages[1:]])
437
- # # yield gradio_format, messages, docs_string
438
- yield history,docs_html,output_query,output_language,gallery, current_graphs#,output_query,output_keywords
439
 
440
 
441
  def save_feedback(feed: str, user_id):
@@ -498,6 +400,10 @@ Hello, I am ClimateQ&A, a conversational assistant designed to help you understa
498
  ⚠️ Limitations
499
  *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
500
 
 
 
 
 
501
  What do you want to learn ?
502
  """
503
 
@@ -517,8 +423,7 @@ def save_graph(saved_graphs_state, embedding, category):
517
  return saved_graphs_state, gr.Button("Graph Saved")
518
 
519
 
520
- # with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main-component") as demo:
521
- # user_id_state = gr.State([user_id])
522
 
523
  # chat_completed_state = gr.State(0)
524
  # current_graphs = gr.State([])
@@ -532,7 +437,6 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
532
 
533
  with gr.Row(elem_id="chatbot-row"):
534
  with gr.Column(scale=2):
535
- # state = gr.State([system_template])
536
  chatbot = gr.Chatbot(
537
  value = [ChatMessage(role="assistant", content=init_prompt)],
538
  type = "messages",
@@ -541,6 +445,8 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
541
  elem_id="chatbot",
542
  layout = "panel",
543
  avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
 
 
544
  )
545
 
546
  # bot.like(vote,None,None)
@@ -585,6 +491,9 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
585
  with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
586
  sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
587
  docs_textbox = gr.State("")
 
 
 
588
 
589
  # with Modal(visible = False) as config_modal:
590
  with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
@@ -656,6 +565,10 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
656
  # )
657
 
658
 
 
 
 
 
659
 
660
  #---------------------------------------------------------------------------------------
661
  # OTHER TABS
@@ -752,7 +665,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
752
  # history = history + [(query,None)]
753
  # history = [tuple(x) for x in history]
754
  history = history + [ChatMessage(role="user", content=query)]
755
- return (gr.update(interactive = False),gr.update(selected=3),history)
756
 
757
  def finish_chat():
758
  return (gr.update(interactive = True,value = ""),gr.update(selected=3))
@@ -765,10 +678,11 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
765
 
766
  (textbox
767
  .submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
768
- .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
769
  .then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_textbox")
770
  .then(change_completion_status, [chat_completed_state], [chat_completed_state])
771
  # .then(lambda graphs : generate_html_graphs(graphs), [current_graphs], [graphs_container],)
 
772
  )
773
 
774
  (examples_hidden
@@ -777,7 +691,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
777
  # .then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_examples")
778
  # .then(change_completion_status, [chat_completed_state], [chat_completed_state])
779
  .change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
780
- .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, current_graphs],concurrency_limit = 8,api_name = "chat_examples")
781
  .then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
782
  # .then(lambda graphs : graphs, [current_graphs], [graphs_container])
783
 
@@ -797,4 +711,4 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
797
 
798
  demo.queue()
799
 
800
- demo.launch(debug=True)
 
33
  # ClimateQ&A imports
34
  from climateqa.engine.llm import get_llm
35
  from climateqa.engine.vectorstore import get_pinecone_vectorstore
36
+ # from climateqa.knowledge.retriever import ClimateQARetriever
37
  from climateqa.engine.reranker import get_reranker
38
  from climateqa.engine.embeddings import get_embeddings_function
39
  from climateqa.engine.chains.prompts import audience_prompts
 
47
 
48
  from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox,generate_html_graphs
49
 
50
+ from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox
51
+
52
  # Load environment variables in local mode
53
  try:
54
  from dotenv import load_dotenv
 
90
  user_id = create_user_id()
91
 
92
 
 
 
 
93
 
94
  # Create vectorstore and retriever
95
  vectorstore = get_pinecone_vectorstore(embeddings_function)
96
+ llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
97
+ reranker = get_reranker("large")
98
+ agent = make_graph_agent(llm,vectorstore,reranker)
99
 
100
  # agent = make_graph_agent(llm,vectorstore,reranker)
101
  agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
 
141
  gallery = []
142
  updates = []
143
  start_streaming = False
144
+ figures = '<div class="figures-container"> <p> Go to the "Figures" tab at the top of the page to see full size images </p> </div>'
145
 
146
  steps_display = {
147
  "categorize_intent":("🔄️ Analyzing user message",True),
 
153
  answer_message_content = ""
154
  try:
155
  async for event in result:
 
 
 
 
 
156
  if "langgraph_node" in event["metadata"]:
157
  node = event["metadata"]["langgraph_node"]
158
 
 
160
  try:
161
  docs = event["data"]["output"]["documents"]
162
  docs_html = []
163
+ textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
164
+ for i, d in enumerate(textual_docs, 1):
165
+ if d.metadata["chunk_type"] == "text":
166
+ docs_html.append(make_html_source(d, i))
167
 
168
+ used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
169
  history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
170
 
171
  docs_html = "".join(docs_html)
 
179
  if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
180
  history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
181
 
182
+ elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search","answer_chitchat"]:# if streaming answer
183
  if start_streaming == False:
184
  start_streaming = True
185
  history.append(ChatMessage(role="assistant", content = ""))
186
  answer_message_content += event["data"]["chunk"].content
187
  answer_message_content = parse_output_llm_with_sources(answer_message_content)
188
  history[-1] = ChatMessage(role="assistant", content = answer_message_content)
189
+ # history.append(ChatMessage(role="assistant", content = new_message_content))
190
+
191
  elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
192
  try:
193
  recommended_content = event["data"]["output"]["recommended_content"]
 
238
 
239
  except Exception as e:
240
  print(f"Error getting graphs: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
 
243
 
 
 
 
 
244
  if event["name"] == "transform_query" and event["event"] =="on_chain_end":
245
  if hasattr(history[-1],"content"):
246
  history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
 
248
  if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
249
  print("X")
250
 
251
+ yield history,docs_html,output_query,output_language,gallery, figures, current_graphs #,output_query,output_keywords
252
 
253
  except Exception as e:
254
  print(event, "has failed")
 
260
  if os.getenv("GRADIO_ENV") != "local":
261
  timestamp = str(datetime.now().timestamp())
262
  file = timestamp + ".json"
263
+ prompt = history[1]["content"]
264
  logs = {
265
  "user_id": str(user_id),
266
  "prompt": prompt,
 
268
  "question":output_query,
269
  "sources":sources,
270
  "docs":serialize_docs(docs),
271
+ "answer": history[-1].content,
272
  "time": timestamp,
273
  }
274
  log_on_azure(file, logs, share_client)
 
276
  print(f"Error logging on Azure Blob Storage: {e}")
277
  raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
278
 
279
+
280
+
281
+
282
+ # image_dict = {}
283
+ # for i,doc in enumerate(docs):
284
 
285
+ # if doc.metadata["chunk_type"] == "image":
286
+ # try:
287
+ # key = f"Image {i+1}"
288
+ # image_path = doc.metadata["image_path"].split("documents/")[1]
289
+ # img = get_image_from_azure_blob_storage(image_path)
290
+
291
+ # # Convert the image to a byte buffer
292
+ # buffered = BytesIO()
293
+ # img.save(buffered, format="PNG")
294
+ # img_str = base64.b64encode(buffered.getvalue()).decode()
295
+
296
+ # # Embedding the base64 string in Markdown
297
+ # markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
298
+ # image_dict[key] = {"img":img,"md":markdown_image,"short_name": doc.metadata["short_name"],"figure_code":doc.metadata["figure_code"],"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"], "img_str" : img_str}
299
+ # except Exception as e:
300
+ # print(f"Skipped adding image {i} because of {e}")
301
+
302
+ # if len(image_dict) > 0:
303
+
304
+ # gallery = [x["img"] for x in list(image_dict.values())]
305
+ # img = list(image_dict.values())[0]
306
+ # img_md = img["md"]
307
+ # img_caption = img["caption"]
308
+ # img_code = img["figure_code"]
309
+ # if img_code != "N/A":
310
+ # img_name = f"{img['key']} - {img['figure_code']}"
311
+ # else:
312
+ # img_name = f"{img['key']}"
313
+
314
+ # history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
315
+
316
+ docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
317
+ for i, doc in enumerate(docs_figures):
318
  if doc.metadata["chunk_type"] == "image":
319
  try:
320
  key = f"Image {i+1}"
321
+
322
  image_path = doc.metadata["image_path"].split("documents/")[1]
323
  img = get_image_from_azure_blob_storage(image_path)
324
 
 
326
  buffered = BytesIO()
327
  img.save(buffered, format="PNG")
328
  img_str = base64.b64encode(buffered.getvalue()).decode()
329
+
330
+ figures = figures + make_html_figure_sources(doc, i, img_str)
331
+
332
+ gallery.append(img)
333
 
 
 
 
334
  except Exception as e:
335
  print(f"Skipped adding image {i} because of {e}")
336
+
337
+
338
+
339
 
340
+ yield history,docs_html,output_query,output_language,gallery, figures#,output_query,output_keywords
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
 
343
  def save_feedback(feed: str, user_id):
 
400
  ⚠️ Limitations
401
  *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
402
 
403
+ 🛈 Information
404
+ Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
405
+
406
+
407
  What do you want to learn ?
408
  """
409
 
 
423
  return saved_graphs_state, gr.Button("Graph Saved")
424
 
425
 
426
+ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=theme,elem_id = "main-component") as demo:
 
427
 
428
  # chat_completed_state = gr.State(0)
429
  # current_graphs = gr.State([])
 
437
 
438
  with gr.Row(elem_id="chatbot-row"):
439
  with gr.Column(scale=2):
 
440
  chatbot = gr.Chatbot(
441
  value = [ChatMessage(role="assistant", content=init_prompt)],
442
  type = "messages",
 
445
  elem_id="chatbot",
446
  layout = "panel",
447
  avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
448
+ max_height="80vh",
449
+ height="100vh"
450
  )
451
 
452
  # bot.like(vote,None,None)
 
491
  with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
492
  sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
493
  docs_textbox = gr.State("")
494
+
495
+
496
+
497
 
498
  # with Modal(visible = False) as config_modal:
499
  with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
 
565
  # )
566
 
567
 
568
+ with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
569
+ figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
570
+
571
+
572
 
573
  #---------------------------------------------------------------------------------------
574
  # OTHER TABS
 
665
  # history = history + [(query,None)]
666
  # history = [tuple(x) for x in history]
667
  history = history + [ChatMessage(role="user", content=query)]
668
+ return (gr.update(interactive = False),gr.update(selected=1),history)
669
 
670
  def finish_chat():
671
  return (gr.update(interactive = True,value = ""),gr.update(selected=3))
 
678
 
679
  (textbox
680
  .submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
681
+ .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
682
  .then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_textbox")
683
  .then(change_completion_status, [chat_completed_state], [chat_completed_state])
684
  # .then(lambda graphs : generate_html_graphs(graphs), [current_graphs], [graphs_container],)
685
+
686
  )
687
 
688
  (examples_hidden
 
691
  # .then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_examples")
692
  # .then(change_completion_status, [chat_completed_state], [chat_completed_state])
693
  .change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
694
+ .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards, current_graphs],concurrency_limit = 8,api_name = "chat_examples")
695
  .then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
696
  # .then(lambda graphs : graphs, [current_graphs], [graphs_container])
697
 
 
711
 
712
  demo.queue()
713
 
714
+ demo.launch(ssr_mode=False)
climateqa/engine/chains/answer_ai_impact.py CHANGED
@@ -38,7 +38,6 @@ def make_ai_impact_chain(llm):
38
  def make_ai_impact_node(llm):
39
 
40
  ai_impact_chain = make_ai_impact_chain(llm)
41
-
42
 
43
  async def answer_ai_impact(state,config):
44
  answer = await ai_impact_chain.ainvoke({"question":state["user_input"]},config)
 
38
  def make_ai_impact_node(llm):
39
 
40
  ai_impact_chain = make_ai_impact_chain(llm)
 
41
 
42
  async def answer_ai_impact(state,config):
43
  answer = await ai_impact_chain.ainvoke({"question":state["user_input"]},config)
climateqa/engine/chains/intent_categorization.py CHANGED
@@ -7,34 +7,6 @@ from langchain_core.utils.function_calling import convert_to_openai_function
7
  from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
8
 
9
 
10
- # class IntentCategorizer(BaseModel):
11
- # """Analyzing the user message input"""
12
-
13
- # language: str = Field(
14
- # description="Find the language of the message input in full words (ex: French, English, Spanish, ...), defaults to English",
15
- # default="English",
16
- # )
17
- # intent: str = Field(
18
- # enum=[
19
- # "ai",
20
- # # "geo_info",
21
- # # "esg"
22
- # "search",
23
- # "chitchat",
24
- # ],
25
- # description="""
26
- # Categorize the user input in one of the following category
27
- # Any question
28
-
29
- # Examples:
30
- # - ai = any question related to AI: "What are the environmental consequences of AI", "How does AI affect the environment"
31
- # - search = Searching for any question about climate change, energy, biodiversity, nature, and everything we can find the IPCC or IPBES reports or scientific papers. Also questions about individual actions or anything loosely related to the environment.
32
- # - chitchat = Any chit chat or any question that is not related to the environment or climate change or for which it is not necessary to look for the answer in the IPCC, IPBES, IPOS or scientific reports.
33
- # """,
34
- # # - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
35
- # # - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
36
- # )
37
-
38
  class IntentCategorizer(BaseModel):
39
  """Analyzing the user message input"""
40
 
@@ -44,9 +16,9 @@ class IntentCategorizer(BaseModel):
44
  )
45
  intent: str = Field(
46
  enum=[
47
- "ai",
48
  # "geo_info",
49
- # "esg"
50
  "search",
51
  "chitchat",
52
  ],
@@ -55,12 +27,13 @@ class IntentCategorizer(BaseModel):
55
  Any question
56
 
57
  Examples:
58
- - ai = Any query related to Artificial Intelligence: "What are the environmental consequences of AI", "How does AI affect the environment"
59
  - search = Searching for any quesiton about climate change, energy, biodiversity, nature, and everything we can find the IPCC or IPBES reports or scientific papers,
60
  - chitchat = Any general question that is not related to the environment or climate change or just conversational, or if you don't think searching the IPCC or IPBES reports would be relevant
61
  """,
62
  # - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
63
  # - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
 
64
  )
65
 
66
 
@@ -71,7 +44,7 @@ def make_intent_categorization_chain(llm):
71
  llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"IntentCategorizer"})
72
 
73
  prompt = ChatPromptTemplate.from_messages([
74
- ("system", "You are a helpful assistant, you will analyze, and categorize the user input message using the function provided. Categorize the user input as ai ONLY if it is related to Artificial Intelligence, search if it is related to the environment, climate change, energy, biodiversity, nature, etc. and chitchat if it is just general conversation."),
75
  ("user", "input: {input}")
76
  ])
77
 
 
7
  from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  class IntentCategorizer(BaseModel):
11
  """Analyzing the user message input"""
12
 
 
16
  )
17
  intent: str = Field(
18
  enum=[
19
+ "ai_impact",
20
  # "geo_info",
21
+ # "esg",
22
  "search",
23
  "chitchat",
24
  ],
 
27
  Any question
28
 
29
  Examples:
30
+ - ai_impact = Environmental impacts of AI: "What are the environmental impacts of AI", "How does AI affect the environment"
31
  - search = Searching for any quesiton about climate change, energy, biodiversity, nature, and everything we can find the IPCC or IPBES reports or scientific papers,
32
  - chitchat = Any general question that is not related to the environment or climate change or just conversational, or if you don't think searching the IPCC or IPBES reports would be relevant
33
  """,
34
  # - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
35
  # - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
36
+
37
  )
38
 
39
 
 
44
  llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"IntentCategorizer"})
45
 
46
  prompt = ChatPromptTemplate.from_messages([
47
+ ("system", "You are a helpful assistant, you will analyze, translate and categorize the user input message using the function provided. Categorize the user input as ai ONLY if it is related to Artificial Intelligence, search if it is related to the environment, climate change, energy, biodiversity, nature, etc. and chitchat if it is just general conversation."),
48
  ("user", "input: {input}")
49
  ])
50
 
climateqa/engine/chains/query_transformation.py CHANGED
@@ -161,6 +161,7 @@ def make_query_transform_node(llm,k_final=15):
161
  question_state = {"question":question}
162
  analysis_output = rewriter_chain.invoke({"input":question})
163
 
 
164
  # The case when the llm does not return any sources
165
  if not analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS"] for source in analysis_output["sources"]):
166
  analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
 
161
  question_state = {"question":question}
162
  analysis_output = rewriter_chain.invoke({"input":question})
163
 
164
+ # TODO(warning): the LLM should always return something here
165
  # The case when the llm does not return any sources
166
  if not analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS"] for source in analysis_output["sources"]):
167
  analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
climateqa/engine/chains/retrieve_documents.py CHANGED
@@ -84,11 +84,13 @@ def make_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_fina
84
  # # Option 2 - Get 100/n documents by question and rerank the total
85
  # if rerank_by_question:
86
  # k_by_question = divide_into_parts(k_final,len(questions))
 
 
 
 
87
 
88
- # docs = state["documents"]
89
- # if docs is None: docs = []
90
-
91
- docs = []
92
  k_by_question = k_final // state["n_questions"]
93
 
94
  sources = current_question["sources"]
 
84
  # # Option 2 - Get 100/n documents by question and rerank the total
85
  # if rerank_by_question:
86
  # k_by_question = divide_into_parts(k_final,len(questions))
87
+ if "documents" in state and state["documents"] is not None:
88
+ docs = state["documents"]
89
+ else:
90
+ docs = []
91
 
92
+
93
+
 
 
94
  k_by_question = k_final // state["n_questions"]
95
 
96
  sources = current_question["sources"]
climateqa/engine/graph.py CHANGED
@@ -92,10 +92,9 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
92
  transform_query = make_query_transform_node(llm)
93
  translate_query = make_translation_node(llm)
94
  answer_chitchat = make_chitchat_node(llm)
95
- # answer_ai_impact = make_ai_impact_node(llm)
96
  retrieve_documents = make_retriever_node(vectorstore_ipcc, reranker, llm)
97
  retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
98
- # answer_rag_graph = make_rag_graph_node(llm)
99
  answer_rag = make_rag_node(llm, with_docs=True)
100
  answer_rag_no_docs = make_rag_node(llm, with_docs=False)
101
  chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
@@ -190,142 +189,3 @@ def display_graph(app):
190
  )
191
  )
192
  )
193
-
194
- # import sys
195
- # import os
196
- # from contextlib import contextmanager
197
-
198
- # from langchain.schema import Document
199
- # from langgraph.graph import END, StateGraph
200
- # from langchain_core.runnables.graph import CurveStyle, NodeColors, MermaidDrawMethod
201
-
202
- # from typing_extensions import TypedDict
203
- # from typing import List
204
-
205
- # from IPython.display import display, HTML, Image
206
-
207
- # from .chains.answer_chitchat import make_chitchat_node
208
- # from .chains.answer_ai_impact import make_ai_impact_node
209
- # from .chains.query_transformation import make_query_transform_node
210
- # from .chains.translation import make_translation_node
211
- # from .chains.intent_categorization import make_intent_categorization_node
212
- # from .chains.retriever import make_retriever_node
213
- # from .chains.answer_rag import make_rag_node
214
-
215
-
216
- # class GraphState(TypedDict):
217
- # """
218
- # Represents the state of our graph.
219
- # """
220
- # user_input : str
221
- # language : str
222
- # intent : str
223
- # query: str
224
- # questions : List[dict]
225
- # answer: str
226
- # audience: str = "experts"
227
- # sources_input: List[str] = ["auto"]
228
- # documents: List[Document]
229
-
230
- # def search(state):
231
- # return {}
232
-
233
- # def route_intent(state):
234
- # intent = state["intent"]
235
- # if intent in ["chitchat","esg"]:
236
- # return "answer_chitchat"
237
- # elif intent == "ai_impact":
238
- # return "answer_ai_impact"
239
- # else:
240
- # # Search route
241
- # return "search"
242
-
243
- # def route_translation(state):
244
- # if state["language"].lower() == "english":
245
- # return "transform_query"
246
- # else:
247
- # return "translate_query"
248
-
249
- # def route_based_on_relevant_docs(state,threshold_docs=0.2):
250
- # docs = [x for x in state["documents"] if x.metadata["reranking_score"] > threshold_docs]
251
- # if len(docs) > 0:
252
- # return "answer_rag"
253
- # else:
254
- # return "answer_rag_no_docs"
255
-
256
-
257
- # def make_id_dict(values):
258
- # return {k:k for k in values}
259
-
260
- # def make_graph_agent(llm,vectorstore,reranker,threshold_docs = 0.2):
261
-
262
- # workflow = StateGraph(GraphState)
263
-
264
- # # Define the node functions
265
- # categorize_intent = make_intent_categorization_node(llm)
266
- # transform_query = make_query_transform_node(llm)
267
- # translate_query = make_translation_node(llm)
268
- # answer_chitchat = make_chitchat_node(llm)
269
- # answer_ai_impact = make_ai_impact_node(llm)
270
- # retrieve_documents = make_retriever_node(vectorstore,reranker)
271
- # answer_rag = make_rag_node(llm,with_docs=True)
272
- # answer_rag_no_docs = make_rag_node(llm,with_docs=False)
273
-
274
- # # Define the nodes
275
- # workflow.add_node("categorize_intent", categorize_intent)
276
- # workflow.add_node("search", search)
277
- # workflow.add_node("transform_query", transform_query)
278
- # workflow.add_node("translate_query", translate_query)
279
- # workflow.add_node("answer_chitchat", answer_chitchat)
280
- # workflow.add_node("answer_ai_impact", answer_ai_impact)
281
- # workflow.add_node("retrieve_documents",retrieve_documents)
282
- # workflow.add_node("answer_rag",answer_rag)
283
- # workflow.add_node("answer_rag_no_docs",answer_rag_no_docs)
284
-
285
- # # Entry point
286
- # workflow.set_entry_point("categorize_intent")
287
-
288
- # # CONDITIONAL EDGES
289
- # workflow.add_conditional_edges(
290
- # "categorize_intent",
291
- # route_intent,
292
- # make_id_dict(["answer_chitchat","answer_ai_impact","search"])
293
- # )
294
-
295
- # workflow.add_conditional_edges(
296
- # "search",
297
- # route_translation,
298
- # make_id_dict(["translate_query","transform_query"])
299
- # )
300
-
301
- # workflow.add_conditional_edges(
302
- # "retrieve_documents",
303
- # lambda x : route_based_on_relevant_docs(x,threshold_docs=threshold_docs),
304
- # make_id_dict(["answer_rag","answer_rag_no_docs"])
305
- # )
306
-
307
- # # Define the edges
308
- # workflow.add_edge("translate_query", "transform_query")
309
- # workflow.add_edge("transform_query", "retrieve_documents")
310
- # workflow.add_edge("retrieve_documents", "answer_rag")
311
- # workflow.add_edge("answer_rag", END)
312
- # workflow.add_edge("answer_rag_no_docs", END)
313
- # workflow.add_edge("answer_chitchat", END)
314
- # workflow.add_edge("answer_ai_impact", END)
315
-
316
- # # Compile
317
- # app = workflow.compile()
318
- # return app
319
-
320
-
321
-
322
-
323
- # def display_graph(app):
324
-
325
- # display(
326
- # Image(
327
- # app.get_graph(xray = True).draw_mermaid_png(
328
- # draw_method=MermaidDrawMethod.API,
329
- # )
330
- # )
331
- # )
 
92
  transform_query = make_query_transform_node(llm)
93
  translate_query = make_translation_node(llm)
94
  answer_chitchat = make_chitchat_node(llm)
95
+ answer_ai_impact = make_ai_impact_node(llm)
96
  retrieve_documents = make_retriever_node(vectorstore_ipcc, reranker, llm)
97
  retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
 
98
  answer_rag = make_rag_node(llm, with_docs=True)
99
  answer_rag_no_docs = make_rag_node(llm, with_docs=False)
100
  chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
 
189
  )
190
  )
191
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
climateqa/engine/llm/openai.py CHANGED
@@ -7,7 +7,7 @@ try:
7
  except Exception:
8
  pass
9
 
10
- def get_llm(model="gpt-3.5-turbo-0125",max_tokens=1024, temperature=0.0, streaming=True,timeout=30, **kwargs):
11
 
12
  llm = ChatOpenAI(
13
  model=model,
 
7
  except Exception:
8
  pass
9
 
10
+ def get_llm(model="gpt-4o-mini",max_tokens=1024, temperature=0.0, streaming=True,timeout=30, **kwargs):
11
 
12
  llm = ChatOpenAI(
13
  model=model,
climateqa/engine/reranker.py CHANGED
@@ -6,7 +6,7 @@ from sentence_transformers import CrossEncoder
6
 
7
  load_dotenv()
8
 
9
- def get_reranker(model = "jina", cohere_api_key = None):
10
 
11
  assert model in ["nano","tiny","small","large", "jina"]
12
 
@@ -34,7 +34,6 @@ def rerank_docs(reranker,docs,query):
34
  # Get a list of texts from langchain docs
35
  input_docs = [x.page_content for x in docs]
36
 
37
- print(f"\n\nDOCS:{input_docs}\n\n")
38
  # Rerank using rerankers library
39
  results = reranker.rank(query=query, docs=input_docs)
40
 
 
6
 
7
  load_dotenv()
8
 
9
+ def get_reranker(model = "nano", cohere_api_key = None):
10
 
11
  assert model in ["nano","tiny","small","large", "jina"]
12
 
 
34
  # Get a list of texts from langchain docs
35
  input_docs = [x.page_content for x in docs]
36
 
 
37
  # Rerank using rerankers library
38
  results = reranker.rank(query=query, docs=input_docs)
39
 
front/utils.py CHANGED
@@ -134,7 +134,7 @@ def make_html_source(source,i):
134
  score = meta['reranking_score']
135
  if score > 0.8:
136
  color = "score-green"
137
- elif score > 0.4:
138
  color = "score-orange"
139
  else:
140
  color = "score-red"
@@ -170,8 +170,9 @@ def make_html_source(source,i):
170
  <div class="card card-image">
171
  <div class="card-content">
172
  <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
173
- <p>{content}</p>
174
  <p class='ai-generated'>AI-generated description</p>
 
 
175
  {relevancy_score}
176
  </div>
177
  <div class="card-footer">
@@ -186,6 +187,53 @@ def make_html_source(source,i):
186
  return card
187
 
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
  def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
191
 
 
134
  score = meta['reranking_score']
135
  if score > 0.8:
136
  color = "score-green"
137
+ elif score > 0.5:
138
  color = "score-orange"
139
  else:
140
  color = "score-red"
 
170
  <div class="card card-image">
171
  <div class="card-content">
172
  <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
 
173
  <p class='ai-generated'>AI-generated description</p>
174
+ <p>{content}</p>
175
+
176
  {relevancy_score}
177
  </div>
178
  <div class="card-footer">
 
187
  return card
188
 
189
 
190
+ def make_html_figure_sources(source,i,img_str):
191
+ meta = source.metadata
192
+ content = source.page_content.strip()
193
+
194
+ score = meta['reranking_score']
195
+ if score > 0.8:
196
+ color = "score-green"
197
+ elif score > 0.5:
198
+ color = "score-orange"
199
+ else:
200
+ color = "score-red"
201
+
202
+ toc_levels = []
203
+ if len(toc_levels) > 0:
204
+ name = f"<b>{toc_levels}</b><br/>{meta['name']}"
205
+ else:
206
+ name = meta['name']
207
+
208
+ relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"
209
+
210
+ if meta["figure_code"] != "N/A":
211
+ title = f"{meta['figure_code']} - {meta['short_name']}"
212
+ else:
213
+ title = f"{meta['short_name']}"
214
+
215
+ card = f"""
216
+ <div class="card card-image">
217
+ <div class="card-content">
218
+ <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
219
+ <p class='ai-generated'>AI-generated description</p>
220
+ <img src="data:image/png;base64, { img_str } alt="Alt text" />
221
+
222
+ <p>{content}</p>
223
+
224
+ {relevancy_score}
225
+ </div>
226
+ <div class="card-footer">
227
+ <span>{name}</span>
228
+ <a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
229
+ <span role="img" aria-label="Open PDF">🔗</span>
230
+ </a>
231
+ </div>
232
+ </div>
233
+ """
234
+ return card
235
+
236
+
237
 
238
  def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
239
 
requirements.txt CHANGED
@@ -17,3 +17,4 @@ torch==2.3.0
17
  nvidia-cudnn-cu12==8.9.2.26
18
  langchain-community==0.2
19
  msal==1.31
 
 
17
  nvidia-cudnn-cu12==8.9.2.26
18
  langchain-community==0.2
19
  msal==1.31
20
+ matplotlib==3.9.2
style.css CHANGED
@@ -11,7 +11,23 @@
11
  margin: 0px;
12
  }
13
 
14
- .warning-box {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  background-color: #fff3cd;
16
  border: 1px solid #ffeeba;
17
  border-radius: 4px;
@@ -194,41 +210,59 @@ label.selected{
194
  padding:0px !important;
195
  }
196
 
197
-
198
  @media screen and (min-width: 1024px) {
 
 
 
 
 
 
 
 
 
 
 
199
  div#tab-examples{
200
  height:calc(100vh - 190px) !important;
201
- overflow-y: auto;
202
  }
203
 
204
  div#sources-textbox{
205
  height:calc(100vh - 190px) !important;
206
- overflow-y: auto !important;
 
 
 
 
 
207
  }
208
 
209
  div#tab-config{
210
  height:calc(100vh - 190px) !important;
211
- overflow-y: auto !important;
 
 
 
 
 
 
212
  }
213
 
 
214
  div#chatbot-row{
215
  height:calc(100vh - 90px) !important;
 
216
  }
217
 
218
- div#chatbot{
219
- height:calc(100vh - 170px) !important;
220
- }
221
 
222
  .max-height{
223
  height:calc(100vh - 90px) !important;
 
224
  overflow-y: auto;
225
  }
 
226
 
227
- /* .tabitem:nth-child(n+3) {
228
- padding-top:30px;
229
- padding-left:40px;
230
- padding-right:40px;
231
- } */
232
  }
233
 
234
  footer {
@@ -508,4 +542,22 @@ div#tab-saved-graphs {
508
  }
509
  .message-buttons-left.panel.message-buttons.with-avatar {
510
  display: none;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  }
 
11
  margin: 0px;
12
  }
13
 
14
+
15
+ /* Fix for Hugging Face infinite growth */
16
+ main.flex.flex-1.flex-col {
17
+ max-height: 95vh !important;
18
+ }
19
+
20
+
21
+ .avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
22
+ width: 100%;
23
+ height: 100%;
24
+ object-fit: cover;
25
+ border-radius: 50%;
26
+ padding: 0px;
27
+ margin: 0px;
28
+ }
29
+
30
+ .warning-box {
31
  background-color: #fff3cd;
32
  border: 1px solid #ffeeba;
33
  border-radius: 4px;
 
210
  padding:0px !important;
211
  }
212
 
 
213
  @media screen and (min-width: 1024px) {
214
+ .gradio-container {
215
+ max-height: calc(100vh - 190px) !important;
216
+ overflow: hidden;
217
+ }
218
+
219
+ /* div#chatbot{
220
+ height:calc(100vh - 170px) !important;
221
+ max-height:calc(100vh - 170px) !important;
222
+
223
+ } */
224
+
225
  div#tab-examples{
226
  height:calc(100vh - 190px) !important;
227
+ /* overflow-y: auto; */
228
  }
229
 
230
  div#sources-textbox{
231
  height:calc(100vh - 190px) !important;
232
+ /* overflow-y: auto !important; */
233
+ }
234
+
235
+ div#sources-figures{
236
+ height:calc(100vh - 190px) !important;
237
+ /* overflow-y: auto !important; */
238
  }
239
 
240
  div#tab-config{
241
  height:calc(100vh - 190px) !important;
242
+ /* overflow-y: auto !important; */
243
+ }
244
+
245
+ /* Force container to respect height limits */
246
+ .main-component{
247
+ contain: size layout;
248
+ overflow: hidden;
249
  }
250
 
251
+ /*
252
  div#chatbot-row{
253
  height:calc(100vh - 90px) !important;
254
+ max-height:calc(100vh - 90px) !important;
255
  }
256
 
257
+
 
 
258
 
259
  .max-height{
260
  height:calc(100vh - 90px) !important;
261
+ max-height:calc(100vh - 90px) !important;
262
  overflow-y: auto;
263
  }
264
+ */
265
 
 
 
 
 
 
266
  }
267
 
268
  footer {
 
542
  }
543
  .message-buttons-left.panel.message-buttons.with-avatar {
544
  display: none;
545
+ }
546
+ .score-red{
547
+ color:red !important;
548
+ }
549
+ .message-buttons-left.panel.message-buttons.with-avatar {
550
+ display: none;
551
+ }
552
+
553
+ /* Specific fixes for Hugging Face Space iframe */
554
+ .h-full {
555
+ height: auto !important;
556
+ min-height: 0 !important;
557
+ }
558
+
559
+ .space-content {
560
+ height: auto !important;
561
+ max-height: 100vh !important;
562
+ overflow: hidden;
563
  }