import re
from collections import defaultdict

# Matches a bracketed citation such as "[Doc 1]" or "[Doc 1, Doc 2]"; the single
# capture group holds the text between the brackets.
_DOC_REFERENCE_PATTERN = re.compile(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]')

# NOTE(review): several string literals below carry little or no markup even
# though the functions are named as HTML builders. Presumably the tags were
# lost before this text was reviewed. The literals are reproduced exactly as
# they appear; compare with the deployed file before relying on them.


def make_pairs(lst):
    """Group consecutive items of an even-length list into 2-tuples.

    Args:
        lst: Indexable sequence; items at positions 2k and 2k + 1 form a pair.

    Returns:
        list: ``[(lst[0], lst[1]), (lst[2], lst[3]), ...]``.

    Raises:
        IndexError: If ``lst`` holds an odd number of items.
    """
    return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]


def serialize_docs(docs):
    """Convert document objects into plain dictionaries.

    Args:
        docs: Iterable of objects exposing ``page_content`` and ``metadata``.

    Returns:
        list: One ``{"page_content": ..., "metadata": ...}`` dict per document,
        in input order. The metadata object is referenced, not copied.
    """
    return [
        {"page_content": doc.page_content, "metadata": doc.metadata}
        for doc in docs
    ]


def parse_output_llm_with_sources(output):
    """Replace "[Doc X]" style citations in a text with their rendered form.

    Each citation group such as "[Doc 1, Doc 2]" is replaced by the
    concatenation of its rendered references; all other text is kept verbatim.

    Args:
        output: Text that may contain bracketed "Doc N" citations.

    Returns:
        str: ``output`` with every citation group replaced.
    """
    # With one capture group, re.split alternates plain text (even indexes)
    # and captured citation bodies (odd indexes). Deciding by position instead
    # of ``startswith("Doc")`` keeps ordinary text that merely begins with
    # "Doc" (e.g. "Doctors ...") from being mangled as if it were a citation.
    segments = _DOC_REFERENCE_PATTERN.split(output)
    rendered = []
    for index, segment in enumerate(segments):
        if index % 2 == 0:
            rendered.append(segment)
            continue
        numbers = [
            reference.lower().replace("doc", "").strip()
            for reference in segment.split(",")
        ]
        # NOTE(review): the per-reference template contains only the number in
        # the text under review (any surrounding markup is absent).
        rendered.append("".join(f"""{number}""" for number in numbers))
    return "".join(rendered)


def generate_html_graphs(graphs):
    """Concatenate graph embeddings into one string, grouped by category.

    Args:
        graphs: Iterable of mappings, each providing
            ``graph['metadata']['category']`` and ``graph['embedding']``
            (a string).

    Returns:
        str: A header followed by one section per category (in first-seen
        order), each holding that category's embeddings back to back.

    Raises:
        KeyError: If a graph lacks one of the keys read above.
    """
    # Organize graphs by category.
    categories = defaultdict(list)
    for graph in graphs:
        categories[graph['metadata']['category']].append(graph['embedding'])

    fragments = [" Graphs by Category\n"]

    # One button per category; the first category is flagged as active.
    # NOTE(review): the button literal is empty in the text under review, so
    # neither the category nor ``active_class`` reaches the output.
    for position, category in enumerate(categories):
        active_class = 'active' if position == 0 else ''
        fragments.append(f'')
    fragments.append('\n')

    # One content section per category, embeddings kept in input order.
    for position, embeds in enumerate(categories.values()):
        active_class = 'active' if position == 0 else ''
        fragments.append('\n')
        fragments.extend(embeds)
        fragments.append('\n')

    fragments.append(' ')
    return "".join(fragments)


def _score_css_class(score):
    """Map a reranking score to a CSS class name by threshold (0.8 / 0.5)."""
    if score > 0.8:
        return "score-green"
    if score > 0.5:
        return "score-orange"
    return "score-red"


def _relevancy_block(score):
    """Render the "Relevancy score" line, with the score as a percentage."""
    # NOTE(review): the class is computed as in the original but is not
    # interpolated in the text under review.
    color = _score_css_class(score)
    return f"\n\nRelevancy score: {score:.1%}\n\n"


def _figure_title(meta):
    """Build a figure title, prefixed by the figure code when one is set."""
    if meta["figure_code"] != "N/A":
        return f"{meta['figure_code']} - {meta['short_name']}"
    return f"{meta['short_name']}"


def make_html_source(source, i):
    """Render the card for one retrieved source (text chunk or figure).

    Args:
        source: Object exposing ``metadata`` (a mapping) and ``page_content``
            (a string).
        i: Index shown in the card heading.

    Returns:
        str: The card text. Text chunks use a "Doc" heading; any other chunk
        type uses an "Image" heading and is labelled as AI-described.

    Raises:
        KeyError: If a metadata key read here is missing (``toc_level0``,
            ``toc_level1``, ``name``, ``reranking_score``, ``chunk_type``,
            ``short_name``, ``page_number``, and ``figure_code`` for
            non-text chunks).
    """
    meta = source.metadata
    content = source.page_content.strip()

    # Collect table-of-contents levels until the first "N/A" placeholder.
    toc_levels = []
    for depth in range(2):
        level = meta[f"toc_level{depth}"]
        if level == "N/A":
            break
        toc_levels.append(level)
    toc_path = " > ".join(toc_levels)

    # NOTE(review): ``name`` is built as in the original but is not
    # interpolated into either card in the text under review.
    name = f"{toc_path}\n{meta['name']}" if toc_path else meta['name']

    relevancy_score = _relevancy_block(meta['reranking_score'])

    if meta["chunk_type"] == "text":
        return (
            "\n\n"
            f"Doc {i} - {meta['short_name']} - Page {int(meta['page_number'])}\n\n"
            f"{content}\n\n"
            f"{relevancy_score}\n"
        )

    title = _figure_title(meta)
    return (
        "\n\n"
        f"Image {i} - {title} - Page {int(meta['page_number'])}\n\n"
        "AI-generated description\n\n"
        f"{content}\n\n"
        f"{relevancy_score}\n"
    )


def make_html_figure_sources(source, i, img_str):
    """Render the card for one figure source.

    Args:
        source: Object exposing ``metadata`` (a mapping) and ``page_content``
            (a string).
        i: Index shown in the card heading.
        img_str: Image payload. NOTE(review): not interpolated in the text
            under review; presumably it belonged to an image tag. Confirm
            against the deployed file.

    Returns:
        str: The figure card text.

    Raises:
        KeyError: If ``reranking_score``, ``name``, ``figure_code``,
            ``short_name`` or ``page_number`` is missing from the metadata.
    """
    meta = source.metadata
    content = source.page_content.strip()

    relevancy_score = _relevancy_block(meta['reranking_score'])

    # The original initialised an empty toc list and then branched on it being
    # non-empty, which could never happen; only the plain name was reachable.
    # NOTE(review): ``name`` is not interpolated in the text under review.
    name = meta['name']

    title = _figure_title(meta)
    return (
        "\n\n"
        f"Image {i} - {title} - Page {int(meta['page_number'])}\n\n"
        "AI-generated description\n\n"
        "Alt text\n\n"
        f"{content}\n\n"
        f"{relevancy_score}\n"
    )


def make_toolbox(tool_name, description="", checked=False, elem_id="toggle"):
    """Return the toolbox template.

    Args:
        tool_name: Name of the tool.
        description: Optional description of the tool.
        checked: Whether the tool is shown as checked.
        elem_id: Element id for the toggle.

    Returns:
        str: The template text.

    NOTE(review): in the text under review the template is a single space and
    none of the arguments are interpolated. The original also assigned the
    same empty ``span`` for both values of ``checked``. Presumably the markup
    was lost; confirm against the deployed file.
    """
    toolbox = f""" """
    return toolbox