import os import io import re import streamlit as st # Must be the first Streamlit command. st.set_page_config(layout="wide", initial_sidebar_state="collapsed") from PIL import Image import fitz # PyMuPDF from reportlab.lib.pagesizes import A4 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from reportlab.lib import colors from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.ttfonts import TTFont # --------------------------------------------------------------- # Define available NotoEmoji fonts (local files) # One font is at the root and others are in the 'static' subdirectory. available_fonts = { "NotoEmoji Variable": "NotoEmoji-VariableFont_wght.ttf", "NotoEmoji Bold": "NotoEmoji-Bold.ttf", "NotoEmoji Light": "NotoEmoji-Light.ttf", "NotoEmoji Medium": "NotoEmoji-Medium.ttf", "NotoEmoji Regular": "NotoEmoji-Regular.ttf", "NotoEmoji SemiBold": "NotoEmoji-SemiBold.ttf" } # Sidebar: Let the user choose the desired NotoEmoji font. selected_font_name = st.sidebar.selectbox( "Select NotoEmoji Font", options=list(available_fonts.keys()) ) selected_font_path = available_fonts[selected_font_name] # Register the chosen font with ReportLab. pdfmetrics.registerFont(TTFont(selected_font_name, selected_font_path)) # --------------------------------------------------------------- # Default markdown content with emojis. default_markdown = """# Cutting-Edge ML Outline ## Core ML Techniques 1. 🌟 **Mixture of Experts (MoE)** - Conditional computation techniques - Sparse gating mechanisms - Training specialized sub-models 2. πŸ”₯ **Supervised Fine-Tuning (SFT) using PyTorch** - Loss function customization - Gradient accumulation strategies - Learning rate schedulers 3. πŸ€– **Large Language Models (LLM) using Transformers** - Attention mechanisms - Tokenization strategies - Position encodings ## Training Methods 4. πŸ“Š **Self-Rewarding Learning using NPS 0-10 and Verbatims** - Custom reward functions - Feedback categorization - Signal extraction from text 5. πŸ‘ **Reinforcement Learning from Human Feedback (RLHF)** - Preference datasets - PPO implementation - KL divergence constraints 6. πŸ”— **MergeKit: Merging Models to Same Embedding Space** - TIES merging - Task arithmetic - SLERP interpolation ## Optimization & Deployment 7. πŸ“ **DistillKit: Model Size Reduction with Spectrum Analysis** - Knowledge distillation - Quantization techniques - Model pruning strategies 8. 🧠 **Agentic RAG Agents using Document Inputs** - Vector database integration - Query planning - Self-reflection mechanisms 9. ⏳ **Longitudinal Data Summarization from Multiple Docs** - Multi-document compression - Timeline extraction - Entity tracking ## Knowledge Representation 10. πŸ“‘ **Knowledge Extraction using Markdown Knowledge Graphs** - Entity recognition - Relationship mapping - Hierarchical structuring 11. πŸ—ΊοΈ **Knowledge Mapping with Mermaid Diagrams** - Flowchart generation - Sequence diagram creation - State diagrams 12. πŸ’» **ML Code Generation with Streamlit/Gradio/HTML5+JS** - Code completion - Unit test generation - Documentation synthesis """ # --------------------------------------------------------------- # Process markdown into PDF content. def markdown_to_pdf_content(markdown_text): lines = markdown_text.strip().split('\n') pdf_content = [] in_list_item = False current_item = None sub_items = [] for line in lines: line = line.strip() if not line: continue if line.startswith('# '): # Optionally skip the main title. pass elif line.startswith('## '): if current_item and sub_items: pdf_content.append([current_item, sub_items]) sub_items = [] current_item = None section = line.replace('## ', '').strip() pdf_content.append(f"{section}") in_list_item = False elif re.match(r'^\d+\.', line): if current_item and sub_items: pdf_content.append([current_item, sub_items]) sub_items = [] current_item = line.strip() in_list_item = True elif line.startswith('- ') and in_list_item: sub_items.append(line.strip()) else: if not in_list_item: pdf_content.append(line.strip()) if current_item and sub_items: pdf_content.append([current_item, sub_items]) mid_point = len(pdf_content) // 2 left_column = pdf_content[:mid_point] right_column = pdf_content[mid_point:] return left_column, right_column # --------------------------------------------------------------- # Create PDF using ReportLab. def create_main_pdf(markdown_text, base_font_size=10, auto_size=False): buffer = io.BytesIO() doc = SimpleDocTemplate( buffer, pagesize=(A4[1], A4[0]), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36 ) styles = getSampleStyleSheet() story = [] spacer_height = 10 left_column, right_column = markdown_to_pdf_content(markdown_text) # Count total items to possibly adjust font size. total_items = 0 for col in (left_column, right_column): for item in col: if isinstance(item, list): main_item, sub_items