"""Streamlit app that renders a Markdown outline as a two-column landscape PDF.

The script downloads Noto emoji fonts on first run, lets the user pick one,
builds the PDF with ReportLab, previews its first page with PyMuPDF and offers
both the PDF and the Markdown for download.
"""
import io
import os
import re
import urllib.request

import fitz  # PyMuPDF
import streamlit as st
from PIL import Image
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle

# Set the page configuration as the very first Streamlit command.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

# Marker wrapped around section headings by markdown_to_pdf_content() so that
# create_main_pdf() can tell them apart from ordinary text lines.
SECTION_OPEN_TAG = "<b>"
SECTION_CLOSE_TAG = "</b>"

# --- Step 1: Define and Download Available Emoji Fonts ---
font_files = [
    "Noto-COLRv1-emojicompat.ttf",
    "Noto-COLRv1-noflags.ttf",
    "Noto-COLRv1.ttf",
    "NotoColorEmoji-emojicompat.ttf",
    "NotoColorEmoji-flagsonly.ttf",
    "NotoColorEmoji-noflags.ttf",
    "NotoColorEmoji.ttf",
    "NotoColorEmoji_WindowsCompatible.ttf",
]
base_font_url = "https://github.com/googlefonts/noto-emoji/raw/main/fonts/"

for font_file in font_files:
    if os.path.exists(font_file):
        continue
    st.info(f"Downloading {font_file}...")
    # Download to a temporary name first: an interrupted transfer must not
    # leave a truncated file that passes the os.path.exists() check next run.
    partial_path = f"{font_file}.part"
    try:
        urllib.request.urlretrieve(base_font_url + font_file, partial_path)
        os.replace(partial_path, font_file)
        st.success(f"Downloaded {font_file}")
    except Exception as e:  # UI boundary: report and carry on with other fonts
        st.error(f"Failed to download {font_file}: {e}")
        if os.path.exists(partial_path):
            os.remove(partial_path)

# --- Step 2: Allow User to Select the Emoji Font ---
font_display_names = {f: f.replace(".ttf", "") for f in font_files}

# Only offer fonts that are actually on disk; registering a missing file fails.
available_font_files = [f for f in font_files if os.path.exists(f)]
if not available_font_files:
    st.error("No emoji fonts are available; cannot generate the PDF.")
    st.stop()

selected_font_file = st.sidebar.selectbox(
    "Select Emoji Font",
    options=available_font_files,
    format_func=lambda f: font_display_names[f],
)
registered_font_name = font_display_names[selected_font_file]
pdfmetrics.registerFont(TTFont(registered_font_name, selected_font_file))

# --- Default Markdown Content with Emojis ---
# NOTE(review): the emoji were restored from mis-encoded bytes; the ones on
# items 7 and 10 were ambiguous after decoding -- confirm against the original.
default_markdown = """# Cutting-Edge ML Outline

## Core ML Techniques
1. 🌟 **Mixture of Experts (MoE)**
   - Conditional computation techniques
   - Sparse gating mechanisms
   - Training specialized sub-models

2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
   - Loss function customization
   - Gradient accumulation strategies
   - Learning rate schedulers

3. 🤖 **Large Language Models (LLM) using Transformers**
   - Attention mechanisms
   - Tokenization strategies
   - Position encodings

## Training Methods
4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
   - Custom reward functions
   - Feedback categorization
   - Signal extraction from text

5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
   - Preference datasets
   - PPO implementation
   - KL divergence constraints

6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
   - TIES merging
   - Task arithmetic
   - SLERP interpolation

## Optimization & Deployment
7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
   - Knowledge distillation
   - Quantization techniques
   - Model pruning strategies

8. 🧠 **Agentic RAG Agents using Document Inputs**
   - Vector database integration
   - Query planning
   - Self-reflection mechanisms

9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
   - Multi-document compression
   - Timeline extraction
   - Entity tracking

## Knowledge Representation
10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
    - Entity recognition
    - Relationship mapping
    - Hierarchical structuring

11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
    - Flowchart generation
    - Sequence diagram creation
    - State diagrams

12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
    - Code completion
    - Unit test generation
    - Documentation synthesis
"""


# --- Markdown to PDF Content Processing ---
def markdown_to_pdf_content(markdown_text):
    """Parse the Markdown outline into two columns of renderable entries.

    Each entry is one of:
      * a section heading string wrapped in ``<b>...</b>`` (from ``## `` lines),
      * a ``[numbered_item, [sub_item, ...]]`` list (from ``1.`` style lines
        and the ``- `` bullets that follow them),
      * a plain string (any other line found outside a numbered item).

    ``# `` title lines are skipped. Lines inside a numbered item that are not
    ``- `` bullets are ignored.

    Args:
        markdown_text: The Markdown source to parse.

    Returns:
        A ``(left_column, right_column)`` tuple; the entries are split at the
        midpoint of the entry list, the right column taking the extra one.
    """
    pdf_content = []
    in_list_item = False
    current_item = None
    sub_items = []

    def flush_item():
        # Emit the pending numbered item even when it has no sub-bullets;
        # otherwise bullet-less items would vanish from the PDF.
        nonlocal current_item, sub_items
        if current_item is not None:
            pdf_content.append([current_item, sub_items])
        current_item = None
        sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('# '):
            # The main title is not taken from the Markdown.
            continue
        if line.startswith('## '):
            flush_item()
            section = line[len('## '):].strip()
            pdf_content.append(f"{SECTION_OPEN_TAG}{section}{SECTION_CLOSE_TAG}")
            in_list_item = False
        elif re.match(r'^\d+\.', line):
            flush_item()
            current_item = line
            in_list_item = True
        elif line.startswith('- ') and in_list_item:
            sub_items.append(line)
        elif not in_list_item:
            pdf_content.append(line)

    flush_item()

    mid_point = len(pdf_content) // 2
    left_column = pdf_content[:mid_point]
    right_column = pdf_content[mid_point:]
    return left_column, right_column


def _build_cells(column_items, section_style, item_style, subitem_style):
    """Turn one column of parsed entries into a flat list of Paragraphs."""
    cells = []
    for item in column_items:
        if isinstance(item, list):
            main_item, sub_items = item
            cells.append(Paragraph(main_item, item_style))
            cells.extend(Paragraph(sub_item, subitem_style) for sub_item in sub_items)
        elif item.startswith(SECTION_OPEN_TAG):
            text = item.replace(SECTION_OPEN_TAG, '').replace(SECTION_CLOSE_TAG, '')
            cells.append(Paragraph(text, section_style))
        else:
            cells.append(Paragraph(item, item_style))
    return cells


# --- Main PDF Creation ---
def create_main_pdf(markdown_text, base_font_size=10, auto_size=False):
    """Render the Markdown outline as a two-column landscape A4 PDF.

    Args:
        markdown_text: Markdown source, parsed by markdown_to_pdf_content().
        base_font_size: Item font size in points; sub-item, section and title
            sizes are derived from it. Ignored when ``auto_size`` is true.
        auto_size: When true, derive the base font size from the number of
            items and sub-items, clamped to the 6-12 point range.

    Returns:
        The generated PDF as bytes.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # swap width/height for landscape
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
    )
    styles = getSampleStyleSheet()
    story = []
    spacer_height = 10

    left_column, right_column = markdown_to_pdf_content(markdown_text)

    # Count every rendered row: one per entry plus one per sub-item.
    total_items = sum(
        1 + len(item[1]) if isinstance(item, list) else 1
        for item in left_column + right_column
    )

    if auto_size:
        # max(..., 1) guards against division by zero on empty Markdown.
        base_font_size = max(6, min(12, 200 / max(total_items, 1)))

    item_font_size = base_font_size
    subitem_font_size = base_font_size * 0.9
    section_font_size = base_font_size * 1.2
    title_font_size = min(16, base_font_size * 1.5)

    title_style = ParagraphStyle(
        'Heading1',
        parent=styles['Heading1'],
        fontName=registered_font_name,
        textColor=colors.darkblue,
        alignment=1,  # centered
        fontSize=title_font_size,
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        fontName=registered_font_name,
        textColor=colors.darkblue,
        fontSize=section_font_size,
        leading=section_font_size * 1.2,
        spaceAfter=2,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontName=registered_font_name,
        fontSize=item_font_size,
        leading=item_font_size * 1.2,
        spaceAfter=1,
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontName=registered_font_name,
        fontSize=subitem_font_size,
        leading=subitem_font_size * 1.2,
        leftIndent=10,
        spaceAfter=1,
    )

    story.append(Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style))
    story.append(Spacer(1, spacer_height))

    left_cells = _build_cells(left_column, section_style, item_style, subitem_style)
    right_cells = _build_cells(right_column, section_style, item_style, subitem_style)

    # Pad the shorter column so both have the same number of table rows.
    max_cells = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (max_cells - len(left_cells)))
    right_cells.extend([""] * (max_cells - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # Skip the table entirely for empty content; only the title is rendered.
    if table_data:
        col_width = (A4[1] - 72) / 2.0  # landscape width minus both margins
        table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()


# --- Function to Convert PDF Bytes to Image (for Preview) ---
def pdf_to_image(pdf_bytes):
    """Render the first page of a PDF as a PIL image at 2x scale.

    Args:
        pdf_bytes: The PDF document as bytes.

    Returns:
        A PIL ``Image`` of the first page, or ``None`` if rendering failed
        (the failure is reported with ``st.error``).
    """
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        finally:
            # Close the document even when rendering raises.
            doc.close()
    except Exception as e:  # UI boundary: show the error instead of crashing
        st.error(f"Failed to render PDF preview: {e}")
        return None


# --- Sidebar UI for Additional Settings ---
with st.sidebar:
    auto_size = st.checkbox("Auto-size text", value=True)
    if not auto_size:
        base_font_size = st.slider(
            "Base Font Size (points)", min_value=6, max_value=16, value=10, step=1
        )
    else:
        base_font_size = 10
        st.info("Font size will auto-adjust between 6-12 points based on content length.")

# --- Persist Markdown Content in Session State ---
if 'markdown_content' not in st.session_state:
    st.session_state.markdown_content = default_markdown

# --- Generate PDF ---
with st.spinner("Generating PDF..."):
    pdf_bytes = create_main_pdf(st.session_state.markdown_content, base_font_size, auto_size)

# --- Display PDF Preview in UI ---
with st.container():
    pdf_image = pdf_to_image(pdf_bytes)
    if pdf_image is not None:
        st.image(pdf_image, use_container_width=True)
    else:
        st.info("Download the PDF to view it locally.")

# --- PDF Download Button ---
st.download_button(
    label="Download PDF",
    data=pdf_bytes,
    file_name="ml_outline.pdf",
    mime="application/pdf",
)

# --- Markdown Editor ---
edited_markdown = st.text_area(
    "Modify the markdown content below:",
    value=st.session_state.markdown_content,
    height=300,
)

# --- Update PDF on Button Click ---
if st.button("Update PDF"):
    st.session_state.markdown_content = edited_markdown
    # st.experimental_rerun is gone from current Streamlit releases; prefer
    # st.rerun and fall back only on older installations that lack it.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()

# --- Markdown Download Button ---
st.download_button(
    label="Save Markdown",
    data=st.session_state.markdown_content,
    file_name="ml_outline.md",
    mime="text/markdown",
)