"""Streamlit app that renders a Markdown outline as a two-column landscape PDF.

The PDF is built with ReportLab, emoji are drawn with a user-selected
NotoEmoji font, and the first page is previewed as an image via PyMuPDF.
"""
import io
import os
import re

import fitz  # PyMuPDF
import streamlit as st
from PIL import Image
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle

# Must be the very first Streamlit command.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

# ---------------------------------------------------------------
# Define available NotoEmoji fonts (all in the base directory now)
available_fonts = {
    "NotoEmoji Variable": "NotoEmoji-VariableFont_wght.ttf",
    "NotoEmoji Bold": "NotoEmoji-Bold.ttf",
    "NotoEmoji Light": "NotoEmoji-Light.ttf",
    "NotoEmoji Medium": "NotoEmoji-Medium.ttf",
    "NotoEmoji Regular": "NotoEmoji-Regular.ttf",
    "NotoEmoji SemiBold": "NotoEmoji-SemiBold.ttf",
}

# Sidebar: Let the user choose the desired NotoEmoji font.
selected_font_name = st.sidebar.selectbox(
    "Select NotoEmoji Font",
    options=list(available_fonts.keys()),
)
selected_font_path = available_fonts[selected_font_name]

# Register the chosen emoji font with ReportLab.
pdfmetrics.registerFont(TTFont(selected_font_name, selected_font_path))

# Markers that tag a section heading inside the parsed content lists.
# markdown_to_pdf_content() emits them and create_main_pdf() strips them.
_SECTION_OPEN = "<b>"
_SECTION_CLOSE = "</b>"

# Compiled once at import time; matches runs of characters from many common
# emoji ranges.
_EMOJI_PATTERN = re.compile(
    r"([\U0001F300-\U0001F5FF"
    r"\U0001F600-\U0001F64F"
    r"\U0001F680-\U0001F6FF"
    r"\U0001F700-\U0001F77F"
    r"\U0001F780-\U0001F7FF"
    r"\U0001F800-\U0001F8FF"
    r"\U0001F900-\U0001F9FF"
    r"\U0001FA00-\U0001FA6F"
    r"\U0001FA70-\U0001FAFF"
    r"\u2600-\u26FF"
    r"\u2700-\u27BF]+)"
)


# ---------------------------------------------------------------
# Helper function to wrap emoji characters with a font tag.
def apply_emoji_font(text, emoji_font):
    """Wrap every run of emoji in *text* with a ReportLab ``<font>`` tag.

    Args:
        text: Paragraph text that may contain emoji.
        emoji_font: Name of a font already registered with ReportLab.

    Returns:
        The text with each emoji run enclosed in
        ``<font face="emoji_font">...</font>``; text without emoji is
        returned unchanged.
    """
    # A callable replacement keeps backslashes in the font name from being
    # interpreted as regex template escapes.
    return _EMOJI_PATTERN.sub(
        lambda match: f'<font face="{emoji_font}">{match.group(1)}</font>',
        text,
    )


# ---------------------------------------------------------------
# Default markdown content with emojis.
default_markdown = """# Cutting-Edge ML Outline

## Core ML Techniques
1. 🌟 **Mixture of Experts (MoE)**
   - Conditional computation techniques
   - Sparse gating mechanisms
   - Training specialized sub-models

2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
   - Loss function customization
   - Gradient accumulation strategies
   - Learning rate schedulers

3. 🤖 **Large Language Models (LLM) using Transformers**
   - Attention mechanisms
   - Tokenization strategies
   - Position encodings

## Training Methods
4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
   - Custom reward functions
   - Feedback categorization
   - Signal extraction from text

5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
   - Preference datasets
   - PPO implementation
   - KL divergence constraints

6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
   - TIES merging
   - Task arithmetic
   - SLERP interpolation

## Optimization & Deployment
7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
   - Knowledge distillation
   - Quantization techniques
   - Model pruning strategies

8. 🧠 **Agentic RAG Agents using Document Inputs**
   - Vector database integration
   - Query planning
   - Self-reflection mechanisms

9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
   - Multi-document compression
   - Timeline extraction
   - Entity tracking

## Knowledge Representation
10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
    - Entity recognition
    - Relationship mapping
    - Hierarchical structuring

11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
    - Flowchart generation
    - Sequence diagram creation
    - State diagrams

12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
    - Code completion
    - Unit test generation
    - Documentation synthesis
"""


# ---------------------------------------------------------------
# Process markdown into a two-column layout for the PDF.
def markdown_to_pdf_content(markdown_text):
    """Parse the outline markdown and split it into two columns.

    Recognised lines (after stripping whitespace):

    * ``# title``   - skipped; the PDF title is added by the caller.
    * ``## name``   - section heading, stored as ``"<b>name</b>"``.
    * ``N. text``   - numbered item, stored as ``[text, [sub_items...]]``.
    * ``- text``    - sub-item of the current numbered item.
    * anything else - kept as a plain string only when no numbered item is
      open; otherwise ignored.

    Args:
        markdown_text: The markdown source.

    Returns:
        ``(left_column, right_column)``: the parsed entries split at the
        midpoint of the entry list (the right column gets the extra entry
        when the count is odd).
    """
    pdf_content = []
    in_list_item = False
    current_item = None
    sub_items = []

    def flush_item():
        """Store the open numbered item, even if it has no sub-items."""
        nonlocal current_item, sub_items
        if current_item is not None:
            pdf_content.append([current_item, sub_items])
        current_item = None
        sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('# '):
            # Optionally skip the main title.
            continue

        if line.startswith('## '):
            flush_item()
            section = line[len('## '):].strip()
            pdf_content.append(f"{_SECTION_OPEN}{section}{_SECTION_CLOSE}")
            in_list_item = False
        elif re.match(r'^\d+\.', line):
            flush_item()
            current_item = line
            in_list_item = True
        elif line.startswith('- ') and in_list_item:
            sub_items.append(line)
        elif not in_list_item:
            pdf_content.append(line)

    flush_item()

    mid_point = len(pdf_content) // 2
    left_column = pdf_content[:mid_point]
    right_column = pdf_content[mid_point:]
    return left_column, right_column


def _build_cells(column, section_style, item_style, subitem_style, emoji_font):
    """Turn one parsed column into a flat list of ReportLab Paragraphs."""
    cells = []
    for item in column:
        if isinstance(item, list):
            main_item, sub_items = item
            cells.append(Paragraph(apply_emoji_font(main_item, emoji_font), item_style))
            cells.extend(
                Paragraph(apply_emoji_font(sub_item, emoji_font), subitem_style)
                for sub_item in sub_items
            )
        elif item.startswith(_SECTION_OPEN):
            # Section heading: drop the marker, the style supplies the bold font.
            text = item.replace(_SECTION_OPEN, '').replace(_SECTION_CLOSE, '')
            cells.append(Paragraph(apply_emoji_font(text, emoji_font), section_style))
        else:
            cells.append(Paragraph(apply_emoji_font(item, emoji_font), item_style))
    return cells


# ---------------------------------------------------------------
# Create the PDF using ReportLab.
def create_main_pdf(markdown_text, base_font_size=10, auto_size=False):
    """Render the markdown outline as a landscape A4, two-column PDF.

    Args:
        markdown_text: Markdown source of the outline.
        base_font_size: Font size in points for numbered items; the other
            sizes are derived from it. Ignored when *auto_size* is true.
        auto_size: When true, derive the base size from the number of lines
            to render, clamped to the range 6-12 points.

    Returns:
        The generated PDF as ``bytes``.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # swapped dimensions -> landscape
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
    )
    styles = getSampleStyleSheet()
    story = []
    spacer_height = 10

    left_column, right_column = markdown_to_pdf_content(markdown_text)

    # Each heading, numbered item and sub-item counts as one rendered line.
    total_items = sum(
        1 + len(item[1]) if isinstance(item, list) else 1
        for column in (left_column, right_column)
        for item in column
    )

    if auto_size:
        # max(..., 1) avoids a ZeroDivisionError on empty content.
        base_font_size = max(6, min(12, 200 / max(total_items, 1)))

    item_font_size = base_font_size
    subitem_font_size = base_font_size * 0.9
    section_font_size = base_font_size * 1.2
    title_font_size = min(16, base_font_size * 1.5)

    # Define ParagraphStyles using Helvetica for normal text.
    title_style = ParagraphStyle(
        'Heading1',
        parent=styles['Heading1'],
        fontName="Helvetica-Bold",
        textColor=colors.darkblue,
        alignment=1,
        fontSize=title_font_size,
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        fontName="Helvetica-Bold",
        textColor=colors.darkblue,
        fontSize=section_font_size,
        leading=section_font_size * 1.2,
        spaceAfter=2,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontName="Helvetica",
        fontSize=item_font_size,
        leading=item_font_size * 1.2,
        spaceAfter=1,
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontName="Helvetica",
        fontSize=subitem_font_size,
        leading=subitem_font_size * 1.2,
        leftIndent=10,
        spaceAfter=1,
    )

    story.append(
        Paragraph(
            apply_emoji_font("Cutting-Edge ML Outline (ReportLab)", selected_font_name),
            title_style,
        )
    )
    story.append(Spacer(1, spacer_height))

    left_cells = _build_cells(
        left_column, section_style, item_style, subitem_style, selected_font_name
    )
    right_cells = _build_cells(
        right_column, section_style, item_style, subitem_style, selected_font_name
    )

    # Pad the shorter column so both have the same number of rows.
    max_cells = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (max_cells - len(left_cells)))
    right_cells.extend([""] * (max_cells - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # Skip the table entirely when there is nothing to show, so that empty
    # markdown still yields a PDF containing just the title.
    if table_data:
        col_width = (A4[1] - 72) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()


# ---------------------------------------------------------------
# Convert PDF bytes to an image for preview using PyMuPDF.
def pdf_to_image(pdf_bytes):
    """Render the first page of a PDF to a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF document as bytes.

    Returns:
        A ``PIL.Image.Image`` of page one, or ``None`` if rendering failed
        (the error is reported through ``st.error``).
    """
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            page = doc[0]
            pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        finally:
            # Close the document even when rendering raises.
            doc.close()
        return img
    except Exception as e:  # UI boundary: report the failure, keep the app alive.
        st.error(f"Failed to render PDF preview: {e}")
        return None


# ---------------------------------------------------------------
# Sidebar options for text size.
with st.sidebar:
    auto_size = st.checkbox("Auto-size text", value=True)
    if not auto_size:
        base_font_size = st.slider(
            "Base Font Size (points)", min_value=6, max_value=16, value=10, step=1
        )
    else:
        base_font_size = 10
        st.info("Font size will auto-adjust between 6-12 points based on content length.")

# Persist markdown content in session state.
if 'markdown_content' not in st.session_state:
    st.session_state.markdown_content = default_markdown

# ---------------------------------------------------------------
# Generate the PDF.
with st.spinner("Generating PDF..."):
    pdf_bytes = create_main_pdf(st.session_state.markdown_content, base_font_size, auto_size)

# Display PDF preview.
with st.container():
    pdf_image = pdf_to_image(pdf_bytes)
    if pdf_image:
        st.image(pdf_image, use_container_width=True)
    else:
        st.info("Download the PDF to view it locally.")

# PDF Download button.
st.download_button(
    label="Download PDF",
    data=pdf_bytes,
    file_name="ml_outline.pdf",
    mime="application/pdf",
)

# Markdown editor.
edited_markdown = st.text_area(
    "Modify the markdown content below:",
    value=st.session_state.markdown_content,
    height=300,
)

# Update PDF on button click.
if st.button("Update PDF"):
    st.session_state.markdown_content = edited_markdown
    # st.experimental_rerun() has been removed from current Streamlit
    # releases; st.rerun() is its replacement.
    st.rerun()

# Markdown Download button.
st.download_button(
    label="Save Markdown",
    data=st.session_state.markdown_content,
    file_name="ml_outline.md",
    mime="text/markdown",
)