"""Streamlit app that renders a markdown outline as a two-column landscape PDF.

The markdown is parsed into sections, numbered items and bullet sub-items,
laid out with ReportLab, previewed as an image via PyMuPDF, and offered for
download together with the markdown source.
"""

import streamlit as st
import base64
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib import colors
import io
import re
import fitz  # PyMuPDF
from PIL import Image

# Markers that markdown_to_pdf_content() wraps around "## " section headings
# so create_main_pdf() can tell a heading apart from an ordinary text line.
_SECTION_OPEN = "<b>"
_SECTION_CLOSE = "</b>"

# A numbered outline entry such as "3. Title".
_NUMBERED_ITEM = re.compile(r'^\d+\.')

# Initial markdown content.
# NOTE(review): several emoji below look mis-encoded (mojibake). They are kept
# exactly as found; confirm the file's encoding and restore the intended
# characters at the source.
default_markdown = """# Cutting-Edge ML Outline

## Core ML Techniques
1. 🌟 **Mixture of Experts (MoE)**
   - Conditional computation techniques
   - Sparse gating mechanisms
   - Training specialized sub-models

2. πŸ”₯ **Supervised Fine-Tuning (SFT) using PyTorch**
   - Loss function customization
   - Gradient accumulation strategies
   - Learning rate schedulers

3. πŸ€– **Large Language Models (LLM) using Transformers**
   - Attention mechanisms
   - Tokenization strategies
   - Position encodings

## Training Methods
4. πŸ“Š **Self-Rewarding Learning using NPS 0-10 and Verbatims**
   - Custom reward functions
   - Feedback categorization
   - Signal extraction from text

5. πŸ‘ **Reinforcement Learning from Human Feedback (RLHF)**
   - Preference datasets
   - PPO implementation
   - KL divergence constraints

6. πŸ”— **MergeKit: Merging Models to Same Embedding Space**
   - TIES merging
   - Task arithmetic
   - SLERP interpolation

## Optimization & Deployment
7. πŸ“ **DistillKit: Model Size Reduction with Spectrum Analysis**
   - Knowledge distillation
   - Quantization techniques
   - Model pruning strategies

8. 🧠 **Agentic RAG Agents using Document Inputs**
   - Vector database integration
   - Query planning
   - Self-reflection mechanisms

9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
   - Multi-document compression
   - Timeline extraction
   - Entity tracking

## Knowledge Representation
10. πŸ“‘ **Knowledge Extraction using Markdown Knowledge Graphs**
    - Entity recognition
    - Relationship mapping
    - Hierarchical structuring

11. πŸ—ΊοΈ **Knowledge Mapping with Mermaid Diagrams**
    - Flowchart generation
    - Sequence diagram creation
    - State diagrams

12. πŸ’» **ML Code Generation with Streamlit/Gradio/HTML5+JS**
    - Code completion
    - Unit test generation
    - Documentation synthesis
"""


def markdown_to_pdf_content(markdown_text):
    """Parse a multilevel markdown outline into two columns of PDF content.

    Each returned column is a list whose entries are either:

    * a string wrapped in the section markers for a ``## `` heading,
    * a plain string for a free-text line outside any numbered item, or
    * a two-element list ``[item_text, sub_items]`` for a numbered item and
      the ``- `` bullet lines that follow it (``sub_items`` may be empty).

    Lines starting with ``# `` (the document title) are skipped; the PDF
    title is set by ``create_main_pdf``. Non-bullet lines that appear while a
    numbered item is open are ignored.

    Args:
        markdown_text: The markdown source of the outline.

    Returns:
        A ``(left_column, right_column)`` tuple splitting the parsed entries
        at the midpoint of their count.
    """
    pdf_content = []
    current_item = None
    sub_items = []
    in_list_item = False

    def flush_item():
        """Emit the open numbered item, if any, and reset the item state."""
        nonlocal current_item, sub_items
        # Flush even when there are no bullets: previously an item without
        # sub-items was dropped from the output entirely.
        if current_item:
            pdf_content.append([current_item, sub_items])
        current_item = None
        sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('# '):
            continue

        if line.startswith('## '):
            flush_item()
            # Strip only the leading marker, not later occurrences of "## ".
            section = line[len('## '):].strip()
            pdf_content.append(f"{_SECTION_OPEN}{section}{_SECTION_CLOSE}")
            in_list_item = False
        elif _NUMBERED_ITEM.match(line):
            flush_item()
            current_item = line
            in_list_item = True
        elif line.startswith('- ') and in_list_item:
            sub_items.append(line)
        elif not in_list_item:
            pdf_content.append(line)

    flush_item()

    mid_point = len(pdf_content) // 2
    left_column = pdf_content[:mid_point]
    right_column = pdf_content[mid_point:]
    return left_column, right_column


def _count_entries(column):
    """Return the number of rendered lines (items plus sub-items) in a column."""
    return sum(1 + len(entry[1]) if isinstance(entry, list) else 1 for entry in column)


def _build_column_cells(column, section_style, item_style, subitem_style):
    """Turn one parsed column into a flat list of ReportLab Paragraph cells."""
    cells = []
    for entry in column:
        if isinstance(entry, list):
            main_item, sub_items = entry
            cells.append(Paragraph(main_item, item_style))
            cells.extend(Paragraph(sub_item, subitem_style) for sub_item in sub_items)
        elif entry.startswith(_SECTION_OPEN):
            # Remove the section markers before styling the heading.
            text = entry[len(_SECTION_OPEN):]
            if text.endswith(_SECTION_CLOSE):
                text = text[:-len(_SECTION_CLOSE)]
            cells.append(Paragraph(text, section_style))
        else:
            cells.append(Paragraph(entry, item_style))
    return cells


def create_main_pdf(markdown_text, base_font_size=10, auto_size=False):
    """Render the markdown outline as a two-column landscape A4 PDF.

    Args:
        markdown_text: The markdown source of the outline.
        base_font_size: Point size for numbered items; the other text sizes
            are derived from it. Ignored when ``auto_size`` is true.
        auto_size: When true, choose the base size from the amount of
            content, clamped to the 6-12 point range.

    Returns:
        The generated PDF as bytes.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # swap width/height for landscape
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
    )

    styles = getSampleStyleSheet()
    story = []
    spacer_height = 10

    left_column, right_column = markdown_to_pdf_content(markdown_text)
    total_items = _count_entries(left_column) + _count_entries(right_column)

    # Adjust the 200 multiplier to control autosizing sensitivity.
    if auto_size:
        # Guard against empty content, which would otherwise divide by zero.
        base_font_size = max(6, min(12, 200 / max(total_items, 1)))

    # Font size parameters - tweak these ratios as needed.
    item_font_size = base_font_size
    subitem_font_size = base_font_size * 0.9
    section_font_size = base_font_size * 1.2
    title_font_size = min(16, base_font_size * 1.5)

    title_style = styles['Heading1']
    title_style.textColor = colors.darkblue
    title_style.alignment = 1  # centred
    title_style.fontSize = title_font_size

    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        fontSize=section_font_size,
        leading=section_font_size * 1.2,
        spaceAfter=2,
    )

    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=item_font_size,
        leading=item_font_size * 1.2,
        fontName='Helvetica-Bold',
        spaceAfter=1,
    )

    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontSize=subitem_font_size,
        leading=subitem_font_size * 1.2,
        leftIndent=10,
        spaceAfter=1,
    )

    story.append(Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style))
    story.append(Spacer(1, spacer_height))

    left_cells = _build_column_cells(left_column, section_style, item_style, subitem_style)
    right_cells = _build_column_cells(right_column, section_style, item_style, subitem_style)

    # Pad the shorter column so both have one cell per table row.
    max_cells = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (max_cells - len(left_cells)))
    right_cells.extend([""] * (max_cells - len(right_cells)))

    table_data = list(zip(left_cells, right_cells))

    # Skip the table when there is no content, so an empty outline yields a
    # title-only PDF instead of handing ReportLab an empty table.
    if table_data:
        col_width = (A4[1] - 72) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()


# Function to convert PDF bytes to image using fitz
# (from backup.03302025-720pm.app.py)
def pdf_to_image(pdf_bytes):
    """Render the first page of a PDF to a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF document as bytes.

    Returns:
        A PIL ``Image`` of the first page, or ``None`` if rendering failed
        (the failure is reported in the Streamlit UI).
    """
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            page = doc[0]
            # 2x zoom for a sharper preview.
            pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
        finally:
            # Close the document even when rendering raises.
            doc.close()
    except Exception as e:
        # UI boundary: report the failure and let the caller fall back.
        st.error(f"Failed to render PDF preview: {e}")
        return None


# Streamlit UI
st.title("πŸš€ Cutting-Edge ML Outline Generator")

# Sidebar for settings
with st.sidebar:
    st.header("PDF Settings")
    auto_size = st.checkbox("Auto-size text", value=True)
    if not auto_size:
        base_font_size = st.slider("Base Font Size (points)", min_value=6, max_value=16, value=10, step=1)
    else:
        base_font_size = 10
        st.info("Font size will auto-adjust between 6-12 points based on content length.")

# Use session state to persist markdown content
if 'markdown_content' not in st.session_state:
    st.session_state.markdown_content = default_markdown

# Generate PDF
with st.spinner("Generating PDF..."):
    pdf_bytes = create_main_pdf(st.session_state.markdown_content, base_font_size, auto_size)

# Display PDF preview using fitz
st.subheader("PDF Preview")
pdf_image = pdf_to_image(pdf_bytes)
if pdf_image is not None:
    # NOTE(review): newer Streamlit releases may deprecate use_column_width;
    # confirm against the pinned Streamlit version before changing it.
    st.image(pdf_image, caption="PDF Page 1", use_column_width=True)
else:
    st.info("Download the PDF to view it locally.")

# Download button
st.download_button(
    label="Download PDF",
    data=pdf_bytes,
    file_name="ml_outline.pdf",
    mime="application/pdf"
)

# Markdown editor
st.subheader("Edit Markdown Outline")
edited_markdown = st.text_area(
    "Modify the markdown content below:",
    value=st.session_state.markdown_content,
    height=300
)

# Update markdown and regenerate PDF on change
if st.button("Update PDF"):
    st.session_state.markdown_content = edited_markdown
    st.rerun()

# Save markdown option
st.download_button(
    label="Save Markdown",
    data=st.session_state.markdown_content,
    file_name="ml_outline.md",
    mime="text/markdown"
)