import os
import io
import re
import streamlit as st
# Must be the first Streamlit command.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
from PIL import Image
import fitz # PyMuPDF
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# ---------------------------------------------------------------
# Define available NotoEmoji fonts (local files)
# One font is at the root and others are in the 'static' subdirectory.
available_fonts = {
    "NotoEmoji Variable": "NotoEmoji-VariableFont_wght.ttf",
    "NotoEmoji Bold": "NotoEmoji-Bold.ttf",
    "NotoEmoji Light": "NotoEmoji-Light.ttf",
    "NotoEmoji Medium": "NotoEmoji-Medium.ttf",
    "NotoEmoji Regular": "NotoEmoji-Regular.ttf",
    "NotoEmoji SemiBold": "NotoEmoji-SemiBold.ttf"
}
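# NOTE: the entries above are plain filenames; if some of these fonts really
# live in ./static (as the comment above says), those entries would need a
# "static/" prefix for ReportLab to open them.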
# Sidebar: Let the user choose the desired NotoEmoji font.
selected_font_name = st.sidebar.selectbox(
    "Select NotoEmoji Font",
    options=list(available_fonts.keys())
)
selected_font_path = available_fonts[selected_font_name]
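# Optional safeguard (a sketch; assumes the .ttf files normally sit next to
# app.py): stop with a clear message if the chosen font file is missing,
# rather than letting registerFont below raise a TTFError.
if not os.path.isfile(selected_font_path):
    st.sidebar.error(f"Font file not found: {selected_font_path}")
    st.stop()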
# Register the chosen font with ReportLab.
pdfmetrics.registerFont(TTFont(selected_font_name, selected_font_path))
# ---------------------------------------------------------------
# Default markdown content with emojis.
default_markdown = """# Cutting-Edge ML Outline
## Core ML Techniques
1. 🌟 **Mixture of Experts (MoE)**
   - Conditional computation techniques
   - Sparse gating mechanisms
   - Training specialized sub-models
2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
   - Loss function customization
   - Gradient accumulation strategies
   - Learning rate schedulers
3. 🤖 **Large Language Models (LLM) using Transformers**
   - Attention mechanisms
   - Tokenization strategies
   - Position encodings
## Training Methods
4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
   - Custom reward functions
   - Feedback categorization
   - Signal extraction from text
5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
   - Preference datasets
   - PPO implementation
   - KL divergence constraints
6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
   - TIES merging
   - Task arithmetic
   - SLERP interpolation
## Optimization & Deployment
7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
   - Knowledge distillation
   - Quantization techniques
   - Model pruning strategies
8. 🧠 **Agentic RAG Agents using Document Inputs**
   - Vector database integration
   - Query planning
   - Self-reflection mechanisms
9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
   - Multi-document compression
   - Timeline extraction
   - Entity tracking
## Knowledge Representation
10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
    - Entity recognition
    - Relationship mapping
    - Hierarchical structuring
11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
    - Flowchart generation
    - Sequence diagram creation
    - State diagrams
12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
    - Code completion
    - Unit test generation
    - Documentation synthesis
"""
# ---------------------------------------------------------------
# Process markdown into PDF content.
def markdown_to_pdf_content(markdown_text):
    """Parse the markdown outline into a two-column structure for the PDF."""
    lines = markdown_text.strip().split('\n')
    pdf_content = []
    in_list_item = False
    current_item = None
    sub_items = []

    for line in lines:
        line = line.strip()
        if not line:
            continue
        if line.startswith('# '):
            # Optionally skip the main title.
            pass
        elif line.startswith('## '):
            # Flush any pending numbered item before starting a new section.
            if current_item and sub_items:
                pdf_content.append([current_item, sub_items])
                sub_items = []
                current_item = None
            section = line.replace('## ', '').strip()
            pdf_content.append(f"<b>{section}</b>")
            in_list_item = False
        elif re.match(r'^\d+\.', line):
            # A new numbered item starts; flush the previous one first.
            if current_item and sub_items:
                pdf_content.append([current_item, sub_items])
                sub_items = []
            current_item = line.strip()
            in_list_item = True
        elif line.startswith('- ') and in_list_item:
            sub_items.append(line.strip())
        else:
            if not in_list_item:
                pdf_content.append(line.strip())

    # Flush the last numbered item, if any.
    if current_item and sub_items:
        pdf_content.append([current_item, sub_items])

    # Split the content roughly in half for a two-column layout.
    mid_point = len(pdf_content) // 2
    left_column = pdf_content[:mid_point]
    right_column = pdf_content[mid_point:]
    return left_column, right_column
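# Illustrative shape of the returned columns (values abridged):
#   left_column == ["<b>Core ML Techniques</b>",
#                   ["1. 🌟 **Mixture of Experts (MoE)**",
#                    ["- Conditional computation techniques", "..."]],
#                   ...]
#   right_column holds the remaining sections and items in the same format.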
# ---------------------------------------------------------------
# Create PDF using ReportLab.
def create_main_pdf(markdown_text, base_font_size=10, auto_size=False):
    buffer = io.BytesIO()
    # Landscape A4 page: width and height are swapped.
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36
    )
    styles = getSampleStyleSheet()
    story = []
    spacer_height = 10
    left_column, right_column = markdown_to_pdf_content(markdown_text)

    # Count total items to possibly adjust font size.
    total_items = 0
    for col in (left_column, right_column):
        for item in col:
            if isinstance(item, list):
                main_item, sub_items