awacke1's picture
Update app.py
9f3cf94 verified
raw
history blame
11.4 kB
import os
import urllib.request
import io
import re
import streamlit as st
# Configure the page (wide layout, sidebar collapsed on first load). This call
# is deliberately placed ahead of the remaining imports so that it is the first
# Streamlit command executed by the script.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
from PIL import Image
import fitz # PyMuPDF
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# --- Step 1: Define and Download Available Emoji Fonts ---
# Font files are stored in the current working directory under these names.
font_files = [
    "Noto-COLRv1-emojicompat.ttf",
    "Noto-COLRv1-noflags.ttf",
    "Noto-COLRv1.ttf",
    "NotoColorEmoji-emojicompat.ttf",
    "NotoColorEmoji-flagsonly.ttf",
    "NotoColorEmoji-noflags.ttf",
    "NotoColorEmoji.ttf",
    "NotoColorEmoji_WindowsCompatible.ttf",
]
base_font_url = "https://github.com/googlefonts/noto-emoji/raw/main/fonts/"
for font_file in font_files:
    if os.path.exists(font_file):
        continue
    st.info(f"Downloading {font_file}...")
    # Download under a temporary name and rename on success, so an interrupted
    # transfer cannot leave a truncated file under the final name (which the
    # existence check above would accept on the next run).
    partial_path = font_file + ".part"
    try:
        urllib.request.urlretrieve(base_font_url + font_file, partial_path)
        os.replace(partial_path, font_file)
        st.success(f"Downloaded {font_file}")
    except Exception as e:
        # Best effort: report the failure and carry on with the other fonts.
        st.error(f"Failed to download {font_file}: {e}")
        if os.path.exists(partial_path):
            os.remove(partial_path)

# --- Step 2: Allow User to Select the Emoji Font ---
font_display_names = {f: f.replace(".ttf", "") for f in font_files}
# Only offer fonts that are actually on disk; registering a missing file
# would raise when TTFont tries to open it.
available_font_files = [f for f in font_files if os.path.exists(f)]
if not available_font_files:
    st.error("No emoji fonts are available, so the PDF cannot be generated.")
    st.stop()
selected_font_file = st.sidebar.selectbox(
    "Select Emoji Font",
    options=available_font_files,
    format_func=lambda f: font_display_names[f],
)
# The font is registered with ReportLab under its file name minus ".ttf";
# the paragraph styles built later refer to it by this name.
registered_font_name = font_display_names[selected_font_file]
pdfmetrics.registerFont(TTFont(registered_font_name, selected_font_file))
# --- Default Markdown Content with Emojis ---
# Initial content of the markdown editor. The emoji were mis-encoded (UTF-8
# bytes shown as Greek/Latin characters) and have been restored.
# NOTE(review): the emoji on items 5 and 7 had lost a byte and are best-guess
# reconstructions (thumbs-up, ruler) -- confirm against the intended text.
default_markdown = """# Cutting-Edge ML Outline
## Core ML Techniques
1. 🌟 **Mixture of Experts (MoE)**
- Conditional computation techniques
- Sparse gating mechanisms
- Training specialized sub-models
2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
- Loss function customization
- Gradient accumulation strategies
- Learning rate schedulers
3. 🤖 **Large Language Models (LLM) using Transformers**
- Attention mechanisms
- Tokenization strategies
- Position encodings
## Training Methods
4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
- Custom reward functions
- Feedback categorization
- Signal extraction from text
5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
- Preference datasets
- PPO implementation
- KL divergence constraints
6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
- TIES merging
- Task arithmetic
- SLERP interpolation
## Optimization & Deployment
7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
- Knowledge distillation
- Quantization techniques
- Model pruning strategies
8. 🧠 **Agentic RAG Agents using Document Inputs**
- Vector database integration
- Query planning
- Self-reflection mechanisms
9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
- Multi-document compression
- Timeline extraction
- Entity tracking
## Knowledge Representation
10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
- Entity recognition
- Relationship mapping
- Hierarchical structuring
11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
- Flowchart generation
- Sequence diagram creation
- State diagrams
12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
- Code completion
- Unit test generation
- Documentation synthesis
"""
# --- Markdown to PDF Content Processing ---
def markdown_to_pdf_content(markdown_text):
    """Parse outline-style markdown into two columns of PDF entries.

    Each line is stripped and classified:

    * ``# title`` lines are skipped.
    * ``## section`` lines become the string ``"<b>section</b>"``.
    * Lines starting with ``<digits>.`` open a numbered item; the ``- `` lines
      that follow are collected as its sub-items, and the pair is emitted as
      ``[item_text, [sub_item_text, ...]]``. A numbered item with no
      sub-items is emitted with an empty list rather than being dropped.
    * Any other line is kept as a plain string unless it appears inside a
      numbered item, in which case it is ignored.

    Args:
        markdown_text: The markdown source to parse.

    Returns:
        A ``(left_column, right_column)`` tuple. The entries are split by
        entry count; when the count is odd the right column gets the extra
        entry.
    """
    pdf_content = []
    in_list_item = False
    current_item = None
    sub_items = []

    def flush_current_item():
        # Emit the numbered item being collected (if any) and reset the state.
        nonlocal current_item, sub_items
        if current_item:
            pdf_content.append([current_item, sub_items])
        current_item = None
        sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('# '):
            # The main title is skipped.
            continue
        if line.startswith('## '):
            flush_current_item()
            # Slice off the prefix only; a later "## " inside the heading text
            # must be left untouched.
            section = line[3:].strip()
            pdf_content.append(f"<b>{section}</b>")
            in_list_item = False
        elif re.match(r'^\d+\.', line):
            flush_current_item()
            current_item = line
            in_list_item = True
        elif line.startswith('- ') and in_list_item:
            sub_items.append(line)
        elif not in_list_item:
            pdf_content.append(line)
    flush_current_item()

    mid_point = len(pdf_content) // 2
    left_column = pdf_content[:mid_point]
    right_column = pdf_content[mid_point:]
    return left_column, right_column
# --- Main PDF Creation ---
def _build_column_cells(column, section_style, item_style, subitem_style):
    """Convert one column of parsed markdown entries into Paragraph cells."""
    cells = []
    for item in column:
        if isinstance(item, str) and item.startswith('<b>'):
            # Section headings arrive wrapped in <b>...</b>; the marker is
            # removed and the section style is applied instead.
            text = item.replace('<b>', '').replace('</b>', '')
            cells.append(Paragraph(text, section_style))
        elif isinstance(item, list):
            main_item, sub_items = item
            cells.append(Paragraph(main_item, item_style))
            cells.extend(Paragraph(sub_item, subitem_style) for sub_item in sub_items)
        else:
            cells.append(Paragraph(item, item_style))
    return cells


def create_main_pdf(markdown_text, base_font_size=10, auto_size=False):
    """Render the markdown outline as a two-column landscape A4 PDF.

    Args:
        markdown_text: Markdown source, parsed by ``markdown_to_pdf_content``.
        base_font_size: Point size for numbered items; section, sub-item and
            title sizes are derived from it. Ignored when ``auto_size`` is
            true.
        auto_size: When true, the base size is computed as ``200 / lines``
            clamped to the range 6-12, where ``lines`` counts every section,
            item and sub-item.

    Returns:
        The generated PDF document as ``bytes``.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # A4 with width and height swapped: landscape
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
    )
    styles = getSampleStyleSheet()
    story = []
    spacer_height = 10
    left_column, right_column = markdown_to_pdf_content(markdown_text)

    # A list entry stands for one numbered item plus its sub-items; every
    # other entry is a single line.
    total_items = sum(
        1 + len(item[1]) if isinstance(item, list) else 1
        for col in (left_column, right_column)
        for item in col
    )
    if auto_size:
        # Guard the divisor so empty markdown does not raise ZeroDivisionError.
        base_font_size = max(6, min(12, 200 / max(total_items, 1)))

    item_font_size = base_font_size
    subitem_font_size = base_font_size * 0.9
    section_font_size = base_font_size * 1.2
    title_font_size = min(16, base_font_size * 1.5)

    title_style = ParagraphStyle(
        'Heading1',
        parent=styles['Heading1'],
        fontName=registered_font_name,
        textColor=colors.darkblue,
        alignment=1,
        fontSize=title_font_size,
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        fontName=registered_font_name,
        textColor=colors.darkblue,
        fontSize=section_font_size,
        leading=section_font_size * 1.2,
        spaceAfter=2,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontName=registered_font_name,
        fontSize=item_font_size,
        leading=item_font_size * 1.2,
        spaceAfter=1,
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontName=registered_font_name,
        fontSize=subitem_font_size,
        leading=subitem_font_size * 1.2,
        leftIndent=10,
        spaceAfter=1,
    )

    story.append(Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style))
    story.append(Spacer(1, spacer_height))

    left_cells = _build_column_cells(left_column, section_style, item_style, subitem_style)
    right_cells = _build_column_cells(right_column, section_style, item_style, subitem_style)

    # Pad the shorter column with empty cells so both have one cell per row.
    max_cells = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (max_cells - len(left_cells)))
    right_cells.extend([""] * (max_cells - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # ReportLab's Table rejects empty data by default, so the table is only
    # added when there is at least one row; otherwise just the title renders.
    if table_data:
        # Usable width is the landscape page width minus the two 36pt margins.
        col_width = (A4[1] - 72) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
# --- Function to Convert PDF Bytes to Image (for Preview) ---
def pdf_to_image(pdf_bytes):
    """Rasterise the first page of a PDF for on-screen preview.

    Args:
        pdf_bytes: The PDF document as bytes.

    Returns:
        A PIL ``Image`` of the first page rendered at 2x zoom, or ``None`` if
        rendering failed (the error is reported through ``st.error``).
    """
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        finally:
            # Close the document even when rendering the page raises.
            doc.close()
    except Exception as e:
        # UI boundary: show the failure instead of crashing the app.
        st.error(f"Failed to render PDF preview: {e}")
        return None
# --- Sidebar UI for Additional Settings ---
with st.sidebar:
    auto_size = st.checkbox("Auto-size text", value=True)
    if not auto_size:
        base_font_size = st.slider("Base Font Size (points)", min_value=6, max_value=16, value=10, step=1)
    else:
        # Placeholder only: create_main_pdf recomputes the size when auto_size is on.
        base_font_size = 10
        st.info("Font size will auto-adjust between 6-12 points based on content length.")

# --- Persist Markdown Content in Session State ---
if 'markdown_content' not in st.session_state:
    st.session_state.markdown_content = default_markdown

# --- Generate PDF ---
with st.spinner("Generating PDF..."):
    pdf_bytes = create_main_pdf(st.session_state.markdown_content, base_font_size, auto_size)

# --- Display PDF Preview in UI ---
with st.container():
    pdf_image = pdf_to_image(pdf_bytes)
    # pdf_to_image returns None on failure; test for that explicitly.
    if pdf_image is not None:
        st.image(pdf_image, use_container_width=True)
    else:
        st.info("Download the PDF to view it locally.")

# --- PDF Download Button ---
st.download_button(
    label="Download PDF",
    data=pdf_bytes,
    file_name="ml_outline.pdf",
    mime="application/pdf",
)

# --- Markdown Editor ---
edited_markdown = st.text_area(
    "Modify the markdown content below:",
    value=st.session_state.markdown_content,
    height=300,
)

# --- Update PDF on Button Click ---
if st.button("Update PDF"):
    st.session_state.markdown_content = edited_markdown
    # st.experimental_rerun() has been removed from current Streamlit
    # releases; st.rerun() is its replacement.
    st.rerun()

# --- Markdown Download Button ---
st.download_button(
    label="Save Markdown",
    data=st.session_state.markdown_content,
    file_name="ml_outline.md",
    mime="text/markdown",
)