|
import os |
|
import io |
|
import re |
|
import streamlit as st |
|
|
|
|
|
# Page setup: wide layout, sidebar collapsed on first load.
st.set_page_config(
    initial_sidebar_state="collapsed",
    layout="wide",
)
|
|
|
from PIL import Image |
|
import fitz |
|
|
|
from reportlab.lib.pagesizes import A4 |
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle |
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle |
|
from reportlab.lib import colors |
|
from reportlab.pdfbase import pdfmetrics |
|
from reportlab.pdfbase.ttfonts import TTFont |
|
|
|
|
|
|
|
# Display name -> TrueType file name for each selectable NotoEmoji weight.
available_fonts = {
    "NotoEmoji Variable": "NotoEmoji-VariableFont_wght.ttf",
    "NotoEmoji Bold": "NotoEmoji-Bold.ttf",
    "NotoEmoji Light": "NotoEmoji-Light.ttf",
    "NotoEmoji Medium": "NotoEmoji-Medium.ttf",
    "NotoEmoji Regular": "NotoEmoji-Regular.ttf",
    "NotoEmoji SemiBold": "NotoEmoji-SemiBold.ttf",
}

# Let the user pick which weight is used for emoji glyphs.
selected_font_name = st.sidebar.selectbox(
    "Select NotoEmoji Font",
    options=list(available_fonts),
)
selected_font_path = available_fonts[selected_font_name]

# Register the chosen file under its display name so paragraph markup can
# refer to it with <font face="...">.
_selected_ttf = TTFont(selected_font_name, selected_font_path)
pdfmetrics.registerFont(_selected_ttf)
|
|
|
|
|
|
|
# Compiled once at import time. Matches a run of emoji code points, each
# optionally followed by VARIATION SELECTOR-16 (U+FE0F) so the selector stays
# inside the same font span as the emoji it modifies.
_EMOJI_PATTERN = re.compile(
    r"((?:["
    r"\U0001F300-\U0001F5FF"
    r"\U0001F600-\U0001F64F"
    r"\U0001F680-\U0001F6FF"
    r"\U0001F700-\U0001F77F"
    r"\U0001F780-\U0001F7FF"
    r"\U0001F800-\U0001F8FF"
    r"\U0001F900-\U0001F9FF"
    r"\U0001FA00-\U0001FAFF"
    # Emoji living in the Miscellaneous Technical block (watch, hourglass,
    # keyboard, eject, media-control symbols).
    r"\u231A\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA"
    r"\u2600-\u26FF"
    r"\u2700-\u27BF"
    r"]\uFE0F?)+)"
)


def apply_emoji_font(text, emoji_font):
    """Wrap every run of emoji in ``text`` in a ``<font face=...>`` tag.

    Args:
        text: The string to scan; non-emoji characters are left untouched.
        emoji_font: Font name to put in the ``face`` attribute.

    Returns:
        ``text`` with each consecutive run of emoji replaced by
        ``<font face="{emoji_font}">{run}</font>``.
    """

    def wrap(match):
        # A callable replacement inserts the font name literally; a template
        # string would treat backslashes in the name as regex escapes.
        return f'<font face="{emoji_font}">{match.group(1)}</font>'

    return _EMOJI_PATTERN.sub(wrap, text)
|
|
|
|
|
|
|
# Default outline used to seed the editor; every numbered item starts with an
# emoji followed by a bold title, then indented "- " sub-bullets.
default_markdown = """# Cutting-Edge ML Outline

## Core ML Techniques
1. 🔀 **Mixture of Experts (MoE)**
   - Conditional computation techniques
   - Sparse gating mechanisms
   - Training specialized sub-models

2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
   - Loss function customization
   - Gradient accumulation strategies
   - Learning rate schedulers

3. 🤖 **Large Language Models (LLM) using Transformers**
   - Attention mechanisms
   - Tokenization strategies
   - Position encodings

## Training Methods
4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
   - Custom reward functions
   - Feedback categorization
   - Signal extraction from text

5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
   - Preference datasets
   - PPO implementation
   - KL divergence constraints

6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
   - TIES merging
   - Task arithmetic
   - SLERP interpolation

## Optimization & Deployment
7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
   - Knowledge distillation
   - Quantization techniques
   - Model pruning strategies

8. 🧠 **Agentic RAG Agents using Document Inputs**
   - Vector database integration
   - Query planning
   - Self-reflection mechanisms

9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
   - Multi-document compression
   - Timeline extraction
   - Entity tracking

## Knowledge Representation
10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
    - Entity recognition
    - Relationship mapping
    - Hierarchical structuring

11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
    - Flowchart generation
    - Sequence diagram creation
    - State diagrams

12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
    - Code completion
    - Unit test generation
    - Documentation synthesis
"""
|
|
|
|
|
|
|
def markdown_to_pdf_content(markdown_text):
    """Parse outline-style markdown into two columns of PDF entries.

    Each line is stripped of surrounding whitespace and classified:

    * blank lines and ``# title`` lines are skipped;
    * ``## section`` becomes the string ``"<b>section</b>"``;
    * a line starting with ``<digits>.`` opens a numbered entry, emitted as
      ``[item_text, sub_items]``;
    * ``- bullet`` lines following a numbered entry are collected into that
      entry's ``sub_items``;
    * any other line is emitted verbatim when no numbered entry is open and
      dropped otherwise.

    Args:
        markdown_text: The markdown source as a single string.

    Returns:
        A ``(left_column, right_column)`` tuple: the parsed entries split at
        the midpoint, with the extra entry going to the right column when the
        count is odd.
    """
    entries = []
    current_item = None
    sub_items = []
    in_list_item = False

    def flush():
        # Emit the open numbered entry even when it has no sub-bullets;
        # otherwise such items would vanish from the output.
        nonlocal current_item, sub_items
        if current_item is not None:
            entries.append([current_item, sub_items])
        current_item, sub_items = None, []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('# '):
            continue

        if line.startswith('## '):
            flush()
            # Slice off only the leading marker; "## " appearing later in
            # the heading text is kept.
            entries.append(f"<b>{line[3:].strip()}</b>")
            in_list_item = False
        elif re.match(r'\d+\.', line):
            flush()
            current_item = line
            in_list_item = True
        elif in_list_item:
            if line.startswith('- '):
                sub_items.append(line)
            # Other text inside a numbered entry is intentionally ignored.
        else:
            entries.append(line)

    flush()

    mid_point = len(entries) // 2
    return entries[:mid_point], entries[mid_point:]
|
|
|
|
|
|
|
def _inline_markup(text):
    """Convert markdown ``**bold**`` to ``<b>`` markup, then tag emoji runs."""
    text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
    return apply_emoji_font(text, selected_font_name)


def _column_cells(column, section_style, item_style, subitem_style):
    """Build the Paragraph cells for one column of parsed outline entries."""
    cells = []
    for entry in column:
        if isinstance(entry, list):
            main_item, sub_items = entry
            cells.append(Paragraph(_inline_markup(main_item), item_style))
            cells.extend(
                Paragraph(_inline_markup(sub_item), subitem_style)
                for sub_item in sub_items
            )
        elif entry.startswith('<b>'):
            # Section headings arrive wrapped in <b>...</b>; the bold face
            # comes from section_style, so the wrapper is removed.
            heading = entry.replace('<b>', '').replace('</b>', '')
            cells.append(Paragraph(_inline_markup(heading), section_style))
        else:
            cells.append(Paragraph(_inline_markup(entry), item_style))
    return cells


def create_main_pdf(markdown_text, base_font_size=10, auto_size=False):
    """Render the markdown outline as a landscape A4, two-column PDF.

    Args:
        markdown_text: Outline markdown, parsed by ``markdown_to_pdf_content``.
        base_font_size: Point size for numbered items; sub-items, section
            headings and the title are scaled from it (0.9x, 1.2x and 1.5x
            capped at 16). Ignored when ``auto_size`` is true.
        auto_size: When true, derive the base size as ``200 / item_count``
            clamped to the range 6-12 points.

    Returns:
        The generated PDF document as ``bytes``.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # width/height swapped -> landscape
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
    )

    styles = getSampleStyleSheet()
    spacer_height = 10
    left_column, right_column = markdown_to_pdf_content(markdown_text)

    if auto_size:
        # A numbered entry counts once plus once per sub-bullet.
        total_items = sum(
            1 + len(entry[1]) if isinstance(entry, list) else 1
            for column in (left_column, right_column)
            for entry in column
        )
        # max(..., 1) keeps the division defined when nothing was parsed.
        base_font_size = max(6, min(12, 200 / max(total_items, 1)))

    item_font_size = base_font_size
    subitem_font_size = base_font_size * 0.9
    section_font_size = base_font_size * 1.2
    title_font_size = min(16, base_font_size * 1.5)

    title_style = ParagraphStyle(
        'Heading1',
        parent=styles['Heading1'],
        fontName="Helvetica-Bold",
        textColor=colors.darkblue,
        alignment=1,
        fontSize=title_font_size,
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        fontName="Helvetica-Bold",
        textColor=colors.darkblue,
        fontSize=section_font_size,
        leading=section_font_size * 1.2,
        spaceAfter=2,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontName="Helvetica",
        fontSize=item_font_size,
        leading=item_font_size * 1.2,
        spaceAfter=1,
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontName="Helvetica",
        fontSize=subitem_font_size,
        leading=subitem_font_size * 1.2,
        leftIndent=10,
        spaceAfter=1,
    )

    story = [
        Paragraph(
            apply_emoji_font("Cutting-Edge ML Outline (ReportLab)", selected_font_name),
            title_style,
        ),
        Spacer(1, spacer_height),
    ]

    left_cells = _column_cells(left_column, section_style, item_style, subitem_style)
    right_cells = _column_cells(right_column, section_style, item_style, subitem_style)

    # Pad the shorter column with empty cells so the rows pair up.
    row_count = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (row_count - len(left_cells)))
    right_cells.extend([""] * (row_count - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # An empty outline produces no rows; skip the table in that case because
    # Table rejects empty data under ReportLab's default settings.
    if table_data:
        # Usable page width (landscape width minus both 36pt margins), halved.
        col_width = (A4[1] - 72) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            # Thin vertical rule between the two columns.
            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
|
|
|
|
|
|
|
def pdf_to_image(pdf_bytes):
    """Rasterise the first page of a PDF into a PIL image.

    Args:
        pdf_bytes: The PDF document as raw bytes.

    Returns:
        An RGB ``PIL.Image.Image`` of page 1 rendered at 2x zoom, or ``None``
        when rendering fails (the error is shown via ``st.error``).
    """
    try:
        # The context manager closes the document even when page access or
        # rasterisation raises, so the handle is never leaked.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    except Exception as e:
        # Best-effort preview: report the problem in the UI and let the
        # caller fall back instead of crashing the app.
        st.error(f"Failed to render PDF preview: {e}")
        return None
|
|
|
|
|
|
|
# Sidebar: choose between automatic and manual font sizing.
with st.sidebar:
    auto_size = st.checkbox("Auto-size text", value=True)
    if auto_size:
        # Placeholder only; create_main_pdf recomputes the size when
        # auto_size is enabled.
        base_font_size = 10
        st.info("Font size will auto-adjust between 6-12 points based on content length.")
    else:
        base_font_size = st.slider(
            "Base Font Size (points)",
            min_value=6,
            max_value=16,
            value=10,
            step=1,
        )

# Seed the editable markdown with the default outline if none is stored yet.
if "markdown_content" not in st.session_state:
    st.session_state["markdown_content"] = default_markdown
|
|
|
|
|
|
|
# Build the PDF from the markdown currently held in session state.
with st.spinner("Generating PDF..."):
    pdf_bytes = create_main_pdf(
        st.session_state.markdown_content,
        base_font_size,
        auto_size,
    )

# Show a rasterised preview, or a hint when the preview could not be rendered.
with st.container():
    pdf_image = pdf_to_image(pdf_bytes)
    if not pdf_image:
        st.info("Download the PDF to view it locally.")
    else:
        st.image(pdf_image, use_container_width=True)

# Offer the generated PDF for download.
st.download_button(
    label="Download PDF",
    data=pdf_bytes,
    file_name="ml_outline.pdf",
    mime="application/pdf",
)
|
|
|
|
|
# Editor: edits are committed to session state only when "Update PDF" is
# pressed, after which the script is rerun so the PDF is regenerated.
edited_markdown = st.text_area(
    "Modify the markdown content below:",
    value=st.session_state.markdown_content,
    height=300,
)

if st.button("Update PDF"):
    st.session_state.markdown_content = edited_markdown
    # st.rerun() replaces st.experimental_rerun(), which has been removed
    # from Streamlit; this script already needs a release that has st.rerun
    # because it passes use_container_width to st.image.
    st.rerun()

# Download the markdown that is currently committed (not unsaved edits).
st.download_button(
    label="Save Markdown",
    data=st.session_state.markdown_content,
    file_name="ml_outline.md",
    mime="text/markdown",
)
|
|