|
import base64
import html
import io
import os
import re
from itertools import zip_longest
from xml.sax.saxutils import escape

import cv2
import fitz
import fpdf
import numpy as np
import pikepdf
import streamlit as st
from PIL import Image
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
|
|
|
|
# Markdown outline rendered in the page, offered as a download, and parsed
# into the PDF demos.
# NOTE(review): the glyph right after each item number looks like a
# mis-decoded emoji (UTF-8 bytes read through a single-byte Greek codepage).
# The original code points cannot be recovered from what is left, so the
# glyphs are kept unchanged here -- restore them from the upstream source.
ml_markdown = """# Cutting-Edge ML Outline

## Core ML Techniques
1. π **Mixture of Experts (MoE)**
    - Conditional computation techniques
    - Sparse gating mechanisms
    - Training specialized sub-models

2. π₯ **Supervised Fine-Tuning (SFT) using PyTorch**
    - Loss function customization
    - Gradient accumulation strategies
    - Learning rate schedulers

3. π€ **Large Language Models (LLM) using Transformers**
    - Attention mechanisms
    - Tokenization strategies
    - Position encodings

## Training Methods
4. π **Self-Rewarding Learning using NPS 0-10 and Verbatims**
    - Custom reward functions
    - Feedback categorization
    - Signal extraction from text

5. π **Reinforcement Learning from Human Feedback (RLHF)**
    - Preference datasets
    - PPO implementation
    - KL divergence constraints

6. π **MergeKit: Merging Models to Same Embedding Space**
    - TIES merging
    - Task arithmetic
    - SLERP interpolation

## Optimization & Deployment
7. π **DistillKit: Model Size Reduction with Spectrum Analysis**
    - Knowledge distillation
    - Quantization techniques
    - Model pruning strategies

8. π§ **Agentic RAG Agents using Document Inputs**
    - Vector database integration
    - Query planning
    - Self-reflection mechanisms

9. β³ **Longitudinal Data Summarization from Multiple Docs**
    - Multi-document compression
    - Timeline extraction
    - Entity tracking

## Knowledge Representation
10. π **Knowledge Extraction using Markdown Knowledge Graphs**
    - Entity recognition
    - Relationship mapping
    - Hierarchical structuring

11. πΊοΈ **Knowledge Mapping with Mermaid Diagrams**
    - Flowchart generation
    - Sequence diagram creation
    - State diagrams

12. π» **ML Code Generation with Streamlit/Gradio/HTML5+JS**
    - Code completion
    - Unit test generation
    - Documentation synthesis
"""
|
|
|
|
|
def markdown_to_pdf_content(markdown_text):
    """Parse a markdown outline into two columns of PDF-ready entries.

    Every non-blank line is stripped and classified:

    * ``# title`` lines are skipped (the renderers draw their own title).
    * ``## section`` lines become the string ``"<b>section</b>"``.
    * ``N. item`` lines open a numbered item; the ``- `` lines that follow
      are collected as its sub-items and the pair is stored as
      ``[item, [sub_item, ...]]``.
    * Any other line is stored as a plain string when it appears outside a
      numbered item, and is ignored when it appears inside one.

    A numbered item is kept even when it has no sub-items (its sub-item list
    is then empty).

    Args:
        markdown_text: The markdown source to parse.

    Returns:
        A ``(left_column, right_column)`` tuple: the parsed entries split at
        the midpoint, with the extra entry going to the right column when the
        count is odd.
    """
    import re

    numbered_item = re.compile(r'^\d+\.')

    pdf_content = []
    in_list_item = False
    current_item = None
    sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('# '):
            # Document title: handled separately by each renderer.
            continue

        starts_section = line.startswith('## ')
        starts_item = not starts_section and numbered_item.match(line) is not None

        if starts_section or starts_item:
            # Close the item being collected before opening anything new.
            # It is emitted even with zero sub-items so it cannot vanish.
            if current_item is not None:
                pdf_content.append([current_item, sub_items])
            current_item = None
            sub_items = []

        if starts_section:
            # Drop only the leading marker; a later "## " inside the heading
            # text is part of the heading.
            section = line[3:].strip()
            pdf_content.append(f"<b>{section}</b>")
            in_list_item = False
        elif starts_item:
            current_item = line
            in_list_item = True
        elif line.startswith('- ') and in_list_item:
            sub_items.append(line)
        elif not in_list_item:
            pdf_content.append(line)

    # Emit the item that was still open when the input ended.
    if current_item is not None:
        pdf_content.append([current_item, sub_items])

    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
|
|
|
|
|
def demo_pikepdf():
    """Create a two-column PDF of the markdown outline and re-save it with pikepdf.

    The landscape page is drawn with PyMuPDF (``fitz``); the resulting bytes
    are then opened and written out again through pikepdf, so the returned
    document is the one pikepdf produced.

    Returns:
        bytes: The serialized PDF.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    # PyMuPDF's reserved Base-14 codes: "hebo" is Helvetica-Bold, "helv" is
    # Helvetica. Any other name requires a font file or buffer.
    bold_font = "hebo"
    regular_font = "helv"
    blue_color = (0, 0, 0.8)
    black_color = (0, 0, 0)

    doc = fitz.open()
    try:
        page = doc.new_page(width=842, height=595)

        def draw_column(items, x, y_top):
            """Draw outline entries top-down starting at (x, y_top)."""
            y = y_top
            for item in items:
                if isinstance(item, list):
                    main_item, sub_items = item
                    page.insert_text((x, y), main_item, fontname=bold_font, fontsize=12, color=black_color)
                    y += 20
                    for sub_item in sub_items:
                        page.insert_text((x + 20, y), sub_item, fontname=regular_font, fontsize=10, color=black_color)
                        y += 15
                    y += 5
                elif item.startswith('<b>'):
                    # Extra gap above a section heading unless it opens the column.
                    if y > y_top:
                        y += 10
                    text = item.replace('<b>', '').replace('</b>', '')
                    page.insert_text((x, y), text, fontname=bold_font, fontsize=14, color=blue_color)
                    y += 25
                else:
                    page.insert_text((x, y), item, fontname=bold_font, fontsize=12, color=black_color)
                    y += 20

        page.insert_text((50, 40), "Cutting-Edge ML Outline (PikePDF Demo)", fontname=bold_font, fontsize=16, color=blue_color)
        draw_column(left_column, 50, 80)
        draw_column(right_column, 450, 80)

        # Vertical divider between the two columns.
        page.draw_line((421, 70), (421, 550))

        temp_buffer = io.BytesIO()
        doc.save(temp_buffer)
    finally:
        doc.close()

    temp_buffer.seek(0)
    buffer = io.BytesIO()
    with pikepdf.Pdf.open(temp_buffer) as pdf:
        pdf.save(buffer)
    return buffer.getvalue()
|
|
|
def demo_fpdf():
    """Create a two-column landscape PDF of the markdown outline using FPDF.

    Both columns are positioned by hand from a running y offset, so automatic
    page breaks are switched off: a break in the middle of a column would
    reset the cursor and corrupt the measured cell heights.

    Returns:
        bytes: The serialized PDF.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    def latin1(text):
        """Replace characters the core font cannot encode with '?'."""
        # The built-in (non-embedded) fonts are limited to Latin-1.
        return text.encode("latin-1", "replace").decode("latin-1")

    pdf = fpdf.FPDF(orientation='L')
    pdf.set_auto_page_break(False)
    pdf.add_page()

    pdf.set_font("Arial", 'B', size=16)
    pdf.set_text_color(0, 0, 128)
    pdf.cell(0, 10, txt="Cutting-Edge ML Outline (FPDF Demo)", ln=True, align='C')
    pdf.ln(10)

    x_col1 = 20
    x_col2 = pdf.w / 2 + 10
    y_start = pdf.get_y()
    # Same usable width for both columns: each stops 10 units short of the
    # centre divider or mirrors the left page margin on the right.
    col_width = pdf.w / 2 - x_col1 - 10
    sub_indent = 10

    def render_column(items, x_start, y_start):
        """Render outline entries top-down starting at (x_start, y_start)."""
        y_pos = y_start

        for item in items:
            if isinstance(item, str) and item.startswith('<b>'):
                text = item.replace('<b>', '').replace('</b>', '')
                pdf.set_font("Arial", 'B', size=14)
                pdf.set_text_color(0, 0, 128)
                pdf.set_xy(x_start, y_pos)
                pdf.cell(col_width, 10, txt=latin1(text), ln=True)
                y_pos += 10
            elif isinstance(item, list):
                main_item, sub_items = item

                pdf.set_font("Arial", 'B', size=11)
                pdf.set_text_color(0, 0, 0)
                pdf.set_xy(x_start, y_pos)
                pdf.multi_cell(col_width, 6, txt=latin1(main_item), align='L')
                # multi_cell may wrap, so advance by the height it really used.
                y_pos = pdf.get_y() + 2

                pdf.set_font("Arial", size=10)
                for sub_item in sub_items:
                    pdf.set_xy(x_start + sub_indent, y_pos)
                    pdf.multi_cell(col_width - sub_indent, 5, txt=latin1(sub_item), align='L')
                    y_pos = pdf.get_y() + 1

                y_pos += 2
            else:
                pdf.set_font("Arial", 'B', size=11)
                pdf.set_text_color(0, 0, 0)
                pdf.set_xy(x_start, y_pos)
                pdf.multi_cell(col_width, 6, txt=latin1(item), align='L')
                y_pos = pdf.get_y() + 4

    render_column(left_column, x_col1, y_start)
    render_column(right_column, x_col2, y_start)

    # Centre divider, kept inside the page height.
    pdf.line(pdf.w / 2, 30, pdf.w / 2, pdf.h - 10)

    buffer = io.BytesIO()
    pdf.output(buffer)
    return buffer.getvalue()
|
|
|
def demo_pymupdf():
    """Create a two-column landscape PDF of the markdown outline using PyMuPDF.

    Returns:
        bytes: The serialized PDF.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    # PyMuPDF's reserved Base-14 codes: "hebo" is Helvetica-Bold, "helv" is
    # Helvetica. Any other name requires a font file or buffer.
    bold_font = "hebo"
    regular_font = "helv"
    blue_color = (0, 0, 0.8)
    black_color = (0, 0, 0)

    doc = fitz.open()
    try:
        page = doc.new_page(width=842, height=595)

        def draw_column(items, x, y_top):
            """Draw outline entries top-down starting at (x, y_top)."""
            y = y_top
            for item in items:
                if isinstance(item, list):
                    main_item, sub_items = item
                    page.insert_text((x, y), main_item, fontname=bold_font, fontsize=12, color=black_color)
                    y += 20
                    for sub_item in sub_items:
                        page.insert_text((x + 20, y), sub_item, fontname=regular_font, fontsize=10, color=black_color)
                        y += 15
                    y += 5
                elif item.startswith('<b>'):
                    # Extra gap above a section heading unless it opens the column.
                    if y > y_top:
                        y += 10
                    text = item.replace('<b>', '').replace('</b>', '')
                    page.insert_text((x, y), text, fontname=bold_font, fontsize=14, color=blue_color)
                    y += 25
                else:
                    page.insert_text((x, y), item, fontname=bold_font, fontsize=12, color=black_color)
                    y += 20

        page.insert_text((300, 40), "Cutting-Edge ML Outline (PyMuPDF Demo)", fontname=bold_font, fontsize=16, color=blue_color)
        draw_column(left_column, 50, 80)
        draw_column(right_column, 450, 80)

        # Vertical divider between the two columns.
        page.draw_line((421, 70), (421, 550))

        buffer = io.BytesIO()
        doc.save(buffer)
    finally:
        doc.close()

    return buffer.getvalue()
|
|
|
|
|
def demo_image_capture():
    """Generate a synthetic 640x480 JPEG that stands in for a camera capture.

    The picture is a diagonal gradient with a text label drawn on top.

    Returns:
        bytes: The JPEG-encoded image.
    """
    from PIL import ImageDraw, ImageFont

    width, height = 640, 480

    # Diagonal gradient in [0, 1], one value per pixel.
    x = np.linspace(0, 1, width)
    y = np.linspace(0, 1, height)
    xx, yy = np.meshgrid(x, y)
    gradient = (xx + yy) / 2

    img_array = (gradient * 255).astype(np.uint8)
    # Doubling in uint8 would wrap around past 255; widen first and saturate.
    doubled = np.minimum(img_array.astype(np.uint16) * 2, 255).astype(np.uint8)
    rgb_array = np.stack([img_array, img_array // 2, doubled], axis=2)

    img = Image.fromarray(rgb_array)
    draw = ImageDraw.Draw(img)

    try:
        font = ImageFont.truetype("arial.ttf", 30)
    except OSError:
        # The font file is not available here; use Pillow's built-in font.
        font = ImageFont.load_default()

    draw.text((width // 4, height // 2), "OpenCV Demo Image", fill=(255, 255, 255), font=font)

    buffer = io.BytesIO()
    img.save(buffer, format="JPEG")
    return buffer.getvalue()
|
|
|
|
|
def create_main_pdf(markdown_text):
    """Create a landscape A4 PDF with the outline laid out in two columns.

    The outline is parsed with ``markdown_to_pdf_content`` and rendered with
    ReportLab as a two-column table, one paragraph per table cell.

    Args:
        markdown_text: Markdown outline to render.

    Returns:
        bytes: The serialized PDF.
    """
    left_column, right_column = markdown_to_pdf_content(markdown_text)

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # A4 with width and height swapped
        leftMargin=50,
        rightMargin=50,
        topMargin=50,
        bottomMargin=50,
    )

    styles = getSampleStyleSheet()

    title_style = styles['Heading1']
    title_style.textColor = colors.darkblue
    title_style.alignment = 1  # 1 == centred (reportlab.lib.enums.TA_CENTER)

    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        spaceAfter=6,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=11,
        leading=14,
        fontName='Helvetica-Bold',
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontSize=10,
        leading=12,
        leftIndent=20,
    )

    def build_cells(items):
        """Turn parsed outline entries into a flat list of Paragraphs."""
        cells = []
        for item in items:
            # Paragraph parses XML-style markup, so outline text is escaped
            # to keep characters such as "&" and "<" literal.
            if isinstance(item, list):
                main_item, sub_items = item
                cells.append(Paragraph(escape(main_item), item_style))
                cells.extend(Paragraph(escape(sub_item), subitem_style) for sub_item in sub_items)
            elif item.startswith('<b>'):
                text = item.replace('<b>', '').replace('</b>', '')
                cells.append(Paragraph(escape(text), section_style))
            else:
                cells.append(Paragraph(escape(item), item_style))
        return cells

    story = [
        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
        Spacer(1, 20),
    ]

    # Pair the columns row by row; the shorter one is padded with blanks.
    table_data = [
        list(row)
        for row in zip_longest(build_cells(left_column), build_cells(right_column), fillvalue="")
    ]

    # A Table needs at least one row, so an empty outline yields a title-only page.
    if table_data:
        col_width = (A4[1] - 120) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width])
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.white),
            ('LINEAFTER', (0, 0), (0, -1), 1, colors.grey),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
|
|
|
def get_binary_file_downloader_html(bin_data, file_label='File'):
    """Create an HTML download link that embeds binary data as a data URI.

    Args:
        bin_data: Bytes to embed, base64-encoded, in the link target.
        file_label: Suggested file name; also shown in the link text. It is
            HTML-escaped so quotes or angle brackets cannot break the markup.

    Returns:
        str: An ``<a>`` element whose ``href`` is the data URI.
    """
    bin_str = base64.b64encode(bin_data).decode()
    safe_label = html.escape(file_label, quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{bin_str}" '
        f'download="{safe_label}">Download {safe_label}</a>'
    )
|
|
|
|
|
# Streamlit page: markdown outline on the left, PDF/image demos on the right.
st.title("π Cutting-Edge ML Outline Generator")

col1, col2 = st.columns(2)

with col1:
    st.header("π Markdown Outline")

    st.markdown(ml_markdown)

    st.download_button(
        label="Download Markdown",
        data=ml_markdown,
        file_name="ml_outline.md",
        mime="text/markdown",
    )

    with st.expander("View Markdown Source"):
        st.code(ml_markdown, language="markdown")

with col2:
    st.header("π PDF Preview & Demos")

    st.subheader("Library Demos")

    # One entry per PDF library demo: (display name, builder, download name).
    library_demos = (
        ("PikePDF", demo_pikepdf, "pikepdf_demo.pdf"),
        ("FPDF", demo_fpdf, "fpdf_demo.pdf"),
        ("PyMuPDF", demo_pymupdf, "pymupdf_demo.pdf"),
    )
    for library, build_demo, demo_file_name in library_demos:
        if st.button(f"Generate {library} Demo"):
            with st.spinner(f"Generating {library} demo..."):
                # Report failures in the page, as the main PDF section does.
                try:
                    demo_bytes = build_demo()
                except Exception as e:
                    st.error(f"Error generating {library} demo: {str(e)}")
                else:
                    st.download_button(
                        f"Download {library} Demo",
                        demo_bytes,
                        demo_file_name,
                        mime="application/pdf",
                    )
                    st.success(f"{library} demo created successfully!")
                    st.info("This PDF contains the multilevel markdown outline in a two-column layout.")

    if st.button("Generate Demo Image"):
        with st.spinner("Generating demo image..."):
            img_data = demo_image_capture()
            st.image(img_data, caption="Demo Image (Camera simulation)")

            st.download_button(
                label="Download Image",
                data=img_data,
                file_name="demo_image.jpg",
                mime="image/jpeg",
            )

    st.subheader("Main Outline PDF")
    if st.button("Generate Main PDF"):
        with st.spinner("Generating PDF..."):
            try:
                pdf_bytes = create_main_pdf(ml_markdown)

                st.download_button(
                    label="Download Main PDF",
                    data=pdf_bytes,
                    file_name="ml_outline.pdf",
                    mime="application/pdf",
                )

                # Inline preview: the PDF is embedded as a base64 data URI.
                base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
                pdf_display = f'''
                <embed
                    src="data:application/pdf;base64,{base64_pdf}"
                    width="100%"
                    height="400px"
                    type="application/pdf">
                '''
                st.markdown(pdf_display, unsafe_allow_html=True)

                st.success("PDF generated successfully! The PDF displays the multilevel markdown outline in a two-column layout.")
            except Exception as e:
                st.error(f"Error generating PDF: {str(e)}")

    with st.expander("View PDF Rendering Code"):
        st.code("""
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    # Convert markdown headers to styled text for PDF
    lines = markdown_text.strip().split('\\n')
    pdf_content = []

    for line in lines:
        if line.startswith('# '):
            # Main header - will be handled separately
            pass
        elif line.startswith('## '):
            # Section header - add as a bold item
            section = line.replace('## ', '').strip()
            pdf_content.append(f"<b>{section}</b>")
        elif re.match(r'^\\d+\\.', line):
            # Numbered list item
            item = line.strip()
            pdf_content.append(item)
        elif line.startswith('- '):
            # Sub-item under a numbered list item
            sub_item = line.strip()
            pdf_content.append(" " + sub_item)

    # Split the content for two columns
    mid_point = len(pdf_content) // 2
    left_column = pdf_content[:mid_point]
    right_column = pdf_content[mid_point:]

    return left_column, right_column
""", language="python")

# Page-wide CSS overrides for buttons and tabs.
st.markdown("""
<style>
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        font-weight: bold;
    }
    .stTabs [data-baseweb="tab-list"] {
        gap: 2px;
    }
    .stTabs [data-baseweb="tab"] {
        height: 50px;
        white-space: pre-wrap;
        background-color: #f0f2f6;
        border-radius: 4px 4px 0px 0px;
        gap: 1px;
        padding-top: 10px;
        padding-bottom: 10px;
    }
</style>
""", unsafe_allow_html=True)