# Page-header residue captured with this file ("Spaces: Sleeping") - not part of the program.
import base64
import html
import io
import os
import re
from xml.sax import saxutils

import cv2
import fitz  # pymupdf
import fpdf
import numpy as np
import pikepdf
import streamlit as st
from PIL import Image
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
# Multilevel ML outline: one "# " title, "## " sections, numbered items and
# "- " sub-bullets. It is shown on screen and fed to every PDF builder below.
# NOTE(review): the symbol after each item number (e.g. "π") looks like a
# mis-decoded emoji; it is kept verbatim here - restore it from the original
# source if that is available.
# NOTE(review): sub-bullets are indented three spaces so they nest under their
# numbered item when rendered as markdown; the parser strips each line, so the
# indentation does not affect PDF output. Confirm against the original file.
ml_markdown = """# Cutting-Edge ML Outline
## Core ML Techniques
1. π **Mixture of Experts (MoE)**
   - Conditional computation techniques
   - Sparse gating mechanisms
   - Training specialized sub-models
2. π₯ **Supervised Fine-Tuning (SFT) using PyTorch**
   - Loss function customization
   - Gradient accumulation strategies
   - Learning rate schedulers
3. π€ **Large Language Models (LLM) using Transformers**
   - Attention mechanisms
   - Tokenization strategies
   - Position encodings
## Training Methods
4. π **Self-Rewarding Learning using NPS 0-10 and Verbatims**
   - Custom reward functions
   - Feedback categorization
   - Signal extraction from text
5. π **Reinforcement Learning from Human Feedback (RLHF)**
   - Preference datasets
   - PPO implementation
   - KL divergence constraints
6. π **MergeKit: Merging Models to Same Embedding Space**
   - TIES merging
   - Task arithmetic
   - SLERP interpolation
## Optimization & Deployment
7. π **DistillKit: Model Size Reduction with Spectrum Analysis**
   - Knowledge distillation
   - Quantization techniques
   - Model pruning strategies
8. π§ **Agentic RAG Agents using Document Inputs**
   - Vector database integration
   - Query planning
   - Self-reflection mechanisms
9. β³ **Longitudinal Data Summarization from Multiple Docs**
   - Multi-document compression
   - Timeline extraction
   - Entity tracking
## Knowledge Representation
10. π **Knowledge Extraction using Markdown Knowledge Graphs**
   - Entity recognition
   - Relationship mapping
   - Hierarchical structuring
11. πΊοΈ **Knowledge Mapping with Mermaid Diagrams**
   - Flowchart generation
   - Sequence diagram creation
   - State diagrams
12. π» **ML Code Generation with Streamlit/Gradio/HTML5+JS**
   - Code completion
   - Unit test generation
   - Documentation synthesis
"""
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    """Parse the outline markdown into two lists of entries, one per PDF column.

    Each entry is one of:
      * ``"<b>Section</b>"`` - a ``## `` section header, wrapped in bold tags;
      * ``[item, sub_items]`` - a numbered item (``"1. ..."``) together with
        the list of ``"- ..."`` bullet lines that follow it (possibly empty);
      * a plain string - any other line that appears outside a numbered item.

    The ``# `` title line and blank lines are ignored. A non-bullet line that
    appears inside a numbered item is ignored as well.

    Args:
        markdown_text: The outline as a markdown string.

    Returns:
        A ``(left_column, right_column)`` tuple: the parsed entries split at
        the midpoint of the entry count (the right column gets the extra
        entry when the count is odd).
    """
    pdf_content = []
    current_item = None
    sub_items = []
    in_list_item = False

    def flush_item():
        """Store the pending numbered item (if any) and reset the accumulator."""
        nonlocal current_item, sub_items
        # Flush even when there are no sub-items; otherwise a bare numbered
        # item would silently disappear from the output.
        if current_item is not None:
            pdf_content.append([current_item, sub_items])
        current_item = None
        sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('# '):
            # Blank line, or the main title (rendered separately by callers).
            continue
        if line.startswith('## '):
            flush_item()
            section = line.replace('## ', '').strip()
            pdf_content.append(f"<b>{section}</b>")
            in_list_item = False
        elif re.match(r'^\d+\.', line):
            flush_item()
            current_item = line
            in_list_item = True
        elif line.startswith('- ') and in_list_item:
            sub_items.append(line)
        elif not in_list_item:
            pdf_content.append(line)

    flush_item()

    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
# Demo functions for PDF libraries
def demo_pikepdf():
    """Build the two-column outline PDF with PyMuPDF, then re-save it via pikepdf.

    The page is drawn with PyMuPDF (``fitz``); the resulting bytes are opened
    with pikepdf and written out again, so the returned document has been
    round-tripped through pikepdf.

    Returns:
        The PDF file content as ``bytes``.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    doc = fitz.open()
    page = doc.new_page(width=842, height=595)  # A4 landscape, in points

    # PyMuPDF's built-in (Base-14) font codes: "hebo" is Helvetica-Bold and
    # "helv" is Helvetica. "helv-b" is not a built-in code and would need a
    # font file to be supplied.
    bold_font = "hebo"
    regular_font = "helv"
    blue_color = (0, 0, 0.8)
    black_color = (0, 0, 0)

    def draw_column(items, x, y_top):
        """Draw one column of parsed outline entries starting at (x, y_top)."""
        y = y_top
        for item in items:
            if isinstance(item, str) and item.startswith('<b>'):
                # Section header; add extra space before every header except
                # one that opens the column.
                if y > y_top:
                    y += 10
                text = item.replace('<b>', '').replace('</b>', '')
                page.insert_text((x, y), text, fontname=bold_font, fontsize=14, color=blue_color)
                y += 25
            elif isinstance(item, list):
                # Numbered item followed by its indented sub-items.
                main_item, sub_items = item
                page.insert_text((x, y), main_item, fontname=bold_font, fontsize=12, color=black_color)
                y += 20
                for sub_item in sub_items:
                    page.insert_text((x + 20, y), sub_item, fontname=regular_font, fontsize=10, color=black_color)
                    y += 15
                y += 5  # extra space after a group
            else:
                # Any other line.
                page.insert_text((x, y), item, fontname=bold_font, fontsize=12, color=black_color)
                y += 20

    page.insert_text((50, 40), "Cutting-Edge ML Outline (PikePDF Demo)", fontname=bold_font, fontsize=16, color=blue_color)
    draw_column(left_column, 50, 80)
    draw_column(right_column, 450, 80)

    # Vertical divider between the two columns (page midline).
    page.draw_line((421, 70), (421, 550))

    # Hand the PyMuPDF output to pikepdf through an in-memory buffer.
    temp_buffer = io.BytesIO()
    doc.save(temp_buffer)
    doc.close()
    temp_buffer.seek(0)

    buffer = io.BytesIO()
    with pikepdf.Pdf.open(temp_buffer) as pdf:
        pdf.save(buffer)
    return buffer.getvalue()
def demo_fpdf():
    """Build the two-column outline PDF with FPDF.

    Text is drawn with the core "Arial" font, which only covers Latin-1, so
    any character outside Latin-1 (the outline contains several) is replaced
    with "?" before drawing instead of failing during encoding.

    Returns:
        The PDF file content as ``bytes``.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    pdf = fpdf.FPDF(orientation='L')  # Landscape
    # Both columns are positioned by hand on a single page; an automatic page
    # break in the middle of a column would invalidate the tracked y offsets.
    pdf.set_auto_page_break(False)
    pdf.add_page()

    def latin1(text):
        """Return *text* with characters the core fonts cannot encode replaced by '?'."""
        return text.encode('latin-1', 'replace').decode('latin-1')

    # Title
    pdf.set_font("Arial", 'B', size=16)
    pdf.set_text_color(0, 0, 128)  # Dark blue
    pdf.cell(0, 10, txt="Cutting-Edge ML Outline (FPDF Demo)", ln=True, align='C')
    pdf.ln(10)

    # Column geometry: 20 units of outer margin and 10 units of clearance on
    # each side of the centre divider, so text wraps inside its own column.
    x_col1 = 20
    x_col2 = pdf.w / 2 + 10
    col_width = pdf.w / 2 - 30
    sub_indent = 10
    y_start = pdf.get_y()

    def render_column(items, x_start, y_start):
        """Draw one column of parsed outline entries starting at (x_start, y_start)."""
        y_pos = y_start
        for item in items:
            if isinstance(item, str) and item.startswith('<b>'):
                # Section header
                text = item.replace('<b>', '').replace('</b>', '')
                pdf.set_font("Arial", 'B', size=14)
                pdf.set_text_color(0, 0, 128)  # Dark blue
                pdf.set_xy(x_start, y_pos)
                pdf.cell(col_width, 8, txt=latin1(text), ln=True)
                y_pos += 8
            elif isinstance(item, list):
                # Numbered item followed by its indented sub-items.
                main_item, sub_items = item
                pdf.set_font("Arial", 'B', size=11)
                pdf.set_text_color(0, 0, 0)  # Black
                pdf.set_xy(x_start, y_pos)
                pdf.multi_cell(col_width, 5, txt=latin1(main_item), align='L')
                # multi_cell may wrap, so advance by the height actually used.
                y_pos = pdf.get_y() + 1
                pdf.set_font("Arial", size=10)
                for sub_item in sub_items:
                    pdf.set_xy(x_start + sub_indent, y_pos)
                    pdf.multi_cell(col_width - sub_indent, 4, txt=latin1(sub_item), align='L')
                    y_pos = pdf.get_y() + 0.5
                y_pos += 1.5  # extra space after a group
            else:
                # Any other line.
                pdf.set_font("Arial", 'B', size=11)
                pdf.set_text_color(0, 0, 0)  # Black
                pdf.set_xy(x_start, y_pos)
                pdf.multi_cell(col_width, 5, txt=latin1(item), align='L')
                y_pos = pdf.get_y() + 3

    render_column(left_column, x_col1, y_start)
    render_column(right_column, x_col2, y_start)

    # Vertical divider, kept inside the page height (landscape pages are
    # shorter than the fixed 280 used for portrait layouts).
    pdf.line(pdf.w / 2, 30, pdf.w / 2, pdf.h - 10)

    buffer = io.BytesIO()
    pdf.output(buffer)
    buffer.seek(0)
    return buffer.getvalue()
def demo_pymupdf():
    """Build the two-column outline PDF with PyMuPDF (``fitz``).

    Returns:
        The PDF file content as ``bytes``.
    """
    left_column, right_column = markdown_to_pdf_content(ml_markdown)

    doc = fitz.open()
    page = doc.new_page(width=842, height=595)  # A4 landscape, in points

    # PyMuPDF's built-in (Base-14) font codes: "hebo" is Helvetica-Bold and
    # "helv" is Helvetica. "helv-b" is not a built-in code and would need a
    # font file to be supplied.
    bold_font = "hebo"
    regular_font = "helv"
    blue_color = (0, 0, 0.8)
    black_color = (0, 0, 0)

    def draw_column(items, x, y_top):
        """Draw one column of parsed outline entries starting at (x, y_top)."""
        y = y_top
        for item in items:
            if isinstance(item, str) and item.startswith('<b>'):
                # Section header; add extra space before every header except
                # one that opens the column.
                if y > y_top:
                    y += 10
                text = item.replace('<b>', '').replace('</b>', '')
                page.insert_text((x, y), text, fontname=bold_font, fontsize=14, color=blue_color)
                y += 25
            elif isinstance(item, list):
                # Numbered item followed by its indented sub-items.
                main_item, sub_items = item
                page.insert_text((x, y), main_item, fontname=bold_font, fontsize=12, color=black_color)
                y += 20
                for sub_item in sub_items:
                    page.insert_text((x + 20, y), sub_item, fontname=regular_font, fontsize=10, color=black_color)
                    y += 15
                y += 5  # extra space after a group
            else:
                # Any other line.
                page.insert_text((x, y), item, fontname=bold_font, fontsize=12, color=black_color)
                y += 20

    page.insert_text((300, 40), "Cutting-Edge ML Outline (PyMuPDF Demo)", fontname=bold_font, fontsize=16, color=blue_color)
    draw_column(left_column, 50, 80)
    draw_column(right_column, 450, 80)

    # Vertical divider between the two columns (page midline).
    page.draw_line((421, 70), (421, 550))

    buffer = io.BytesIO()
    doc.save(buffer)
    doc.close()
    return buffer.getvalue()
# Demo function for image capture
def demo_image_capture():
    """Generate a placeholder "capture": a 640x480 gradient image with a caption.

    No camera is accessed; the image is synthesised with NumPy and PIL.

    Returns:
        The image encoded as JPEG ``bytes``.
    """
    from PIL import ImageDraw, ImageFont

    width, height = 640, 480

    # Diagonal gradient in [0, 1]: average of the horizontal and vertical ramps.
    xx, yy = np.meshgrid(np.linspace(0, 1, width), np.linspace(0, 1, height))
    gradient = (xx + yy) / 2

    base = (gradient * 255).astype(np.uint8)
    # Doubling a uint8 array wraps around past 255; widen first and clip so
    # the blue channel saturates instead of jumping back to dark values.
    blue = np.clip(base.astype(np.uint16) * 2, 0, 255).astype(np.uint8)
    rgb_array = np.stack([base, base // 2, blue], axis=2)

    img = Image.fromarray(rgb_array)
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("arial.ttf", 30)
    except (OSError, ImportError):
        # Font file not found / FreeType support unavailable: use PIL's
        # built-in bitmap font instead.
        font = ImageFont.load_default()
    draw.text((width // 4, height // 2), "OpenCV Demo Image", fill=(255, 255, 255), font=font)

    buffer = io.BytesIO()
    img.save(buffer, format="JPEG")
    return buffer.getvalue()
# Main PDF creation using ReportLab
def create_main_pdf(markdown_text):
    """Create a landscape A4 PDF showing the outline in two table columns.

    The markdown is parsed with ``markdown_to_pdf_content``; each parsed
    entry becomes one or more ReportLab ``Paragraph`` cells, and the left and
    right cell lists are laid out row by row in a two-column table.

    Args:
        markdown_text: The outline as a markdown string.

    Returns:
        The PDF file content as ``bytes``.
    """
    left_column, right_column = markdown_to_pdf_content(markdown_text)

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # Landscape
        leftMargin=50,
        rightMargin=50,
        topMargin=50,
        bottomMargin=50,
    )
    styles = getSampleStyleSheet()

    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Heading1'],
        textColor=colors.darkblue,
        alignment=1,  # Center alignment
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        spaceAfter=6,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=11,
        leading=14,
        fontName='Helvetica-Bold',
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontSize=10,
        leading=12,
        leftIndent=20,
    )

    def to_markup(text):
        """Turn a raw outline line into Paragraph markup.

        Paragraph text is parsed as XML-like markup, so "&", "<" and ">" are
        escaped first; markdown ``**bold**`` spans are then converted to
        ``<b>`` tags instead of being printed with literal asterisks.
        """
        return re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', saxutils.escape(text))

    def build_cells(column):
        """Convert one column of parsed entries into a flat list of Paragraphs."""
        cells = []
        for item in column:
            if isinstance(item, str) and item.startswith('<b>'):
                # Section header
                text = item.replace('<b>', '').replace('</b>', '')
                cells.append(Paragraph(to_markup(text), section_style))
            elif isinstance(item, list):
                # Numbered item followed by its sub-items.
                main_item, sub_items = item
                cells.append(Paragraph(to_markup(main_item), item_style))
                cells.extend(Paragraph(to_markup(sub), subitem_style) for sub in sub_items)
            else:
                # Any other line.
                cells.append(Paragraph(to_markup(item), item_style))
        return cells

    story = [
        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
        Spacer(1, 20),
    ]

    left_cells = build_cells(left_column)
    right_cells = build_cells(right_column)

    # Pad the shorter column with empty cells so every table row has two entries.
    row_count = max(len(left_cells), len(right_cells))
    left_cells += [""] * (row_count - len(left_cells))
    right_cells += [""] * (row_count - len(right_cells))
    table_data = [list(pair) for pair in zip(left_cells, right_cells)]

    # An empty outline yields no rows; skip the table rather than constructing
    # one from empty data.
    if table_data:
        # Landscape page width minus 120 points, split evenly between columns.
        col_width = (A4[1] - 120) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width])
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.white),
            # Grey rule to the right of the first column acts as the divider.
            ('LINEAFTER', (0, 0), (0, -1), 1, colors.grey),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
def get_binary_file_downloader_html(bin_data, file_label='File'):
    """Return an HTML ``<a>`` tag that downloads *bin_data* via a data URI.

    Args:
        bin_data: The file content as bytes; it is base64-encoded into the
            link's ``href``.
        file_label: Name used for the ``download`` attribute and in the link
            text. It is HTML-escaped so quotes or angle brackets in the label
            cannot break out of the attribute or the tag.

    Returns:
        The anchor element as a string.
    """
    encoded = base64.b64encode(bin_data).decode()
    safe_label = html.escape(file_label, quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{safe_label}">Download {safe_label}</a>'
    )
# Streamlit UI
# NOTE(review): the leading "π" in the title and headers looks like a
# mis-decoded emoji; the strings are kept verbatim - restore them from the
# original source if that is available.
st.title("π Cutting-Edge ML Outline Generator")
col1, col2 = st.columns(2)

with col1:
    st.header("π Markdown Outline")
    # Rendered outline, a download of the raw markdown, and the raw source.
    st.markdown(ml_markdown)
    st.download_button(
        label="Download Markdown",
        data=ml_markdown,
        file_name="ml_outline.md",
        mime="text/markdown",
    )
    with st.expander("View Markdown Source"):
        st.code(ml_markdown, language="markdown")

with col2:
    st.header("π PDF Preview & Demos")

    # Library Demos
    st.subheader("Library Demos")
    # st.button is only True on the run right after the click, and clicking a
    # download button triggers another run. Generated files are therefore kept
    # in session state so the tabs and download buttons stay visible.
    if st.button("Run PDF Library Demos"):
        with st.spinner("Running demos..."):
            st.session_state["demo_outputs"] = {
                "pikepdf": demo_pikepdf(),
                "fpdf": demo_fpdf(),
                "pymupdf": demo_pymupdf(),
                "image": demo_image_capture(),
            }

    demo_outputs = st.session_state.get("demo_outputs")
    if demo_outputs:
        demo_tabs = st.tabs(["PikePDF", "FPDF", "PyMuPDF", "Image Demo"])
        with demo_tabs[0]:
            st.download_button("Download pikepdf Demo", demo_outputs["pikepdf"], "pikepdf_demo.pdf")
            st.write("PikePDF demo created successfully!")
            st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
        with demo_tabs[1]:
            st.download_button("Download fpdf Demo", demo_outputs["fpdf"], "fpdf_demo.pdf")
            st.write("FPDF demo created successfully!")
            st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
        with demo_tabs[2]:
            st.download_button("Download pymupdf Demo", demo_outputs["pymupdf"], "pymupdf_demo.pdf")
            st.write("PyMuPDF demo created successfully!")
            st.info("This PDF contains the multilevel markdown outline in a two-column layout.")
        with demo_tabs[3]:
            st.image(demo_outputs["image"], caption="Demo Image (Camera simulation)")

    # Main PDF Generation
    st.subheader("Main Outline PDF")
    if st.button("Generate Main PDF"):
        with st.spinner("Generating PDF..."):
            try:
                st.session_state["main_pdf"] = create_main_pdf(ml_markdown)
            except Exception as e:
                # Drop any stale result so a failed run does not show an old PDF.
                st.session_state.pop("main_pdf", None)
                st.error(f"Error generating PDF: {str(e)}")

    pdf_bytes = st.session_state.get("main_pdf")
    if pdf_bytes:
        st.download_button(
            label="Download Main PDF",
            data=pdf_bytes,
            file_name="ml_outline.pdf",
            mime="application/pdf",
        )
        # Display the PDF inline by embedding it as a base64 data URI.
        base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
        pdf_display = f'''
        <embed
            src="data:application/pdf;base64,{base64_pdf}"
            width="100%"
            height="400px"
            type="application/pdf">
        '''
        st.markdown(pdf_display, unsafe_allow_html=True)
        st.success("PDF generated successfully! The PDF displays the multilevel markdown outline in a two-column layout.")

    # Show the PDF rendering code in an expandable section.
    # NOTE(review): this is a simplified, hand-maintained copy for display and
    # does not match the real markdown_to_pdf_content line for line.
    with st.expander("View PDF Rendering Code"):
        st.code("""
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    # Convert markdown headers to styled text for PDF
    lines = markdown_text.strip().split('\\n')
    pdf_content = []
    for line in lines:
        if line.startswith('# '):
            # Main header - will be handled separately
            pass
        elif line.startswith('## '):
            # Section header - add as a bold item
            section = line.replace('## ', '').strip()
            pdf_content.append(f"<b>{section}</b>")
        elif re.match(r'^\\d+\\.', line):
            # Numbered list item
            item = line.strip()
            pdf_content.append(item)
        elif line.startswith('- '):
            # Sub-item under a numbered list item
            sub_item = line.strip()
            pdf_content.append(" " + sub_item)
    # Split the content for two columns
    mid_point = len(pdf_content) // 2
    left_column = pdf_content[:mid_point]
    right_column = pdf_content[mid_point:]
    return left_column, right_column
""", language="python")

# Add custom CSS for better appearance
st.markdown("""
<style>
.stButton>button {
    background-color: #4CAF50;
    color: white;
    font-weight: bold;
}
.stTabs [data-baseweb="tab-list"] {
    gap: 2px;
}
.stTabs [data-baseweb="tab"] {
    height: 50px;
    white-space: pre-wrap;
    background-color: #f0f2f6;
    border-radius: 4px 4px 0px 0px;
    gap: 1px;
    padding-top: 10px;
    padding-bottom: 10px;
}
</style>
""", unsafe_allow_html=True)