import base64
import io
import re
from xml.sax.saxutils import escape

import streamlit as st
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle

# Outline rendered by the PDF generator: one "# " title, "## " section
# headings, numbered items, and "- " bullets belonging to the item above them.
#
# NOTE(review): the symbol after each item number appears to be an emoji whose
# UTF-8 bytes were decoded with the wrong codec (mojibake). The original code
# points cannot be reconstructed reliably from this text, so the markers are
# kept exactly as found -- restore them from the upstream source if available.
ml_markdown = """# Cutting-Edge ML Outline

## Core ML Techniques
1. π **Mixture of Experts (MoE)**
- Conditional computation techniques
- Sparse gating mechanisms
- Training specialized sub-models

2. π₯ **Supervised Fine-Tuning (SFT) using PyTorch**
- Loss function customization
- Gradient accumulation strategies
- Learning rate schedulers

3. π€ **Large Language Models (LLM) using Transformers**
- Attention mechanisms
- Tokenization strategies
- Position encodings

## Training Methods
4. π **Self-Rewarding Learning using NPS 0-10 and Verbatims**
- Custom reward functions
- Feedback categorization
- Signal extraction from text

5. π **Reinforcement Learning from Human Feedback (RLHF)**
- Preference datasets
- PPO implementation
- KL divergence constraints

6. π **MergeKit: Merging Models to Same Embedding Space**
- TIES merging
- Task arithmetic
- SLERP interpolation

## Optimization & Deployment
7. π **DistillKit: Model Size Reduction with Spectrum Analysis**
- Knowledge distillation
- Quantization techniques
- Model pruning strategies

8. π§ **Agentic RAG Agents using Document Inputs**
- Vector database integration
- Query planning
- Self-reflection mechanisms

9. β³ **Longitudinal Data Summarization from Multiple Docs**
- Multi-document compression
- Timeline extraction
- Entity tracking

## Knowledge Representation
10. π **Knowledge Extraction using Markdown Knowledge Graphs**
- Entity recognition
- Relationship mapping
- Hierarchical structuring

11. πΊοΈ **Knowledge Mapping with Mermaid Diagrams**
- Flowchart generation
- Sequence diagram creation
- State diagrams

12. π» **ML Code Generation with Streamlit/Gradio/HTML5+JS**
- Code completion
- Unit test generation
- Documentation synthesis
"""
# Matches the start of a numbered outline item such as "3." or "12.".
_NUMBERED_ITEM_RE = re.compile(r'^\d+\.')


def markdown_to_pdf_content(markdown_text):
    """Parse a small markdown outline into two columns of PDF entries.

    Each line is stripped and blank lines are skipped. The remaining lines
    are classified as follows:

    * ``# title``    -- ignored.
    * ``## section`` -- closes any open numbered item and is emitted as the
      string ``"<b>section</b>"``.
    * ``N. item``    -- closes any open numbered item and opens a new one. An
      item is emitted as the two-element list ``[item_line, sub_items]``,
      where ``sub_items`` may be empty.
    * ``- bullet``   -- while a numbered item is open, appended (including
      the leading ``"- "``) to that item's ``sub_items``.
    * anything else  -- emitted as a plain string when no numbered item is
      open; dropped when one is.

    Args:
        markdown_text: The outline source text.

    Returns:
        A ``(left_column, right_column)`` tuple. The flat entry list is cut
        at ``len(entries) // 2``; the cut counts entries (headings and
        items alike), not rendered lines, so when the count is odd the
        right column gets the extra entry.
    """
    entries = []
    current_item = None
    sub_items = []

    def flush_item():
        # Emit the open numbered item, if any. Items without bullets are
        # emitted too (with an empty sub-item list) instead of being lost.
        nonlocal current_item, sub_items
        if current_item is not None:
            entries.append([current_item, sub_items])
        current_item, sub_items = None, []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('# '):
            # Blank line or document title: contributes nothing.
            continue

        if line.startswith('## '):
            flush_item()
            # Drop only the leading marker; a later "## " inside the heading
            # text is kept verbatim.
            entries.append(f"<b>{line[3:].strip()}</b>")
        elif _NUMBERED_ITEM_RE.match(line):
            flush_item()
            current_item = line
        elif current_item is not None:
            # Inside a numbered item only bullets are kept.
            if line.startswith('- '):
                sub_items.append(line)
        else:
            entries.append(line)

    flush_item()

    mid_point = len(entries) // 2
    return entries[:mid_point], entries[mid_point:]
# Matches a markdown bold span such as "**text**" (shortest match).
_BOLD_MD_RE = re.compile(r'\*\*(.+?)\*\*')


def _to_paragraph_markup(text):
    """Escape *text* for Paragraph markup and turn ``**bold**`` into ``<b>``."""
    # Escape first so that literal "&", "<" and ">" in the outline cannot be
    # mistaken for markup; the <b> tags are introduced only afterwards.
    return _BOLD_MD_RE.sub(r'<b>\1</b>', escape(text))


def _build_column_cells(entries, section_style, item_style, subitem_style):
    """Flatten one column of parsed outline entries into a list of Paragraphs."""
    cells = []
    for entry in entries:
        if isinstance(entry, str) and entry.startswith('<b>'):
            # Section heading: the parser wraps these in <b>...</b>; the
            # heading style supplies the emphasis, so the tags are removed.
            heading = entry.replace('<b>', '').replace('</b>', '')
            cells.append(Paragraph(_to_paragraph_markup(heading), section_style))
        elif isinstance(entry, list):
            # Numbered item followed by its bullets, one Paragraph each.
            main_item, sub_items = entry
            cells.append(Paragraph(_to_paragraph_markup(main_item), item_style))
            cells.extend(
                Paragraph(_to_paragraph_markup(sub_item), subitem_style)
                for sub_item in sub_items
            )
        else:
            cells.append(Paragraph(_to_paragraph_markup(entry), item_style))
    return cells


def create_main_pdf(markdown_text):
    """Render the outline as a two-column landscape A4 PDF.

    The markdown is parsed with ``markdown_to_pdf_content``; each returned
    column becomes one column of a two-column table placed under a fixed
    title. Nothing here constrains the output to a single page.

    Args:
        markdown_text: Outline source understood by
            ``markdown_to_pdf_content``.

    Returns:
        The generated PDF document as ``bytes``.
    """
    margin = 50
    page_width, page_height = A4[1], A4[0]  # A4 with the sides swapped: landscape

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(page_width, page_height),
        leftMargin=margin,
        rightMargin=margin,
        topMargin=margin,
        bottomMargin=margin,
    )

    styles = getSampleStyleSheet()

    # Derived styles throughout, so the sample stylesheet entries stay untouched.
    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Heading1'],
        textColor=colors.darkblue,
        alignment=1,  # 1 is ReportLab's TA_CENTER
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        spaceAfter=6,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=11,
        leading=14,
        fontName='Helvetica-Bold',
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontSize=10,
        leading=12,
        leftIndent=20,
    )

    story = [
        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
        Spacer(1, 20),
    ]

    left_column, right_column = markdown_to_pdf_content(markdown_text)
    left_cells = _build_column_cells(
        left_column, section_style, item_style, subitem_style
    )
    right_cells = _build_column_cells(
        right_column, section_style, item_style, subitem_style
    )

    # Pad the shorter column with empty cells so the rows pair up one-to-one.
    row_count = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (row_count - len(left_cells)))
    right_cells.extend([""] * (row_count - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # An outline with no entries yields no rows; skip the table in that case
    # and emit a title-only document rather than building an empty Table.
    if table_data:
        # Page width minus 120pt: the two 50pt margins plus 20pt of slack
        # (presumably to clear the frame's inner padding -- confirm).
        col_width = (page_width - 120) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width])
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            # White grid lines are invisible on the white background; the
            # grey rule after column 0 is the only visible divider.
            ('GRID', (0, 0), (-1, -1), 0.5, colors.white),
            ('LINEAFTER', (0, 0), (0, -1), 1, colors.grey),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
# Streamlit page: a button generates the PDF, after which a download button
# and an inline preview are shown.
#
# Streamlit re-executes this script on every interaction and st.button() is
# True only for the run triggered by its own click. The generated bytes are
# therefore kept in st.session_state so that the download button and preview
# are still rendered on later runs (for example the run triggered by clicking
# the download button itself).
PDF_STATE_KEY = "main_pdf_bytes"

st.title("π Cutting-Edge ML Outline Generator")

just_generated = False
if st.button("Generate Main PDF"):
    with st.spinner("Generating PDF..."):
        st.session_state[PDF_STATE_KEY] = create_main_pdf(ml_markdown)
    just_generated = True

if PDF_STATE_KEY in st.session_state:
    pdf_bytes = st.session_state[PDF_STATE_KEY]

    st.download_button(
        label="Download Main PDF",
        data=pdf_bytes,
        file_name="ml_outline.pdf",
        mime="application/pdf",
    )

    # Inline preview: the PDF is embedded as a base64 data URI.
    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="400px" type="application/pdf">'
    st.markdown(pdf_display, unsafe_allow_html=True)

    # Report success only on the run that actually produced the PDF.
    if just_generated:
        st.success("PDF generated successfully!")