Spaces:

awacke1
/

Pillow-PyMuPDF-ReportLab

Running

App Files Community

awacke1 committed on Apr 1

Commit

15f6774

verified ·

1 Parent(s): c283503

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -164

app.py CHANGED Viewed

@@ -1,135 +1,9 @@
-import streamlit as st
-import base64
-from reportlab.lib.pagesizes import A4
-from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
-from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
-from reportlab.lib import colors
-import io
-import re
-# Define the ML outline as a markdown string
-ml_markdown = """# Cutting-Edge ML Outline
-## Core ML Techniques
-1. 🌟 **Mixture of Experts (MoE)**
-   - Conditional computation techniques
-   - Sparse gating mechanisms
-   - Training specialized sub-models
-2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
-   - Loss function customization
-   - Gradient accumulation strategies
-   - Learning rate schedulers
-3. 🤖 **Large Language Models (LLM) using Transformers**
-   - Attention mechanisms
-   - Tokenization strategies
-   - Position encodings
-## Training Methods
-4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
-   - Custom reward functions
-   - Feedback categorization
-   - Signal extraction from text
-5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
-   - Preference datasets
-   - PPO implementation
-   - KL divergence constraints
-6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
-   - TIES merging
-   - Task arithmetic
-   - SLERP interpolation
-## Optimization & Deployment
-7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
-   - Knowledge distillation
-   - Quantization techniques
-   - Model pruning strategies
-8. 🧠 **Agentic RAG Agents using Document Inputs**
-   - Vector database integration
-   - Query planning
-   - Self-reflection mechanisms
-9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
-   - Multi-document compression
-   - Timeline extraction
-   - Entity tracking
-## Knowledge Representation
-10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
-    - Entity recognition
-    - Relationship mapping
-    - Hierarchical structuring
-11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
-    - Flowchart generation
-    - Sequence diagram creation
-    - State diagrams
-12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
-    - Code completion
-    - Unit test generation
-    - Documentation synthesis
-"""
-# Process multilevel markdown for PDF output
-def markdown_to_pdf_content(markdown_text):
-    """Convert markdown text to a format suitable for PDF generation"""
-    lines = markdown_text.strip().split('\n')
-    pdf_content = []
-    in_list_item = False
-    current_item = None
-    sub_items = []
-    for line in lines:
-        line = line.strip()
-        if not line:
-            continue
-        if line.startswith('# '):
-            pass
-        elif line.startswith('## '):
-            if current_item and sub_items:
-                pdf_content.append([current_item, sub_items])
-                sub_items = []
-                current_item = None
-            section = line.replace('## ', '').strip()
-            pdf_content.append(f"<b>{section}</b>")
-            in_list_item = False
-        elif re.match(r'^\d+\.', line):
-            if current_item and sub_items:
-                pdf_content.append([current_item, sub_items])
-                sub_items = []
-            current_item = line.strip()
-            in_list_item = True
-        elif line.startswith('- ') and in_list_item:
-            sub_items.append(line.strip())
-        else:
-            if not in_list_item:
-                pdf_content.append(line.strip())
-    if current_item and sub_items:
-        pdf_content.append([current_item, sub_items])
-    mid_point = len(pdf_content) // 2
-    left_column = pdf_content[:mid_point]
-    right_column = pdf_content[mid_point:]
-    return left_column, right_column
-# Main PDF creation using ReportLab
 def create_main_pdf(markdown_text):
-    """Create a single-page landscape PDF with the outline in two columns"""
     buffer = io.BytesIO()
     doc = SimpleDocTemplate(
         buffer,
         pagesize=(A4[1], A4[0]),  # Landscape A4: 841.89 x 595.27 points
-        leftMargin=36,  # Reduced margins to maximize content area
         rightMargin=36,
         topMargin=36,
         bottomMargin=36
@@ -138,25 +12,31 @@ def create_main_pdf(markdown_text):
     styles = getSampleStyleSheet()
     story = []
-    # Available height for content (excluding title and spacer)
-    page_height = A4[0] - 72  # Total height minus top and bottom margins
-    title_height = 20  # Approximate height of title
-    spacer_height = 10  # Reduced spacer
     available_content_height = page_height - title_height - spacer_height
-    # Count total items for dynamic sizing
     left_column, right_column = markdown_to_pdf_content(markdown_text)
-    total_items = sum(1 + (len(sub_items) if isinstance(item, list) else 0)
-                     for col in (left_column, right_column)
-                     for item in col)
-    # Dynamic font sizes based on content length
-    base_font_size = max(6, min(11, 200 / total_items))  # Between 6 and 11
     item_font_size = base_font_size
     subitem_font_size = base_font_size * 0.9
     section_font_size = base_font_size * 1.2
-    # Create custom styles with dynamic sizes
     title_style = styles['Heading1']
     title_style.textColor = colors.darkblue
     title_style.alignment = 1
@@ -228,8 +108,8 @@ def create_main_pdf(markdown_text):
     # Create table data
     table_data = list(zip(left_cells, right_cells))
-    # Calculate column widths (maximize usable width)
-    col_width = (A4[1] - 72) / 2.0  # Total width minus margins divided by 2
     # Create and style table
     table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
@@ -237,36 +117,15 @@ def create_main_pdf(markdown_text):
         ('VALIGN', (0, 0), (-1, -1), 'TOP'),
         ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
         ('BACKGROUND', (0, 0), (-1, -1), colors.white),
-        ('GRID', (0, 0), (-1, -1), 0, colors.white),  # Remove grid lines
-        ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),  # Center divider
         ('LEFTPADDING', (0, 0), (-1, -1), 2),
         ('RIGHTPADDING', (0, 0), (-1, -1), 2),
         ('TOPPADDING', (0, 0), (-1, -1), 1),
         ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
     ]))
-    # Ensure table fits within available height
-    table.spaceBefore = 0
-    table.spaceAfter = 0
     story.append(table)
     doc.build(story)
     buffer.seek(0)
-    return buffer.getvalue()
-# Streamlit UI
-st.title("🚀 Cutting-Edge ML Outline Generator")
-if st.button("Generate Main PDF"):
-    with st.spinner("Generating PDF..."):
-        pdf_bytes = create_main_pdf(ml_markdown)
-        st.download_button(
-            label="Download Main PDF",
-            data=pdf_bytes,
-            file_name="ml_outline.pdf",
-            mime="application/pdf"
-        )
-        base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
-        pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="400px" type="application/pdf">'
-        st.markdown(pdf_display, unsafe_allow_html=True)
-        st.success("PDF generated successfully!")

 def create_main_pdf(markdown_text):
     buffer = io.BytesIO()
     doc = SimpleDocTemplate(
         buffer,
         pagesize=(A4[1], A4[0]),  # Landscape A4: 841.89 x 595.27 points
+        leftMargin=36,
         rightMargin=36,
         topMargin=36,
         bottomMargin=36
     styles = getSampleStyleSheet()
     story = []
+    page_height = A4[0] - 72
+    title_height = 20
+    spacer_height = 10
     available_content_height = page_height - title_height - spacer_height
+    # Process columns first
     left_column, right_column = markdown_to_pdf_content(markdown_text)
+    # Calculate total items by explicitly handling the unpacking
+    total_items = 0
+    for col in (left_column, right_column):
+        for item in col:
+            if isinstance(item, list):
+                main_item, sub_items = item  # Unpack here
+                total_items += 1 + len(sub_items)
+            else:
+                total_items += 1
+    # Dynamic font sizes
+    base_font_size = max(6, min(11, 200 / total_items))
     item_font_size = base_font_size
     subitem_font_size = base_font_size * 0.9
     section_font_size = base_font_size * 1.2
+    # Create custom styles
     title_style = styles['Heading1']
     title_style.textColor = colors.darkblue
     title_style.alignment = 1
     # Create table data
     table_data = list(zip(left_cells, right_cells))
+    # Calculate column widths
+    col_width = (A4[1] - 72) / 2.0
     # Create and style table
     table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
         ('VALIGN', (0, 0), (-1, -1), 'TOP'),
         ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
         ('BACKGROUND', (0, 0), (-1, -1), colors.white),
+        ('GRID', (0, 0), (-1, -1), 0, colors.white),
+        ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
         ('LEFTPADDING', (0, 0), (-1, -1), 2),
         ('RIGHTPADDING', (0, 0), (-1, -1), 2),
         ('TOPPADDING', (0, 0), (-1, -1), 1),
         ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
     ]))
     story.append(table)
     doc.build(story)
     buffer.seek(0)
+    return buffer.getvalue()