awacke1 committed on
Commit
15f6774
·
verified ·
1 Parent(s): c283503

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -164
app.py CHANGED
@@ -1,135 +1,9 @@
1
- import streamlit as st
2
- import base64
3
- from reportlab.lib.pagesizes import A4
4
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
5
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
6
- from reportlab.lib import colors
7
- import io
8
- import re
9
-
10
- # Define the ML outline as a markdown string
11
- ml_markdown = """# Cutting-Edge ML Outline
12
-
13
- ## Core ML Techniques
14
- 1. 🌟 **Mixture of Experts (MoE)**
15
- - Conditional computation techniques
16
- - Sparse gating mechanisms
17
- - Training specialized sub-models
18
-
19
- 2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
20
- - Loss function customization
21
- - Gradient accumulation strategies
22
- - Learning rate schedulers
23
-
24
- 3. 🤖 **Large Language Models (LLM) using Transformers**
25
- - Attention mechanisms
26
- - Tokenization strategies
27
- - Position encodings
28
-
29
- ## Training Methods
30
- 4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
31
- - Custom reward functions
32
- - Feedback categorization
33
- - Signal extraction from text
34
-
35
- 5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
36
- - Preference datasets
37
- - PPO implementation
38
- - KL divergence constraints
39
-
40
- 6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
41
- - TIES merging
42
- - Task arithmetic
43
- - SLERP interpolation
44
-
45
- ## Optimization & Deployment
46
- 7. πŸ“ **DistillKit: Model Size Reduction with Spectrum Analysis**
47
- - Knowledge distillation
48
- - Quantization techniques
49
- - Model pruning strategies
50
-
51
- 8. 🧠 **Agentic RAG Agents using Document Inputs**
52
- - Vector database integration
53
- - Query planning
54
- - Self-reflection mechanisms
55
-
56
- 9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
57
- - Multi-document compression
58
- - Timeline extraction
59
- - Entity tracking
60
-
61
- ## Knowledge Representation
62
- 10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
63
- - Entity recognition
64
- - Relationship mapping
65
- - Hierarchical structuring
66
-
67
- 11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
68
- - Flowchart generation
69
- - Sequence diagram creation
70
- - State diagrams
71
-
72
- 12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
73
- - Code completion
74
- - Unit test generation
75
- - Documentation synthesis
76
- """
77
-
78
- # Process multilevel markdown for PDF output
79
- def markdown_to_pdf_content(markdown_text):
80
- """Convert markdown text to a format suitable for PDF generation"""
81
- lines = markdown_text.strip().split('\n')
82
- pdf_content = []
83
- in_list_item = False
84
- current_item = None
85
- sub_items = []
86
-
87
- for line in lines:
88
- line = line.strip()
89
- if not line:
90
- continue
91
-
92
- if line.startswith('# '):
93
- pass
94
- elif line.startswith('## '):
95
- if current_item and sub_items:
96
- pdf_content.append([current_item, sub_items])
97
- sub_items = []
98
- current_item = None
99
-
100
- section = line.replace('## ', '').strip()
101
- pdf_content.append(f"<b>{section}</b>")
102
- in_list_item = False
103
- elif re.match(r'^\d+\.', line):
104
- if current_item and sub_items:
105
- pdf_content.append([current_item, sub_items])
106
- sub_items = []
107
-
108
- current_item = line.strip()
109
- in_list_item = True
110
- elif line.startswith('- ') and in_list_item:
111
- sub_items.append(line.strip())
112
- else:
113
- if not in_list_item:
114
- pdf_content.append(line.strip())
115
-
116
- if current_item and sub_items:
117
- pdf_content.append([current_item, sub_items])
118
-
119
- mid_point = len(pdf_content) // 2
120
- left_column = pdf_content[:mid_point]
121
- right_column = pdf_content[mid_point:]
122
-
123
- return left_column, right_column
124
-
125
- # Main PDF creation using ReportLab
126
  def create_main_pdf(markdown_text):
127
- """Create a single-page landscape PDF with the outline in two columns"""
128
  buffer = io.BytesIO()
129
  doc = SimpleDocTemplate(
130
  buffer,
131
  pagesize=(A4[1], A4[0]), # Landscape A4: 841.89 x 595.27 points
132
- leftMargin=36, # Reduced margins to maximize content area
133
  rightMargin=36,
134
  topMargin=36,
135
  bottomMargin=36
@@ -138,25 +12,31 @@ def create_main_pdf(markdown_text):
138
  styles = getSampleStyleSheet()
139
  story = []
140
 
141
- # Available height for content (excluding title and spacer)
142
- page_height = A4[0] - 72 # Total height minus top and bottom margins
143
- title_height = 20 # Approximate height of title
144
- spacer_height = 10 # Reduced spacer
145
  available_content_height = page_height - title_height - spacer_height
146
 
147
- # Count total items for dynamic sizing
148
  left_column, right_column = markdown_to_pdf_content(markdown_text)
149
- total_items = sum(1 + (len(sub_items) if isinstance(item, list) else 0)
150
- for col in (left_column, right_column)
151
- for item in col)
152
 
153
- # Dynamic font sizes based on content length
154
- base_font_size = max(6, min(11, 200 / total_items)) # Between 6 and 11
 
 
 
 
 
 
 
 
 
 
155
  item_font_size = base_font_size
156
  subitem_font_size = base_font_size * 0.9
157
  section_font_size = base_font_size * 1.2
158
 
159
- # Create custom styles with dynamic sizes
160
  title_style = styles['Heading1']
161
  title_style.textColor = colors.darkblue
162
  title_style.alignment = 1
@@ -228,8 +108,8 @@ def create_main_pdf(markdown_text):
228
  # Create table data
229
  table_data = list(zip(left_cells, right_cells))
230
 
231
- # Calculate column widths (maximize usable width)
232
- col_width = (A4[1] - 72) / 2.0 # Total width minus margins divided by 2
233
 
234
  # Create and style table
235
  table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
@@ -237,36 +117,15 @@ def create_main_pdf(markdown_text):
237
  ('VALIGN', (0, 0), (-1, -1), 'TOP'),
238
  ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
239
  ('BACKGROUND', (0, 0), (-1, -1), colors.white),
240
- ('GRID', (0, 0), (-1, -1), 0, colors.white), # Remove grid lines
241
- ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey), # Center divider
242
  ('LEFTPADDING', (0, 0), (-1, -1), 2),
243
  ('RIGHTPADDING', (0, 0), (-1, -1), 2),
244
  ('TOPPADDING', (0, 0), (-1, -1), 1),
245
  ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
246
  ]))
247
 
248
- # Ensure table fits within available height
249
- table.spaceBefore = 0
250
- table.spaceAfter = 0
251
-
252
  story.append(table)
253
  doc.build(story)
254
  buffer.seek(0)
255
- return buffer.getvalue()
256
-
257
- # Streamlit UI
258
- st.title("🚀 Cutting-Edge ML Outline Generator")
259
-
260
- if st.button("Generate Main PDF"):
261
- with st.spinner("Generating PDF..."):
262
- pdf_bytes = create_main_pdf(ml_markdown)
263
- st.download_button(
264
- label="Download Main PDF",
265
- data=pdf_bytes,
266
- file_name="ml_outline.pdf",
267
- mime="application/pdf"
268
- )
269
- base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
270
- pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="400px" type="application/pdf">'
271
- st.markdown(pdf_display, unsafe_allow_html=True)
272
- st.success("PDF generated successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def create_main_pdf(markdown_text):
 
2
  buffer = io.BytesIO()
3
  doc = SimpleDocTemplate(
4
  buffer,
5
  pagesize=(A4[1], A4[0]), # Landscape A4: 841.89 x 595.27 points
6
+ leftMargin=36,
7
  rightMargin=36,
8
  topMargin=36,
9
  bottomMargin=36
 
12
  styles = getSampleStyleSheet()
13
  story = []
14
 
15
+ page_height = A4[0] - 72
16
+ title_height = 20
17
+ spacer_height = 10
 
18
  available_content_height = page_height - title_height - spacer_height
19
 
20
+ # Process columns first
21
  left_column, right_column = markdown_to_pdf_content(markdown_text)
 
 
 
22
 
23
+ # Calculate total items by explicitly handling the unpacking
24
+ total_items = 0
25
+ for col in (left_column, right_column):
26
+ for item in col:
27
+ if isinstance(item, list):
28
+ main_item, sub_items = item # Unpack here
29
+ total_items += 1 + len(sub_items)
30
+ else:
31
+ total_items += 1
32
+
33
+ # Dynamic font sizes
34
+ base_font_size = max(6, min(11, 200 / total_items))
35
  item_font_size = base_font_size
36
  subitem_font_size = base_font_size * 0.9
37
  section_font_size = base_font_size * 1.2
38
 
39
+ # Create custom styles
40
  title_style = styles['Heading1']
41
  title_style.textColor = colors.darkblue
42
  title_style.alignment = 1
 
108
  # Create table data
109
  table_data = list(zip(left_cells, right_cells))
110
 
111
+ # Calculate column widths
112
+ col_width = (A4[1] - 72) / 2.0
113
 
114
  # Create and style table
115
  table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
 
117
  ('VALIGN', (0, 0), (-1, -1), 'TOP'),
118
  ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
119
  ('BACKGROUND', (0, 0), (-1, -1), colors.white),
120
+ ('GRID', (0, 0), (-1, -1), 0, colors.white),
121
+ ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
122
  ('LEFTPADDING', (0, 0), (-1, -1), 2),
123
  ('RIGHTPADDING', (0, 0), (-1, -1), 2),
124
  ('TOPPADDING', (0, 0), (-1, -1), 1),
125
  ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
126
  ]))
127
 
 
 
 
 
128
  story.append(table)
129
  doc.build(story)
130
  buffer.seek(0)
131
+ return buffer.getvalue()