import re

from fpdf import FPDF

# Typographic characters mapped to plain equivalents that survive the
# Latin-1 fallback applied before text is handed to the PDF writer.
_REPLACEMENTS = {
    '\u2013': '-',    # en dash to hyphen
    '\u2014': '--',   # em dash to double hyphen
    '\u2018': "'",    # left single quotation mark to apostrophe
    '\u2019': "'",    # right single quotation mark to apostrophe
    '\u201c': '"',    # left double quotation mark to double quote
    '\u201d': '"',    # right double quotation mark to double quote
    '\u2026': '...',  # horizontal ellipsis
    '\u2010': '-',    # Unicode hyphen to ASCII hyphen
    '\u2022': '*',    # bullet
    '\u2122': 'TM',   # trademark symbol
}
_REPLACEMENT_TABLE = str.maketrans(_REPLACEMENTS)

# Splits a line into plain text and inline markup tokens (bold-italic, bold,
# italic, markdown link, parenthesised token). The capturing group makes
# re.split keep the tokens in the result.
_INLINE_SPLIT_RE = re.compile(
    r'(\*\*\*.*?\*\*\*|\*\*.*?\*\*|\*.*?\*|\[.*?\]\(.*?\)|\([^ ]+?\))'
)
_LINK_RE = re.compile(r'\[(.*?)\]\((.*?)\)')
# Checked in order: the longest marker must be tested first because the
# shorter patterns also match the longer markers.
_EMPHASIS_STYLES = (
    (re.compile(r'\*\*\*.*?\*\*\*'), 'BI'),
    (re.compile(r'\*\*.*?\*\*'), 'B'),
    (re.compile(r'\*.*?\*'), 'I'),
)
# Optional closing run of hashes on a heading ("## Title ##").
_CLOSING_HASHES_RE = re.compile(r'(?:^|\s+)#+\s*$')

_BASE_FONT_SIZE = 12
_LINE_HEIGHT = 10
_MAX_HEADER_LEVEL = 4


class PDF(FPDF):
    """FPDF document with an empty title header and a page-number footer."""

    def header(self):
        """Draw the page header: a centred, bold, empty title cell."""
        self.set_font("Arial", "B", 12)
        self.cell(0, 10, "", 0, 1, "C")

    def footer(self):
        """Draw the centred page number 15 units above the bottom edge."""
        self.set_y(-15)
        self.set_font("Arial", "I", 8)
        self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")


def sanitize_content(content: str) -> str:
    """Return *content* with anything that cannot be UTF-8 encoded dropped.

    Encoding with ``errors='ignore'`` never raises ``UnicodeEncodeError``,
    so no fallback branch is needed; in practice only unencodable code
    points such as lone surrogates are removed.
    """
    return content.encode('utf-8', 'ignore').decode('utf-8')


def replace_problematic_characters(content: str) -> str:
    """Replace common typographic characters with plain ASCII equivalents.

    Covers dashes, curly quotes, the ellipsis, the bullet and the trademark
    sign. All other characters are returned unchanged.
    """
    return content.translate(_REPLACEMENT_TABLE)


def _to_latin1(text: str) -> str:
    """Replace every character outside Latin-1 with ``?``.

    The built-in (core) fonts used here, such as Arial, are limited to
    Latin-1; without this step one unsupported character (an emoji, CJK
    text, ...) makes the whole document fail to render.
    """
    return text.encode('latin-1', 'replace').decode('latin-1')


def _write_header(pdf, line: str) -> None:
    """Render a markdown heading line (one starting with ``#``)."""
    title = line.lstrip('#')
    # Only the leading run of hashes sets the level; a '#' inside the title
    # (e.g. "# Issue #42") must not change it.
    level = min(len(line) - len(title), _MAX_HEADER_LEVEL)
    title = _CLOSING_HASHES_RE.sub('', title.strip())
    title = re.sub(r'\*{2,}', '', title)  # drop bold markers inside headings
    size = _BASE_FONT_SIZE + (_MAX_HEADER_LEVEL - level) * 2
    pdf.set_font('Arial', 'B', size)
    pdf.multi_cell(0, _LINE_HEIGHT, title)
    pdf.set_font('Arial', '', _BASE_FONT_SIZE)


def _write_inline(pdf, line: str) -> None:
    """Render one body line, applying emphasis and link markup."""
    for part in _INLINE_SPLIT_RE.split(line):
        if not part:
            continue  # re.split yields empty strings between adjacent tokens

        link = _LINK_RE.match(part)
        if link:
            # Take label and URL from the same match so that parentheses
            # inside the label are not mistaken for the URL.
            label, url = link.groups()
            pdf.set_text_color(0, 0, 255)  # blue link text
            pdf.set_font('', 'U')
            pdf.write(_LINE_HEIGHT, label, url)
        else:
            style = next(
                (s for pattern, s in _EMPHASIS_STYLES if pattern.match(part)),
                None,
            )
            if style is None:
                # Plain text; parenthesised tokens also land here and are
                # written verbatim.
                pdf.write(_LINE_HEIGHT, part)
            else:
                pdf.set_font('Arial', style, _BASE_FONT_SIZE)
                pdf.write(_LINE_HEIGHT, part.strip('*'))

        # Reset after every part so styling never leaks into the next one.
        pdf.set_text_color(0, 0, 0)
        pdf.set_font('Arial', '', _BASE_FONT_SIZE)


def generate_pdf_from_md(content: str, filename: str = 'output.pdf') -> str:
    """Render a small subset of markdown to a PDF file.

    Supported markup: ``#`` headings (levels 1-4; deeper levels render as
    level 4), ``*italic*``, ``**bold**``, ``***bold italic***`` and
    ``[text](url)`` links. Everything else is written as plain text.

    Args:
        content: Markdown source text.
        filename: Path of the PDF file to write.

    Returns:
        ``"PDF generated: <filename>"`` on success, otherwise
        ``"Error generating PDF: <reason>"``. Exceptions are not propagated;
        callers must inspect the returned message.
    """
    try:
        pdf = PDF()
        pdf.add_page()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.set_font('Arial', '', _BASE_FONT_SIZE)

        text = replace_problematic_characters(sanitize_content(content))
        # Must run after the replacements above, otherwise the typographic
        # characters would be turned into '?' before they can be mapped.
        text = _to_latin1(text)

        for line in text.split('\n'):
            if line.startswith('#'):
                _write_header(pdf, line)
            else:
                _write_inline(pdf, line)
                pdf.ln(_LINE_HEIGHT)

        pdf.output(filename)
        return f"PDF generated: {filename}"
    except Exception as e:
        # Top-level boundary: callers rely on a status string, not a raise.
        return f"Error generating PDF: {e}"