# PDFtoPPT / app.py
# Razzaqi3143's picture
# Create app.py
# 726da04 verified
import gradio as gr
from pptx import Presentation
from pptx.util import Inches
import pdfplumber
from transformers import pipeline
import tempfile
# Build the summarization pipeline once, at module import, so every
# conversion request reuses the same BART (facebook/bart-large-cnn) instance.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# Function to process PDF and create PowerPoint
def pdf_to_pptx(pdf_file):
    """Summarize a PDF and write the summary out as a PowerPoint deck.

    The text of every page is extracted with pdfplumber, cut into
    fixed-size character chunks, summarized chunk by chunk with the
    module-level ``summarizer`` pipeline, and split on ``". "``; each
    non-empty piece becomes the body of one "Title and Content" slide
    titled ``Slide N``.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts for the PDF
            (the caller in this file passes a filesystem path).

    Returns:
        Path of the generated ``.pptx`` file. It is created with
        ``delete=False``, so it is not removed automatically.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    # Step 1: Extract text from the PDF
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() may give None (or an empty string, depending on the
        # pdfplumber version) for pages without a text layer, e.g. scans.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline so the last word of one page does not fuse with
    # the first word of the next.
    pdf_text = "\n".join(text for text in page_texts if text).strip()
    if not pdf_text:
        raise ValueError("No extractable text was found in the PDF.")

    # Step 2: Summarize text
    def chunk_text(text, max_len=1024):
        """Cut *text* into consecutive pieces of at most *max_len* characters."""
        return [text[i:i + max_len] for i in range(0, len(text), max_len)]

    # truncation=True keeps a chunk that tokenizes to more than the model's
    # input limit from failing inside the pipeline.
    summarized_text = " ".join(
        summarizer(chunk, truncation=True)[0]["summary_text"]
        for chunk in chunk_text(pdf_text)
    )

    # Step 3: Create PowerPoint presentation
    presentation = Presentation()
    content_layout = presentation.slide_layouts[1]  # Title and Content layout
    stripped_parts = (part.strip() for part in summarized_text.split(". "))
    # Skip blank pieces so the deck never contains an empty slide.
    slide_texts = [part for part in stripped_parts if part]
    for number, content in enumerate(slide_texts, start=1):
        slide = presentation.slides.add_slide(content_layout)
        slide.shapes.title.text = f"Slide {number}"  # Slide title
        slide.placeholders[1].text = content  # Slide content

    # Step 4: Save presentation to a temporary file and return path
    # Reserve a unique name, then close the handle before saving so the
    # descriptor is not leaked and the file is not written while still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pptx") as temp_pptx:
        pptx_path = temp_pptx.name
    presentation.save(pptx_path)
    return pptx_path
# Gradio app interface
def generate_pptx(pdf_file):
    """Gradio callback: convert the uploaded PDF and return the .pptx path.

    Args:
        pdf_file: The value Gradio supplies for the ``gr.File`` input.
            Depending on the Gradio version / ``type`` setting this is
            either a path string or an object exposing the path as
            ``.name``; it is ``None`` when nothing was uploaded.

    Returns:
        Path of the generated PowerPoint file, as returned by
        ``pdf_to_pptx``.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")
    # Accept both a plain path string and a file wrapper with a .name path.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    return pdf_to_pptx(pdf_path)
# Wire the converter into a single-input, single-output Gradio UI.
_pdf_upload = gr.File(label="Upload PDF File")
_pptx_download = gr.File(label="Download PowerPoint Presentation")

interface = gr.Interface(
    fn=generate_pptx,
    inputs=_pdf_upload,
    outputs=_pptx_download,
    title="PDF to PowerPoint Converter",
    description="Upload a PDF file to convert it into a professional PowerPoint presentation.",
)

# Start serving the app.
interface.launch()