pdffigures2 / app.py
Giacomo Vianello
Reorganize
a10d2b2 unverified
import glob
import os
import subprocess
import tempfile
import urllib.parse
import urllib.request

import gradio as gr
def extract_figure(url):
    """Download the PDF at ``url`` and return the path of its first figure.

    The PDF is saved into a fresh temporary directory and handed to
    ``pdffigures2.jar`` (looked up relative to the current working
    directory), which is asked to write its figure images into that same
    directory using the ``figures_`` prefix.

    Args:
        url: HTTP or HTTPS address of the PDF file.

    Returns:
        Filesystem path (str) of the PNG image of the first figure.

    Raises:
        ValueError: If ``url`` is not an ``http``/``https`` URL.
        FileNotFoundError: If no figure image was produced for the PDF.
    """
    # The URL comes from a web form. urlretrieve would also follow schemes
    # such as file://, which would expose local files, so only plain web
    # URLs are accepted.
    url = (url or "").strip()
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise ValueError(f"Expected an http(s) URL to a PDF file, got: {url!r}")

    # A private directory per call keeps concurrent requests from
    # overwriting each other's files and guarantees that a figure left
    # behind by an earlier run is never returned by mistake.
    work_dir = tempfile.mkdtemp(prefix="pdffigures2_")
    pdf_path = os.path.join(work_dir, "input.pdf")
    figure_prefix = os.path.join(work_dir, "figures_")

    try:
        urllib.request.urlretrieve(url, pdf_path)
        result = subprocess.run(
            ["java", "-jar", "pdffigures2.jar", pdf_path, "-m", figure_prefix]
        )
    finally:
        # The downloaded PDF is not needed once extraction has been tried.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)

    # Preferred result: the image name the tool is expected to use for
    # "Figure 1" of a document called "input".
    expected = figure_prefix + "input-Figure1-1.png"
    if os.path.exists(expected):
        return expected

    # Otherwise fall back to whichever figure image sorts first.
    candidates = sorted(
        glob.glob(glob.escape(figure_prefix) + "input-Figure*.png")
    )
    if candidates:
        return candidates[0]

    raise FileNotFoundError(
        "pdffigures2 produced no figure image for "
        f"{url!r} (exit code {result.returncode})"
    )
def run():
    """Build the Gradio interface around ``extract_figure`` and launch it."""
    # Input widget: a text field for the PDF address.
    url_box = gr.inputs.Textbox(label="Enter URL of PDF file:")
    # Output widget: an image component fed with a file path.
    figure_view = gr.outputs.Image(label="First figure in PDF:", type="filepath")

    app = gr.Interface(
        fn=extract_figure,
        inputs=url_box,
        outputs=figure_view,
        title="Extract First Figure from PDF",
        description=(
            "Enter the URL of a PDF file and the first figure in the file "
            "will be extracted and displayed."
        ),
    )

    # Listen on every network interface, port 7860.
    app.launch(server_name="0.0.0.0", server_port=7860)
# Script entry point: start the web app only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run()