|
|
|
import os |
|
import glob |
|
import re |
|
import pandas as pd, numpy as np |
|
import comtypes.client |
|
import docx |
|
from docx.document import Document |
|
from docx.oxml.table import CT_Tbl |
|
from docx.oxml.text.paragraph import CT_P |
|
from docx.table import _Cell, Table |
|
from docx.text.paragraph import Paragraph |
|
import tempfile |
|
import zipfile |
|
from io import BytesIO |
|
import streamlit as st |
|
|
|
|
|
def pdf_to_docx(pdf_file_paths, path_docx):
    """Convert PDF files to DOCX by automating Microsoft Word over COM.

    Each PDF is opened in a hidden Word instance and saved as
    ``<path_docx>/<pdf stem>.docx``.

    Args:
        pdf_file_paths: Iterable of paths to the PDF files to convert.
        path_docx: Directory in which the DOCX files are written.

    Returns:
        List of absolute paths of the DOCX files, in input order. Empty
        when no input paths are given (Word is not started in that case).

    Raises:
        Whatever the COM layer raises when Word cannot be started or a
        document cannot be opened/saved; Word is still shut down first.
    """
    pdf_file_paths = list(pdf_file_paths)
    if not pdf_file_paths:
        # Nothing to convert: do not pay for launching Word.
        return []

    # NOTE(review): when this runs on a thread other than the one that
    # imported comtypes (e.g. a Streamlit script-runner thread), COM may need
    # to be initialised for that thread first -- confirm in the target setup.
    word = comtypes.client.CreateObject('Word.Application')
    word.visible = 0

    docx_files = []
    try:
        for pdf_file_path in pdf_file_paths:
            in_file = os.path.abspath(pdf_file_path)
            # splitext instead of slicing off four characters, so names
            # whose extension is not exactly ".pdf"-sized are not mangled.
            stem = os.path.splitext(os.path.basename(pdf_file_path))[0]
            # os.path.join so the file lands inside path_docx even when the
            # directory is passed without a trailing separator.
            out_file = os.path.abspath(os.path.join(path_docx, stem + '.docx'))

            wdoc = word.Documents.Open(in_file)
            try:
                # 16 is Word's wdFormatDocumentDefault (.docx).
                wdoc.SaveAs2(out_file, FileFormat=16)
            finally:
                # Close the document even if saving failed.
                wdoc.Close()

            docx_files.append(out_file)
    finally:
        # Always terminate Word; otherwise a failed conversion leaves an
        # invisible WINWORD process running.
        word.Quit()

    return docx_files
|
|
|
|
|
# Streamlit UI: upload PDFs, convert them to DOCX on demand and offer the
# results as a single ZIP download.
st.title("PDF to DOCX Converter")

uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

# Convert only when the user asks for it. Streamlit re-executes this script on
# every interaction, so an ungated conversion would restart Word on each rerun.
if uploaded_files and st.button("Convert PDF to DOCX"):
    with tempfile.TemporaryDirectory() as temp_dir:
        pdf_file_paths = []
        for uploaded_file in uploaded_files:
            # The file name is supplied by the client: keep only its final
            # component so it cannot point outside temp_dir.
            safe_name = os.path.basename(uploaded_file.name)
            pdf_file_path = os.path.join(temp_dir, safe_name)
            with open(pdf_file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            pdf_file_paths.append(pdf_file_path)

        # Pass the directory with a trailing separator so the DOCX files are
        # created inside temp_dir (and removed with it) whether the converter
        # joins or simply concatenates directory and file name.
        converted_files = pdf_to_docx(pdf_file_paths, os.path.join(temp_dir, ""))

        # Build the archive in memory while the converted files still exist.
        output = BytesIO()
        with zipfile.ZipFile(output, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
            for docx_file in converted_files:
                archive.write(docx_file, os.path.basename(docx_file))

    # The ZIP bytes are self-contained, so the temporary directory can already
    # be gone by the time the download is offered.
    st.download_button(
        label="Download ZIP",
        data=output.getvalue(),
        file_name="converted_docx.zip",
        mime="application/zip",
    )
|
|