# Import necessary libraries
import os
import glob
import re
import pandas as pd
import numpy as np
import comtypes.client
import docx
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
import tempfile
import zipfile
from io import BytesIO
import streamlit as st

# Word's WdSaveFormat value for the default .docx format
# (wdFormatDocumentDefault).
_WD_FORMAT_DOCX = 16
# Word's WdSaveOptions value meaning "close without saving / prompting"
# (wdDoNotSaveChanges).
_WD_DO_NOT_SAVE_CHANGES = 0


# Function to convert PDF to DOCX
def pdf_to_docx(pdf_file_paths, path_docx):
    """Convert PDF files to DOCX by automating Microsoft Word over COM.

    Args:
        pdf_file_paths: Iterable of paths to the PDF files to convert.
        path_docx: Directory in which the DOCX files are written. A
            trailing path separator is optional.

    Returns:
        A list with the absolute path of each DOCX file, in the same
        order as ``pdf_file_paths``.

    Any exception raised by Word while opening or saving a document
    propagates to the caller; the document and the Word instance are
    closed first so no hidden Word process is left behind.
    """
    # NOTE(review): Streamlit may execute reruns on a different thread than
    # the one that imported comtypes; if CreateObject fails with a
    # "CoInitialize has not been called" error, COM needs to be initialised
    # on that thread -- confirm on the target machine.
    word = comtypes.client.CreateObject('Word.Application')
    docx_files = []
    try:
        word.visible = 0
        for pdf_file_path in pdf_file_paths:
            in_file = os.path.abspath(pdf_file_path)
            # splitext drops the real extension instead of blindly cutting
            # the last four characters of the name.
            stem = os.path.splitext(os.path.basename(pdf_file_path))[0]
            # os.path.join keeps the output inside path_docx even when the
            # directory is given without a trailing separator.
            out_file = os.path.abspath(os.path.join(path_docx, stem + '.docx'))

            wdoc = word.Documents.Open(in_file)
            try:
                wdoc.SaveAs2(out_file, FileFormat=_WD_FORMAT_DOCX)
            finally:
                # Never prompt: Word is invisible, so a save dialog would
                # block forever.
                wdoc.Close(_WD_DO_NOT_SAVE_CHANGES)
            docx_files.append(out_file)
    finally:
        word.Quit()
    return docx_files


# Streamlit app
st.title("PDF to DOCX Converter")
uploaded_files = st.file_uploader(
    "Upload PDF files", type="pdf", accept_multiple_files=True
)

# Only touch the disk and start Word once the user actually asks for the
# conversion; Streamlit re-executes this script on every interaction.
if uploaded_files and st.button("Convert PDF to DOCX"):
    with tempfile.TemporaryDirectory() as temp_dir:
        pdf_file_paths = []
        for uploaded_file in uploaded_files:
            # basename() guards against an upload name carrying directory
            # components that would escape temp_dir.
            pdf_file_path = os.path.join(
                temp_dir, os.path.basename(uploaded_file.name)
            )
            with open(pdf_file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            pdf_file_paths.append(pdf_file_path)

        converted_files = pdf_to_docx(pdf_file_paths, temp_dir)

        # Build the archive in memory while the temporary files still exist.
        output = BytesIO()
        with zipfile.ZipFile(
            output, mode="w", compression=zipfile.ZIP_DEFLATED
        ) as archive:
            for docx_file in converted_files:
                archive.write(docx_file, os.path.basename(docx_file))

    # Hand Streamlit an immutable bytes snapshot rather than a live buffer.
    st.download_button(
        label="Download ZIP",
        data=output.getvalue(),
        file_name="converted_docx.zip",
        mime="application/zip",
    )