File size: 2,138 Bytes
3fc6766
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574f5b4
 
3fc6766
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Import necessary libraries
import os
import glob
import re
import pandas as pd, numpy as np
import comtypes.client
import docx
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
import tempfile
import zipfile
from io import BytesIO
import streamlit as st

# Function to convert PDF to DOCX
def pdf_to_docx(pdf_file_paths, path_docx):
    """Convert PDF files to DOCX by driving Microsoft Word over COM.

    Each PDF is opened in a hidden Word instance and saved under its
    original base name with a ``.docx`` extension inside ``path_docx``.

    Args:
        pdf_file_paths: Paths of the PDF files to convert.
        path_docx: Directory in which the DOCX files are written. A trailing
            path separator is optional.

    Returns:
        A list with the absolute path of every DOCX file written, in the
        same order as ``pdf_file_paths``.
    """
    if not pdf_file_paths:
        # Nothing to convert, so do not start Word at all.
        return []

    word = comtypes.client.CreateObject('Word.Application')
    docx_files = []
    try:
        word.visible = 0

        for pdf_file_path in pdf_file_paths:
            in_file = os.path.abspath(pdf_file_path)
            # splitext() drops whatever extension is present instead of
            # assuming it is exactly four characters long.
            stem = os.path.splitext(os.path.basename(pdf_file_path))[0]
            # join() inserts the separator that plain string concatenation
            # omitted when path_docx had no trailing slash.
            out_file = os.path.abspath(os.path.join(path_docx, stem + '.docx'))

            wdoc = word.Documents.Open(in_file)
            try:
                # 16 is wdFormatDocumentDefault in Word's WdSaveFormat enum.
                wdoc.SaveAs2(out_file, FileFormat=16)
            finally:
                # Close the document even if saving failed.
                wdoc.Close()
            docx_files.append(out_file)
    finally:
        # Always shut Word down so a failed conversion does not leave a
        # hidden Word process running.
        word.Quit()

    return docx_files

# Streamlit app
st.title("PDF to DOCX Converter")

uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

# Streamlit re-executes this script on every widget interaction, so the
# conversion is started only when the user clicks the button rather than
# on every rerun that happens to have files uploaded.
if uploaded_files and st.button("Convert PDF to DOCX"):
    with tempfile.TemporaryDirectory() as temp_dir:
        # Write each upload to disk so it can be handed to pdf_to_docx by path.
        pdf_file_paths = []
        for uploaded_file in uploaded_files:
            # basename() keeps a client-supplied name from pointing outside
            # the temporary directory.
            safe_name = os.path.basename(uploaded_file.name)
            pdf_file_path = os.path.join(temp_dir, safe_name)
            with open(pdf_file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            pdf_file_paths.append(pdf_file_path)

        # The trailing separator makes the converted files land inside
        # temp_dir, so they are removed together with it.
        converted_files = pdf_to_docx(pdf_file_paths, os.path.join(temp_dir, ""))

        # Build the ZIP in memory while the converted files still exist.
        output = BytesIO()
        with zipfile.ZipFile(output, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
            for docx_file in converted_files:
                archive.write(docx_file, os.path.basename(docx_file))
        zip_bytes = output.getvalue()

    # Plain bytes are handed over, so nothing depends on an open buffer or
    # on the temporary directory still being present.
    st.download_button(
        label="Download ZIP",
        data=zip_bytes,
        file_name="converted_docx.zip",
        mime="application/zip"
    )