File size: 1,928 Bytes
e94100d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os
from pathlib import Path
import shutil
import tempfile
import uuid

import pandas as pd

from project_settings import project_path
from toolbox.to_markdown.base_to_markdown import BaseToMarkdown


def get_args():
    """Parse the command-line options of this script.

    The only option is ``--filename``, the path of the Excel workbook to
    convert. When omitted it falls back to a sample workbook located under
    ``project_path`` (presumably a ``pathlib.Path``, since it is combined
    with ``/`` and converted with ``as_posix()``).

    Returns:
        argparse.Namespace: the parsed arguments, exposing ``filename``.
    """
    default_workbook = project_path / "data/files/xlsx/可对外提供资料清单说明-V1.0版本2022.1.13.xlsx"
    # Alternative sample workbook kept for reference:
    #   project_path / "data/files/xlsx/财务部流程文档编号记录.xlsx"

    cli = argparse.ArgumentParser()
    cli.add_argument("--filename", type=str, default=default_workbook.as_posix())
    return cli.parse_args()


@BaseToMarkdown.register("pandas")
class ExcelToMarkdown(BaseToMarkdown):
    """Convert every sheet of an Excel workbook into Markdown tables.

    The class is registered on ``BaseToMarkdown`` under the key ``"pandas"``.
    The workbook is read once, at construction time, with ``pandas``.
    """

    def __init__(self, filename: str):
        """Load the workbook.

        Args:
            filename: path of the Excel file; forwarded to the base class,
                which is expected to expose it as ``self.filename``.
        """
        super().__init__(filename)
        # sheet_name=None asks pandas for all sheets, returned as a mapping
        # of sheet name -> DataFrame.
        self.excel = pd.read_excel(self.filename, sheet_name=None)

    def get_md_text(self) -> str:
        """Render all sheets as one Markdown string.

        Each sheet contributes its name, a blank line, the table produced by
        ``DataFrame.to_markdown(index=False)`` and another blank line.

        Returns:
            The concatenated Markdown text; an empty string when the
            workbook mapping has no sheets.
        """
        sections = []
        for sheet_name, df in self.excel.items():
            md_text = df.to_markdown(index=False)
            sections.append(f"{sheet_name}\n\n{md_text}\n\n")

        return "".join(sections)

    def save_to_zip(self, output_dir: str):
        """Write the Markdown to a temporary folder and zip it into ``output_dir``.

        A random UUID is used as the name of the temporary folder, of the
        Markdown file inside it and of the resulting archive.

        Args:
            output_dir: directory in which ``<uuid>.zip`` is created.

        Returns:
            The path of the zip file (``output_dir`` joined with ``<uuid>.zip``).

        Raises:
            FileExistsError: if the temporary folder already exists.
        """
        basename = str(uuid.uuid4())

        temp_dir = Path(tempfile.gettempdir()) / basename
        temp_dir.mkdir(parents=True, exist_ok=False)

        # The temporary folder must be removed even when rendering, writing
        # or zipping fails; otherwise every failed run leaves a stray
        # directory behind in the system temp location.
        try:
            md_file = temp_dir / f"{basename}.md"
            md_text = self.get_md_text()
            with open(md_file.as_posix(), "w", encoding="utf-8") as f:
                f.write(md_text)
            output_zip_file = os.path.join(output_dir, f"{basename}.zip")

            # zip_directory is not defined in this class; presumably it is
            # inherited from BaseToMarkdown.
            self.zip_directory(temp_dir, output_zip_file)
        finally:
            shutil.rmtree(temp_dir)
        return output_zip_file


def main():
    """Convert the workbook named on the command line and print the zip path.

    The archive is written to the current working directory.
    """
    cli_args = get_args()
    converter = ExcelToMarkdown(cli_args.filename)

    # Report where the archive ended up so the caller can pick it up.
    print(converter.save_to_zip(output_dir="."))


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()