#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Convert every sheet of an Excel workbook to Markdown and pack it in a zip."""
import argparse
import os
from pathlib import Path
import shutil
import tempfile
import uuid

import pandas as pd

from project_settings import project_path
from toolbox.to_markdown.base_to_markdown import BaseToMarkdown


def get_args() -> argparse.Namespace:
    """Parse the command-line arguments.

    Returns:
        The parsed namespace; ``filename`` is the path of the workbook to
        convert and defaults to a sample file under ``project_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filename",
        default=(project_path / "data/files/xlsx/可对外提供资料清单说明-V1.0版本2022.1.13.xlsx").as_posix(),
        # Alternative sample input:
        # default=(project_path / "data/files/xlsx/财务部流程文档编号记录.xlsx").as_posix(),
        type=str,
    )
    return parser.parse_args()


@BaseToMarkdown.register("pandas")
class ExcelToMarkdown(BaseToMarkdown):
    """Render an Excel workbook as Markdown tables using pandas."""

    def __init__(self, filename: str):
        """Load all sheets of the workbook at ``filename``.

        Args:
            filename: Path of the Excel file to read.
        """
        super().__init__(filename)
        # sheet_name=None makes pandas return a dict mapping each sheet name
        # to its DataFrame instead of only the first sheet.
        # NOTE(review): self.filename is presumably set by
        # BaseToMarkdown.__init__ -- confirm against the base class.
        self.excel = pd.read_excel(self.filename, sheet_name=None)

    def get_md_text(self) -> str:
        """Return the Markdown text for the whole workbook.

        Each sheet contributes its name, a blank line, the sheet rendered as
        a Markdown table (without the index column) and another blank line.
        """
        return "".join(
            f"{sheet_name}\n\n{df.to_markdown(index=False)}\n\n"
            for sheet_name, df in self.excel.items()
        )

    def save_to_zip(self, output_dir: str) -> str:
        """Write the Markdown to a temporary directory and zip it.

        Args:
            output_dir: Directory in which the zip archive is created.

        Returns:
            Path of the created archive, ``<output_dir>/<uuid4>.zip``.
        """
        basename = str(uuid.uuid4())
        temp_dir = Path(tempfile.gettempdir()) / basename
        temp_dir.mkdir(parents=True, exist_ok=False)

        # try/finally guarantees the scratch directory is removed even when
        # rendering, writing or zipping fails.
        try:
            md_file = temp_dir / f"{basename}.md"
            md_file.write_text(self.get_md_text(), encoding="utf-8")

            output_zip_file = os.path.join(output_dir, f"{basename}.zip")

            # zip_directory is not defined in this file; presumably inherited
            # from BaseToMarkdown -- confirm against the base class.
            self.zip_directory(temp_dir, output_zip_file)
        finally:
            shutil.rmtree(temp_dir)

        return output_zip_file


def main() -> None:
    """Convert the workbook named on the command line and print the zip path."""
    args = get_args()

    e2m = ExcelToMarkdown(args.filename)
    output_zip_file = e2m.save_to_zip(output_dir=".")
    print(output_zip_file)


if __name__ == "__main__":
    main()