Spaces:
Sleeping
Sleeping
#!/usr/bin/python3 | |
# -*- coding: utf-8 -*- | |
import argparse | |
import os | |
from pathlib import Path | |
import shutil | |
import tempfile | |
import uuid | |
import pandas as pd | |
from project_settings import project_path | |
from toolbox.to_markdown.base_to_markdown import BaseToMarkdown | |
def get_args():
    """Parse the command-line options of the Excel-to-Markdown script.

    Returns:
        argparse.Namespace: The parsed options. ``filename`` is the path of
        the workbook to convert; it defaults to a sample workbook located
        under ``project_path``.
    """
    default_workbook = project_path / "data/files/xlsx/可对外提供资料清单说明-V1.0版本2022.1.13.xlsx"
    # Another sample workbook that can be used as the default instead:
    # "data/files/xlsx/财务部流程文档编号记录.xlsx"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--filename",
        type=str,
        default=default_workbook.as_posix(),
    )
    return arg_parser.parse_args()
class ExcelToMarkdown(BaseToMarkdown):
    """Render every sheet of an Excel workbook as Markdown tables."""

    def __init__(self, filename: str):
        """Load the workbook that will be converted.

        Args:
            filename: Path of the Excel file to read.
        """
        super().__init__(filename)
        # sheet_name=None asks pandas for all sheets; the result maps each
        # sheet name to its DataFrame.
        self.excel = pd.read_excel(self.filename, sheet_name=None)

    def get_md_text(self) -> str:
        """Return the Markdown text for the whole workbook.

        Each sheet contributes its name, a blank line, the sheet rendered as
        a Markdown table (without the index column) and another blank line.
        An empty workbook mapping yields an empty string.
        """
        # Build all sections first and join once instead of growing a string
        # with ``+=`` inside the loop.
        return "".join(
            f"{sheet_name}\n\n{df.to_markdown(index=False)}\n\n"
            for sheet_name, df in self.excel.items()
        )

    def save_to_zip(self, output_dir: str) -> str:
        """Write the Markdown to a temporary folder and zip it into *output_dir*.

        A fresh UUID names the temporary folder, the Markdown file inside it
        and the resulting archive.

        Args:
            output_dir: Directory in which the ``<uuid>.zip`` file is created.

        Returns:
            The path of the created zip file.

        Raises:
            FileExistsError: If the temporary folder already exists.
        """
        basename = str(uuid.uuid4())
        temp_dir = Path(tempfile.gettempdir()) / basename
        # Created outside the ``try`` so that a failing mkdir never triggers
        # removal of a directory this call did not create.
        temp_dir.mkdir(parents=True, exist_ok=False)

        output_zip_file = os.path.join(output_dir, f"{basename}.zip")
        try:
            md_file = temp_dir / f"{basename}.md"
            md_file.write_text(self.get_md_text(), encoding="utf-8")

            # zip_directory is not defined in this class; presumably it is
            # provided by BaseToMarkdown.
            self.zip_directory(temp_dir, output_zip_file)
        finally:
            # Always remove the scratch folder, even when rendering, writing
            # or zipping fails, so no temporary data is left behind.
            shutil.rmtree(temp_dir)
        return output_zip_file
def main():
    """Convert the workbook given on the command line and print the zip path."""
    cli_args = get_args()
    converter = ExcelToMarkdown(cli_args.filename)
    # The archive is written into the current working directory.
    zip_path = converter.save_to_zip(output_dir=".")
    print(zip_path)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()