"""Command-line utility that uploads a model directory to the HuggingFace Hub."""
import argparse | |
import json | |
import logging | |
import os | |
import tempfile | |
import zipfile | |
from datetime import datetime | |
from pathlib import Path | |
from typing import Optional, Union | |
import huggingface_hub | |
from relik.common.log import get_logger | |
from relik.common.utils import SAPIENZANLP_DATE_FORMAT, get_md5 | |
# Module-wide logger, obtained from relik's ``get_logger`` helper at DEBUG level.
logger = get_logger(level=logging.DEBUG)
def create_info_file(tmpdir: Path):
    """Write an ``info.json`` describing ``model.zip`` into *tmpdir*.

    The JSON document has two keys: ``md5``, the value returned by
    ``get_md5`` for ``tmpdir / "model.zip"``, and ``upload_date``, the current
    local time formatted with ``SAPIENZANLP_DATE_FORMAT``.

    Args:
        tmpdir: Directory that contains ``model.zip`` and receives
            ``info.json``.
    """
    archive_path = tmpdir / "model.zip"
    info_path = tmpdir / "info.json"

    logger.debug("Computing md5 of model.zip")
    checksum = get_md5(archive_path)
    timestamp = datetime.now().strftime(SAPIENZANLP_DATE_FORMAT)

    logger.debug("Dumping info.json file")
    payload = {"md5": checksum, "upload_date": timestamp}
    info_path.write_text(json.dumps(payload, indent=2))
def zip_run(
    dir_path: Union[str, os.PathLike],
    tmpdir: Union[str, os.PathLike],
    zip_name: str = "model.zip",
) -> Path:
    """Archive every file under *dir_path* into ``tmpdir / zip_name``.

    The directory layout is preserved: each entry is stored under its path
    relative to *dir_path*. Directories themselves are not stored as
    separate entries.

    Args:
        dir_path: Directory whose content is archived.
        tmpdir: Directory in which the zip file is created.
        zip_name: File name of the archive inside *tmpdir*.

    Returns:
        The path of the zip file that was written.
    """
    logger.debug(f"zipping {dir_path} to {tmpdir}")
    run_dir = Path(dir_path)
    # tmpdir may be a plain string, so normalise it before using the `/` operator
    zip_path = Path(tmpdir) / zip_name
    resolved_zip_path = zip_path.resolve()
    with zipfile.ZipFile(zip_path, "w") as zip_file:
        # "*" instead of "*.*": files without a dot in their name (LICENSE,
        # merges, ...) belong to the run directory as well
        for file in run_dir.rglob("*"):
            if not file.is_file():
                continue
            # never add the archive to itself when tmpdir lies inside dir_path
            if file.resolve() == resolved_zip_path:
                continue
            zip_file.write(file, arcname=file.relative_to(run_dir))
    return zip_path
def _copy_model_files(
    src_dir: Union[str, os.PathLike], dst_dir: Union[str, os.PathLike]
) -> None:
    """Copy the non-hidden top-level entries of *src_dir* into *dst_dir*."""
    # function-scope import: shutil is not among the file-level imports
    import shutil

    destination = Path(dst_dir)
    for entry in Path(src_dir).iterdir():
        # Same selection as the `src_dir/*` shell glob this replaces: top-level
        # dotfiles are left out. That matters for a `.git` folder, which would
        # otherwise be merged into the freshly cloned repository.
        if entry.name.startswith("."):
            continue
        target = destination / entry.name
        if entry.is_dir():
            # dirs_exist_ok (Python 3.8+) merges into folders the cloned repo
            # already contains instead of failing
            shutil.copytree(entry, target, dirs_exist_ok=True)
        else:
            shutil.copy2(entry, target)


def upload(
    model_dir: Union[str, os.PathLike],
    model_name: str,
    organization: Optional[str] = None,
    repo_name: Optional[str] = None,
    commit: Optional[str] = None,
    archive: bool = False,
):
    """Push the content of *model_dir* to a HuggingFace Hub repository.

    The repository is created if needed, cloned into a temporary directory,
    filled with the model files and pushed back. If no HuggingFace token is
    stored locally, a message is printed and nothing is uploaded.

    Args:
        model_dir: Directory holding the model files.
        model_name: Name of the model; used as repository name when
            *repo_name* is not given.
        organization: Optional namespace prepended to the repository id
            (``organization/repo``).
        repo_name: Optional repository name overriding *model_name*.
        commit: Commit message; defaults to
            ``"Automatic push from sapienzanlp"``.
        archive: When true, the directory is uploaded as ``model.zip``
            together with an ``info.json``; otherwise the files are copied
            into the repository as they are.

    Raises:
        OSError: If copying the model files into the clone fails (for
            example when *model_dir* does not exist).
    """
    token = huggingface_hub.HfFolder.get_token()
    if token is None:
        print(
            "No HuggingFace token found. You need to execute `huggingface-cli login` first!"
        )
        return

    repo_id = repo_name or model_name
    if organization is not None:
        repo_id = f"{organization}/{repo_id}"

    with tempfile.TemporaryDirectory() as tmpdir:
        api = huggingface_hub.HfApi()
        repo_url = api.create_repo(
            token=token,
            repo_id=repo_id,
            exist_ok=True,
        )
        repo = huggingface_hub.Repository(
            str(tmpdir), clone_from=repo_url, use_auth_token=token
        )

        tmp_path = Path(tmpdir)
        if archive:
            # ship the model as a single zip plus an info.json with its checksum
            logger.debug(f"Zipping {model_dir} to {tmp_path}")
            zip_run(model_dir, tmp_path)
            create_info_file(tmp_path)
        else:
            # if the user wants to upload a transformers model, we don't need to zip it
            # we just need to copy the files to the tmpdir
            logger.debug(f"Copying {model_dir} to {tmpdir}")
            # copied with shutil rather than a shell command: paths are never
            # interpreted by a shell and a failed copy raises instead of
            # silently pushing an incomplete repository
            _copy_model_files(model_dir, tmp_path)

        # this method automatically puts large files (>10MB) into git lfs
        repo.push_to_hub(commit_message=commit or "Automatic push from sapienzanlp")
def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the upload script.

    Positional arguments are ``model_dir`` and ``model_name``. The optional
    ones are ``--organization``, ``--repo_name``, ``--commit`` and the
    ``--archive`` flag.

    Returns:
        The namespace produced by ``argparse`` from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()

    # (name, help) pairs for arguments that take a plain string value;
    # registration order is the order shown in the usage message
    plain_arguments = (
        ("model_dir", "The directory of the model you want to upload"),
        ("model_name", "The model you want to upload"),
        (
            "--organization",
            "the name of the organization where you want to upload the model",
        ),
        (
            "--repo_name",
            "Optional name to use when uploading to the HuggingFace repository",
        ),
        ("--commit", "Commit message to use when pushing to the HuggingFace Hub"),
    )
    for argument_name, description in plain_arguments:
        cli.add_argument(argument_name, help=description)

    archive_help = """
        Whether to compress the model directory before uploading it.
        If True, the model directory will be zipped and the zip file will be uploaded.
        If False, the model directory will be uploaded as is."""
    cli.add_argument("--archive", action="store_true", help=archive_help)

    return cli.parse_args()
def main():
    """Script entry point: parse the CLI arguments and forward them to ``upload``."""
    cli_namespace = parse_args()
    upload_kwargs = vars(cli_namespace)
    upload(**upload_kwargs)
# Run the upload only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()