File size: 2,118 Bytes
f239efc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os.path as osp
import os
import re
import multiprocessing
import functools
import huggingface_hub
from huggingface_hub import snapshot_download


def upload(repo_id, local_dir, path_in_repo, repo_type, token):
    """Upload a local folder to a Hugging Face Hub repository.

    Thin wrapper that forwards its arguments to
    ``huggingface_hub.upload_folder``.

    Args:
        repo_id: Forwarded as ``repo_id``.
        local_dir: Local folder to upload; forwarded as ``folder_path``.
        path_in_repo: Forwarded as ``path_in_repo``.
        repo_type: Forwarded as ``repo_type``.
        token: Forwarded as ``token``.
    """
    upload_kwargs = {
        'repo_id': repo_id,
        'folder_path': local_dir,
        'path_in_repo': path_in_repo,
        'token': token,
        'repo_type': repo_type,
    }
    huggingface_hub.upload_folder(**upload_kwargs)

def download(repo_id, local_dir, repo_type, token, filter_re=None, num_workers=8):
    """Download files of a Hub repository into ``local_dir`` in parallel.

    The repository's file list is fetched with
    ``huggingface_hub.list_repo_files``; each selected file is then fetched
    with ``huggingface_hub.hf_hub_download`` from a pool of worker processes.

    Args:
        repo_id: Repository to download from.
        local_dir: Forwarded to ``hf_hub_download`` as ``local_dir``.
        repo_type: Forwarded as ``repo_type`` to both Hub calls.
        token: Forwarded as ``token`` to both Hub calls.
        filter_re: Optional regular expression. When given, only files whose
            repo path matches it (``re.search`` semantics) are downloaded.
        num_workers: Number of worker processes in the pool (default 8, the
            value that used to be hard-coded).
    """
    files = huggingface_hub.list_repo_files(repo_id, repo_type=repo_type, token=token)
    if filter_re is not None:
        # Compile once instead of re-resolving the pattern for every file.
        pattern = re.compile(filter_re)
        files = [file for file in files if pattern.search(file) is not None]

    # repo_id is bound positionally so that pool.map supplies each file name
    # as the next positional argument of hf_hub_download.
    # NOTE(review): newer huggingface_hub releases may deprecate
    # local_dir_use_symlinks -- confirm against the installed version.
    download_func = functools.partial(
        huggingface_hub.hf_hub_download,
        repo_id,
        repo_type=repo_type,
        local_dir=local_dir,
        local_dir_use_symlinks=True,
        token=token
    )

    if files:
        # The context manager shuts the workers down once map() has returned;
        # previously the pool was never closed and its processes leaked.
        with multiprocessing.Pool(num_workers) as pool:
            pool.map(download_func, files)
    print(f'downloaded files {files}')


def upload_file(repo_id, file_path, repo_type, token):
    """Upload a single local file to a Hugging Face Hub repository.

    Forwards to ``huggingface_hub.upload_file``. The same ``file_path`` is
    used both as the local source and as the destination path in the repo.

    Args:
        repo_id: Forwarded as ``repo_id``.
        file_path: Local file; forwarded as ``path_or_fileobj`` and
            ``path_in_repo``.
        repo_type: Forwarded as ``repo_type``.
        token: Forwarded as ``token``.
    """
    upload_kwargs = {
        'repo_id': repo_id,
        'path_or_fileobj': file_path,
        # The file keeps its local path inside the repository.
        'path_in_repo': file_path,
        'token': token,
        'repo_type': repo_type,
    }
    huggingface_hub.upload_file(**upload_kwargs)

if __name__ == '__main__':
    # Placeholder configuration: replace the '...' values before running.
    # Only read_token is used by the active code below; the other values are
    # consumed by the commented-out examples.
    read_token = '...'
    write_token = '...'
    repo_id = '...'
    local_dir = '...'
    repo_type = '...'

    # #############
    # # Examples of the simplest hf usage
    # # download
    # filters = []
    # for filter_re in filters:
    #     download(repo_id,
    #              local_dir,
    #              repo_type,
    #              read_token,
    #              filter_re)

    # # upload
    # upload(repo_id, local_dir, local_dir, repo_type, write_token)
    # #############

    # download models
    model_repo_ids = [
        'ermu2001/pllava-7b',
        'ermu2001/pllava-13b',
    ]
    # Dedicated loop names keep the repo_id / local_dir placeholders above
    # intact instead of overwriting them on every iteration.
    for model_repo_id in model_repo_ids:
        # e.g. 'ermu2001/pllava-7b' -> 'MODELS/pllava-7b'
        model_dir = model_repo_id.replace('ermu2001', 'MODELS')
        snapshot_download(
            model_repo_id,
            local_dir=model_dir,
            repo_type='model',
            local_dir_use_symlinks=True,
            token=read_token,
        )