File size: 4,034 Bytes
b72ab63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os

import pygit2

from fsspec.spec import AbstractFileSystem

from .memory import MemoryFile


class GitFileSystem(AbstractFileSystem):
    """Browse the files of a local git repo at any hash/tag/branch

    (experimental backend)
    """

    root_marker = ""
    cachable = True

    def __init__(self, path=None, fo=None, ref=None, **kwargs):
        """

        Parameters
        ----------
        path: str (optional)
            Local location of the repo (uses current directory if not given).
            May be deprecated in favour of ``fo``. When used with a higher
            level function such as fsspec.open(), may be of the form
            "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
            file path should not contain "@" or ":").
        fo: str (optional)
            Same as ``path``, but passed as part of a chained URL. This one
            takes precedence if both are given.
        ref: str (optional)
            Reference to work with, could be a hash, tag or branch name. Defaults
            to current working tree. Note that ``ls`` and ``open`` also take hash,
            so this becomes the default for those operations
        kwargs
        """
        super().__init__(**kwargs)
        self.repo = pygit2.Repository(fo or path or os.getcwd())
        self.ref = ref or "master"

    @classmethod
    def _strip_protocol(cls, path):
        path = super()._strip_protocol(path).lstrip("/")
        if ":" in path:
            path = path.split(":", 1)[1]
        if "@" in path:
            path = path.split("@", 1)[1]
        return path.lstrip("/")

    def _path_to_object(self, path, ref):
        comm, ref = self.repo.resolve_refish(ref or self.ref)
        parts = path.split("/")
        tree = comm.tree
        for part in parts:
            if part and isinstance(tree, pygit2.Tree):
                tree = tree[part]
        return tree

    @staticmethod
    def _get_kwargs_from_urls(path):
        if path.startswith("git://"):
            path = path[6:]
        out = {}
        if ":" in path:
            out["path"], path = path.split(":", 1)
        if "@" in path:
            out["ref"], path = path.split("@", 1)
        return out

    def ls(self, path, detail=True, ref=None, **kwargs):
        path = self._strip_protocol(path)
        tree = self._path_to_object(path, ref)
        if isinstance(tree, pygit2.Tree):
            out = []
            for obj in tree:
                if isinstance(obj, pygit2.Tree):
                    out.append(
                        {
                            "type": "directory",
                            "name": "/".join([path, obj.name]).lstrip("/"),
                            "hex": obj.hex,
                            "mode": f"{obj.filemode:o}",
                            "size": 0,
                        }
                    )
                else:
                    out.append(
                        {
                            "type": "file",
                            "name": "/".join([path, obj.name]).lstrip("/"),
                            "hex": obj.hex,
                            "mode": f"{obj.filemode:o}",
                            "size": obj.size,
                        }
                    )
        else:
            obj = tree
            out = [
                {
                    "type": "file",
                    "name": obj.name,
                    "hex": obj.hex,
                    "mode": f"{obj.filemode:o}",
                    "size": obj.size,
                }
            ]
        if detail:
            return out
        return [o["name"] for o in out]

    def ukey(self, path, ref=None):
        return self.info(path, ref=ref)["hex"]

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        ref=None,
        **kwargs,
    ):
        obj = self._path_to_object(path, ref or self.ref)
        return MemoryFile(data=obj.data)