Upload operator.py with huggingface_hub
Browse files- operator.py +18 -3
operator.py
CHANGED
@@ -3,6 +3,7 @@ from dataclasses import field
|
|
3 |
from typing import Any, Dict, Generator, List, Optional, Union
|
4 |
|
5 |
from .artifact import Artifact
|
|
|
6 |
from .random_utils import nested_seed
|
7 |
from .stream import MultiStream, Stream
|
8 |
|
@@ -83,9 +84,14 @@ class SourceOperator(StreamSource):
|
|
83 |
|
84 |
"""
|
85 |
|
|
|
|
|
86 |
def __call__(self) -> MultiStream:
|
87 |
with nested_seed():
|
88 |
-
|
|
|
|
|
|
|
89 |
|
90 |
@abstractmethod
|
91 |
def process(self) -> MultiStream:
|
@@ -102,8 +108,13 @@ class StreamInitializerOperator(StreamSource):
|
|
102 |
|
103 |
"""
|
104 |
|
|
|
|
|
105 |
def __call__(self, *args, **kwargs) -> MultiStream:
|
106 |
with nested_seed():
|
|
|
|
|
|
|
107 |
return self.process(*args, **kwargs)
|
108 |
|
109 |
@abstractmethod
|
@@ -118,6 +129,8 @@ class MultiStreamOperator(StreamingOperator):
|
|
118 |
A multi-stream operator is a type of `StreamingOperator` that operates on an entire MultiStream object at once. It takes a `MultiStream` as input and produces a `MultiStream` as output. The `process` method should be implemented by subclasses to define the specific operations to be performed on the input `MultiStream`.
|
119 |
"""
|
120 |
|
|
|
|
|
121 |
def __call__(self, multi_stream: Optional[MultiStream] = None) -> MultiStream:
|
122 |
with nested_seed():
|
123 |
return self._process_multi_stream(multi_stream)
|
@@ -125,6 +138,8 @@ class MultiStreamOperator(StreamingOperator):
|
|
125 |
def _process_multi_stream(self, multi_stream: Optional[MultiStream] = None) -> MultiStream:
|
126 |
result = self.process(multi_stream)
|
127 |
assert isinstance(result, MultiStream), "MultiStreamOperator must return a MultiStream"
|
|
|
|
|
128 |
return result
|
129 |
|
130 |
@abstractmethod
|
@@ -198,7 +213,7 @@ class SingleStreamReducer(StreamingOperator):
|
|
198 |
return result
|
199 |
|
200 |
@abstractmethod
|
201 |
-
def process(self, stream: Stream) ->
|
202 |
pass
|
203 |
|
204 |
|
@@ -296,7 +311,7 @@ class InstanceOperatorWithGlobalAccess(StreamingOperator):
|
|
296 |
|
297 |
if self.cache_accessible_streams:
|
298 |
for stream in self.accessible_streams.values():
|
299 |
-
stream.
|
300 |
|
301 |
for stream_name, stream in multi_stream.items():
|
302 |
stream = Stream(self.generator, gen_kwargs={"stream": stream, "multi_stream": self.accessible_streams})
|
|
|
3 |
from typing import Any, Dict, Generator, List, Optional, Union
|
4 |
|
5 |
from .artifact import Artifact
|
6 |
+
from .dataclass import NonPositionalField
|
7 |
from .random_utils import nested_seed
|
8 |
from .stream import MultiStream, Stream
|
9 |
|
|
|
84 |
|
85 |
"""
|
86 |
|
87 |
+
caching: bool = NonPositionalField(default=None)
|
88 |
+
|
89 |
def __call__(self) -> MultiStream:
|
90 |
with nested_seed():
|
91 |
+
multi_stream = self.process()
|
92 |
+
if self.caching is not None:
|
93 |
+
multi_stream.set_caching(self.caching)
|
94 |
+
return multi_stream
|
95 |
|
96 |
@abstractmethod
|
97 |
def process(self) -> MultiStream:
|
|
|
108 |
|
109 |
"""
|
110 |
|
111 |
+
caching: bool = NonPositionalField(default=None)
|
112 |
+
|
113 |
def __call__(self, *args, **kwargs) -> MultiStream:
|
114 |
with nested_seed():
|
115 |
+
multi_stream = self.process(*args, **kwargs)
|
116 |
+
if self.caching is not None:
|
117 |
+
multi_stream.set_caching(self.caching)
|
118 |
return self.process(*args, **kwargs)
|
119 |
|
120 |
@abstractmethod
|
|
|
129 |
A multi-stream operator is a type of `StreamingOperator` that operates on an entire MultiStream object at once. It takes a `MultiStream` as input and produces a `MultiStream` as output. The `process` method should be implemented by subclasses to define the specific operations to be performed on the input `MultiStream`.
|
130 |
"""
|
131 |
|
132 |
+
caching: bool = NonPositionalField(default=None)
|
133 |
+
|
134 |
def __call__(self, multi_stream: Optional[MultiStream] = None) -> MultiStream:
|
135 |
with nested_seed():
|
136 |
return self._process_multi_stream(multi_stream)
|
|
|
138 |
def _process_multi_stream(self, multi_stream: Optional[MultiStream] = None) -> MultiStream:
|
139 |
result = self.process(multi_stream)
|
140 |
assert isinstance(result, MultiStream), "MultiStreamOperator must return a MultiStream"
|
141 |
+
if self.caching is not None:
|
142 |
+
result.set_caching(self.caching)
|
143 |
return result
|
144 |
|
145 |
@abstractmethod
|
|
|
213 |
return result
|
214 |
|
215 |
@abstractmethod
|
216 |
+
def process(self, stream: Stream) -> Stream:
|
217 |
pass
|
218 |
|
219 |
|
|
|
311 |
|
312 |
if self.cache_accessible_streams:
|
313 |
for stream in self.accessible_streams.values():
|
314 |
+
stream.caching = True
|
315 |
|
316 |
for stream_name, stream in multi_stream.items():
|
317 |
stream = Stream(self.generator, gen_kwargs={"stream": stream, "multi_stream": self.accessible_streams})
|