Upload folder using huggingface_hub
- README.md +1 -1
- api.py +10 -10
- benchmark.py +2 -2
- blocks.py +1 -1
- card.py +11 -10
- dataset_utils.py +2 -2
- deprecation_utils.py +14 -9
- dialog_operators.py +22 -12
- error_utils.py +12 -8
- image_operators.py +7 -5
- inference.py +86 -48
- llm_as_judge.py +23 -23
- llm_as_judge_constants.py +5 -5
- llm_as_judge_utils.py +3 -1
- loaders.py +72 -76
- metric_utils.py +6 -9
- metrics.py +66 -43
- operator.py +34 -17
- operators.py +144 -124
- processors.py +13 -0
- schema.py +3 -0
- settings_utils.py +2 -1
- span_lableing_operators.py +9 -10
- splitters.py +31 -34
- standard.py +318 -117
- struct_data_operators.py +1 -1
- task.py +24 -17
- templates.py +33 -31
- type_utils.py +3 -0
- version.py +1 -1
README.md
CHANGED
@@ -30,7 +30,7 @@ In the dynamic landscape of generative NLP, traditional text processing pipeline
 ![license](https://img.shields.io/github/license/ibm/unitxt)
 ![python](https://img.shields.io/badge/python-3.8%20|%203.9-blue)
 ![tests](https://img.shields.io/github/actions/workflow/status/ibm/unitxt/library_tests.yml?branch=main&label=tests)
-[![
+[![Coverage Status](https://coveralls.io/repos/github/IBM/unitxt/badge.svg)](https://coveralls.io/github/IBM/unitxt)
 ![Read the Docs](https://img.shields.io/readthedocs/unitxt)
 [![downloads](https://static.pepy.tech/personalized-badge/unitxt?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/unitxt)
 
api.py
CHANGED
@@ -18,7 +18,7 @@ from .metric_utils import EvaluationResults, _compute, _inference_post_process
 from .operator import SourceOperator
 from .schema import UNITXT_DATASET_SCHEMA, loads_instance
 from .settings_utils import get_constants, get_settings
-from .standard import StandardRecipe
+from .standard import DatasetRecipe
 from .task import Task
 
 logger = get_logger()
@@ -35,7 +35,7 @@ def load(source: Union[SourceOperator, str]):
     return source().to_dataset()
 
 
-def _get_recipe_from_query(dataset_query: str) -> StandardRecipe:
+def _get_recipe_from_query(dataset_query: str) -> DatasetRecipe:
    dataset_query = dataset_query.replace("sys_prompt", "instruction")
    try:
        dataset_stream, _ = fetch_artifact(dataset_query)
@@ -44,14 +44,14 @@ def _get_recipe_from_query(dataset_query: str) -> StandardRecipe:
     return dataset_stream
 
 
-def _get_recipe_from_dict(dataset_params: Dict[str, Any]) -> StandardRecipe:
-    recipe_attributes = list(StandardRecipe.__dict__["__fields__"].keys())
+def _get_recipe_from_dict(dataset_params: Dict[str, Any]) -> DatasetRecipe:
+    recipe_attributes = list(DatasetRecipe.__dict__["__fields__"].keys())
     for param in dataset_params.keys():
         assert param in recipe_attributes, (
-            f"The parameter '{param}' is not an attribute of the 'StandardRecipe' class. "
+            f"The parameter '{param}' is not an attribute of the 'DatasetRecipe' class. "
             f"Please check if the name is correct. The available attributes are: '{recipe_attributes}'."
         )
-    return StandardRecipe(**dataset_params)
+    return DatasetRecipe(**dataset_params)
 
 
 def _verify_dataset_args(dataset_query: Optional[str] = None, dataset_args=None):
@@ -76,8 +76,8 @@ def _verify_dataset_args(dataset_query: Optional[str] = None, dataset_args=None)
     )
 
 
-def load_recipe(dataset_query: Optional[str] = None, **kwargs) -> StandardRecipe:
-    if isinstance(dataset_query, StandardRecipe):
+def load_recipe(dataset_query: Optional[str] = None, **kwargs) -> DatasetRecipe:
+    if isinstance(dataset_query, DatasetRecipe):
         return dataset_query
 
     _verify_dataset_args(dataset_query, kwargs)
@@ -230,7 +230,7 @@ def infer(
     return_data: bool = False,
     return_log_probs: bool = False,
     return_meta_data: bool = False,
-    previous_messages: Optional[
+    previous_messages: Optional[List[Dict[str, str]]] = None,
     **kwargs,
 ):
     dataset = produce(instance_or_instances, dataset_query, **kwargs)
@@ -283,7 +283,7 @@ def select(
     engine: OptionSelectingByLogProbsInferenceEngine,
     dataset_query: Optional[str] = None,
     return_data: bool = False,
-    previous_messages: Optional[
+    previous_messages: Optional[List[Dict[str, str]]] = None,
     **kwargs,
 ):
     dataset = produce(instance_or_instances, dataset_query, **kwargs)
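A minimal usage sketch of the renamed entry point, following the `load`/`load_recipe` code in this diff (the catalog query string below is illustrative):

from unitxt.api import load_recipe

# load_recipe returns a DatasetRecipe: it passes an existing DatasetRecipe
# through unchanged, or parses a key=value catalog query via fetch_artifact.
recipe = load_recipe("card=cards.wnli,template=templates.classification.multi_class.relation.default")
dataset = recipe().to_dataset()  # same call chain as load() in this module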
benchmark.py
CHANGED
@@ -5,7 +5,7 @@ from .dataclass import NonPositionalField
 from .formats import Format
 from .fusion import FixedFusion, WeightedFusion
 from .operator import SourceOperator
-from .standard import StandardRecipe
+from .standard import DatasetRecipe
 from .stream import MultiStream
 from .system_prompts import SystemPrompt
 
@@ -22,7 +22,7 @@ class BaseBenchmark(SourceOperator):
 
 
 class Benchmark(BaseBenchmark):
-    subsets: Dict[str, Union[StandardRecipe, BaseBenchmark]]
+    subsets: Dict[str, Union[DatasetRecipe, BaseBenchmark]]
 
     max_total_samples: int = None
     max_samples_per_subset: int = None
blocks.py
CHANGED
@@ -18,7 +18,7 @@ from .operators import (
 )
 from .processors import ToString, ToStringStripped
 from .recipe import SequentialRecipe
-from .splitters import
+from .splitters import AssignDemosToInstance, RandomSampler, SliceSplit, SplitRandomMix
 from .stream import MultiStream
 from .struct_data_operators import (
     ConstructTableFromRowsCols,
card.py
CHANGED
@@ -12,16 +12,17 @@ from .templates import Template, TemplatesDict, TemplatesList
 class TaskCard(Artifact):
     """TaskCard delineates the phases in transforming the source dataset into model input, and specifies the metrics for evaluation of model output.
 
-
-    loader:
-
-    preprocess_steps:
-
-    task:
-
-    templates:
-
-    default_template:
+    Args:
+        loader:
+            specifies the source address and the loading operator that can access that source and transform it into a unitxt multistream.
+        preprocess_steps:
+            list of unitxt operators to process the data source into model input.
+        task:
+            specifies the fields (of the already (pre)processed instance) making the inputs, the fields making the outputs, and the metrics to be used for evaluating the model output.
+        templates:
+            format strings to be applied on the input fields (specified by the task) and the output fields. The template also carries the instructions and the list of postprocessing steps, to be applied to the model output.
+        default_template:
+            a default template for tasks with very specific task dataset specific template
     """
 
     loader: Loader
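A sketch of assembling a TaskCard from these pieces; the GLUE/MRPC source and field names are illustrative, and the exact Task parameters may differ between unitxt versions:

from unitxt.card import TaskCard
from unitxt.loaders import LoadHF
from unitxt.task import Task
from unitxt.templates import InputOutputTemplate, TemplatesList

card = TaskCard(
    loader=LoadHF(path="glue", name="mrpc"),  # source address + loading operator
    preprocess_steps=[],  # unitxt operators; left empty for brevity
    task=Task(
        input_fields={"sentence1": str, "sentence2": str},
        reference_fields={"label": int},
        prediction_type=str,
        metrics=["metrics.accuracy"],
    ),
    templates=TemplatesList(
        [InputOutputTemplate(input_format="{sentence1} {sentence2}", output_format="{label}")]
    ),
)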
dataset_utils.py
CHANGED
@@ -5,7 +5,7 @@ from .logging_utils import get_logger
 from .parsing_utils import parse_key_equals_value_string_to_dict
 from .register import _reset_env_local_catalogs, register_all_artifacts
 from .settings_utils import get_settings
-from .standard import StandardRecipe
+from .standard import DatasetRecipe
 
 logger = get_logger()
 settings = get_settings()
@@ -24,7 +24,7 @@ def parse(query: str):
 
 
 def get_dataset_artifact(dataset):
-    if isinstance(dataset, StandardRecipe):
+    if isinstance(dataset, DatasetRecipe):
         return dataset
     assert isinstance(
         dataset, str
deprecation_utils.py
CHANGED
@@ -18,19 +18,24 @@ def compare_versions(version1, version2):
     """Compare two semantic versioning strings and determine their relationship.
 
     Parameters:
-
-
+        version1 (str):
+            The first version string to compare.
+        version2 (str):
+            The second version string to compare.
 
     Returns:
-
+        int: -1 if version1 < version2, 1 if version1 > version2, 0 if equal.
 
     Example:
-
-
-
-
-
-
+        .. code-block:: text
+
+            >>> compare_versions("1.2.0", "1.2.3")
+            -1
+            >>> compare_versions("1.3.0", "1.2.8")
+            1
+            >>> compare_versions("1.0.0", "1.0.0")
+            0
+
     """
     parts1 = [int(part) for part in version1.split(".")]
     parts2 = [int(part) for part in version2.split(".")]
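The parsed integer lists compare the way semantic versions should, since Python orders lists of ints lexicographically. A quick standalone illustration of the arithmetic behind the doctest above:

# Python compares integer lists element by element, matching semver order.
parts1 = [int(part) for part in "1.2.0".split(".")]
parts2 = [int(part) for part in "1.2.3".split(".")]
print((parts1 > parts2) - (parts1 < parts2))  # -1, as in the docstring example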
dialog_operators.py
CHANGED
@@ -27,12 +27,17 @@ class SerializeDialog(InstanceFieldOperator):
     of system responses and can operate on a per-turn basis or aggregate the entire
     dialog.
 
-
-    field (str):
-
-
-
-
+    Args:
+        field (str):
+            The field in the input data that contains the dialog.
+        to_field (Optional[str]):
+            The field in the output data where the serialized dialog will be stored.
+        last_user_turn_to_field (Optional[str]):
+            Field to store the last user turn.
+        last_system_turn_to_field (Optional[str]):
+            Field to store the last system turn.
+        context_field (Optional[str]):
+            Field that contains additional context to be prepended to the dialog.
     """
 
     format: SystemFormat = None
@@ -100,12 +105,17 @@ class SerializeOpenAiFormatDialog(SerializeDialog):
     of system responses and can operate on a per-turn basis or aggregate the entire
     dialog.
 
-
-    field (str):
-
-
-
-
+    Args:
+        field (str):
+            The field in the input data that contains the dialog.
+        to_field (Optional[str]):
+            The field in the output data where the serialized dialog will be stored.
+        last_user_turn_to_field (Optional[str]):
+            Field to store the last user turn.
+        last_system_turn_to_field (Optional[str]):
+            Field to store the last system turn.
+        context_field (Optional[str]):
+            Field that contains additional context to be prepended to the dialog.
     """
 
     is_last_turn_user_only: bool = True
error_utils.py
CHANGED
@@ -27,10 +27,12 @@ def additional_info(path: str) -> str:
 class UnitxtError(Exception):
     """Exception raised for Unitxt errors.
 
-
-    message :
-
-
+    Args:
+        message (str):
+            explanation of the error
+        additional_info_id (Optional[str]):
+            relative path to additional documentation on web
+            If set, should be one of the DOCUMENATION_* constants in the error_utils.py file.
 
     """
 
@@ -43,10 +45,12 @@ class UnitxtError(Exception):
 class UnitxtWarning:
     """Object to format warning message to log.
 
-
-    message
-
-
+    Args:
+        message (str):
+            explanation of the warning
+        additional_info_id (Optional[str]):
+            relative path to additional documentation on web
+            If set, should be one of the DOCUMENATION_* constants in the error_utils.py file.
     """
 
     def __init__(self, message: str, additional_info_id: Optional[str] = None):
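A usage sketch of the documented signatures; the messages below are hypothetical, and real `additional_info_id` values come from the DOCUMENATION_* constants mentioned above:

from unitxt.error_utils import UnitxtError, UnitxtWarning

# UnitxtWarning formats and logs on construction; UnitxtError is raised.
UnitxtWarning("field 'x' is deprecated")
raise UnitxtError("metric received an empty references list")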
image_operators.py
CHANGED
@@ -216,13 +216,15 @@ class GridLines(ImageAugmentor):
 class PixelNoise(ImageAugmentor):
     """A class that overlays a mask of randomly colored nxn squares across an image based on a specified noise rate.
 
-
-    square_size (int):
-
-    noise_rate (float):
+    Args:
+        square_size (int):
+            Size of each square in pixels.
+        noise_rate (float):
+            Proportion of the image that should be affected by noise (0 to 1).
 
     Methods:
-        process_image(image):
+        process_image(image):
+            Adds the random square mask to the provided image and returns the modified image.
     """
 
     square_size: int = 1
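A rough numpy sketch of the documented behavior, not unitxt's exact implementation: walk the image in square_size steps and overwrite a noise_rate fraction of the squares with random colors.

import numpy as np

def pixel_noise(image: np.ndarray, square_size: int = 1, noise_rate: float = 0.3) -> np.ndarray:
    # image is assumed HxWxC uint8; each grid square is independently replaced
    # with probability noise_rate, mirroring the Args described above.
    h, w, c = image.shape
    out = image.copy()
    for y in range(0, h, square_size):
        for x in range(0, w, square_size):
            if np.random.random() < noise_rate:
                out[y : y + square_size, x : x + square_size] = np.random.randint(
                    0, 256, size=(1, 1, c)
                )
    return out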
inference.py
CHANGED
@@ -9,6 +9,7 @@ import sys
 import time
 import uuid
 from collections import Counter
+from multiprocessing.pool import ThreadPool
 from typing import (
     Any,
     Dict,
@@ -63,8 +64,8 @@ class StandardAPIParamsMixin(Artifact):
     n: Optional[int] = None
     parallel_tool_calls: Optional[bool] = None
     service_tier: Optional[Literal["auto", "default"]] = None
-    credentials: Optional[
-    extra_headers: Optional[
+    credentials: Optional[Dict[str, str]] = {}
+    extra_headers: Optional[Dict[str, str]] = None
 
 
 def get_model_and_label_id(model_name, label):
@@ -1171,8 +1172,8 @@ class OptionSelectingByLogProbsInferenceEngine:
             for option in instance["task_data"]["options"]
         ]
 
-        dataset_with_options_logprobs:
-
+        dataset_with_options_logprobs: List[
+            List[Dict[str, Union[float, str]]]
         ] = self.get_options_log_probs(dataset_with_options)
 
         dataset_iterator = iter(dataset_with_options_logprobs)
@@ -1469,6 +1470,13 @@ class OpenAiInferenceEngineParams(Artifact):
     service_tier: Optional[Literal["auto", "default"]] = None
 
 
+def run_with_imap(func):
+    def inner(self, args):
+        return func(self, *args)
+
+    return inner
+
+
 class OpenAiInferenceEngine(
     InferenceEngine,
     LogProbInferenceEngine,
@@ -1485,6 +1493,7 @@ class OpenAiInferenceEngine(
     base_url: Optional[str] = None
     default_headers: Dict[str, str] = {}
     credentials: CredentialsOpenAi = {}
+    num_parallel_requests: int = 20
 
     def get_engine_id(self) -> str:
         return get_model_and_label_id(self.model_name, self.label)
@@ -1528,52 +1537,76 @@ class OpenAiInferenceEngine(
             if v is not None
         }
 
-    def _infer(
+    def _parallel_infer(
         self,
         dataset: Union[List[Dict[str, Any]], Dataset],
+        infer_func,
         return_meta_data: bool = False,
     ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
+        inputs = [(instance, return_meta_data) for instance in dataset]
         outputs = []
-
-
-
-
-
-
-
-            prediction = response.choices[0].message.content
-            output = self.get_return_object(prediction, response, return_meta_data)
-
-            outputs.append(output)
+        with ThreadPool(processes=self.num_parallel_requests) as pool:
+            for output in tqdm(
+                pool.imap(infer_func, inputs),
+                total=len(inputs),
+                desc=f"Inferring with {self.__class__.__name__}",
+            ):
+                outputs.append(output)
 
         return outputs
 
+    def _infer(
+        self,
+        dataset: Union[List[Dict[str, Any]], Dataset],
+        return_meta_data: bool = False,
+    ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
+        return self._parallel_infer(
+            dataset=dataset,
+            return_meta_data=return_meta_data,
+            infer_func=self._get_chat_completion,
+        )
+
     def _infer_log_probs(
         self,
         dataset: Union[List[Dict[str, Any]], Dataset],
         return_meta_data: bool = False,
     ) -> Union[List[Dict], List[TextGenerationInferenceOutput]]:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        return self._parallel_infer(
+            dataset=dataset,
+            return_meta_data=return_meta_data,
+            infer_func=self._get_logprobs,
+        )
+
+    @run_with_imap
+    def _get_chat_completion(self, instance, return_meta_data):
+        messages = self.to_messages(instance)
+        response = self.client.chat.completions.create(
+            messages=messages,
+            model=self.model_name,
+            **self._get_completion_kwargs(),
+        )
+        prediction = response.choices[0].message.content
+        return self.get_return_object(prediction, response, return_meta_data)
+
+    @run_with_imap
+    def _get_logprobs(self, instance, return_meta_data):
+        messages = self.to_messages(instance)
+        response = self.client.chat.completions.create(
+            messages=messages,
+            model=self.model_name,
+            **self._get_completion_kwargs(),
+        )
+        top_logprobs_response = response.choices[0].logprobs.content
+        pred_output = [
+            {
+                "top_tokens": [
+                    {"text": obj.token, "logprob": obj.logprob}
+                    for obj in generated_token.top_logprobs
+                ]
+            }
+            for generated_token in top_logprobs_response
+        ]
+        return self.get_return_object(pred_output, response, return_meta_data)
 
     def get_return_object(self, predict_result, response, return_meta_data):
         if return_meta_data:
@@ -1807,16 +1840,19 @@ class WMLInferenceEngineBase(
 ):
     """Base for classes running inference using ibm-watsonx-ai.
 
-
-    credentials (Dict[str, str], optional):
+    Args:
+        credentials (Dict[str, str], optional):
+            By default, it is created by a class
             instance which tries to retrieve proper environment variables
             ("WML_URL", "WML_PROJECT_ID", "WML_SPACE_ID", "WML_APIKEY", "WML_USERNAME", "WML_PASSWORD").
             However, a dictionary with the following keys: "url", "apikey", "project_id", "space_id",
             "username", "password".
             can be directly provided instead.
-    model_name (str, optional):
+        model_name (str, optional):
+            ID of a model to be used for inference. Mutually
             exclusive with 'deployment_id'.
-    deployment_id (str, optional):
+        deployment_id (str, optional):
+            Deployment ID of a tuned model to be used for
             inference. Mutually exclusive with 'model_name'.
         parameters (Union[WMLInferenceEngineParams, WMLGenerationParamsMixin, WMLChatParamsMixin], optional):
             Defines inference parameters and their values. Deprecated attribute, please pass respective
@@ -2077,9 +2113,10 @@ class WMLInferenceEngineGeneration(WMLInferenceEngineBase, WMLGenerationParamsMi
 
     If you want to include images in your input, please use 'WMLInferenceEngineChat' instead.
 
-
-    concurrency_limit (int):
-
+    Args:
+        concurrency_limit (int):
+            Number of concurrent requests sent to a model. Default is 10,
+            which is also the maximum value.
 
     Examples:
         .. code-block:: python
@@ -2207,10 +2244,11 @@ class WMLInferenceEngineChat(WMLInferenceEngineBase, WMLChatParamsMixin):
     concatenate images within an instance into a single image and adjust your query
     accordingly (if necessary).
 
-
-    image_encoder (EncodeImageToString, optional):
-
-
+    Args:
+        image_encoder (EncodeImageToString, optional):
+            operator which encodes images in
+            given format to base64 strings required by service. You should specify it when
+            you are using images in your inputs.
 
     Example:
         .. code-block:: python
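The `run_with_imap` decorator exists because `ThreadPool.imap` hands each work item to the worker as a single argument; the decorator unpacks the `(instance, return_meta_data)` tuple back into positional arguments. A standalone sketch of the pattern (the `Engine.query` stand-in is illustrative, not part of this diff):

from multiprocessing.pool import ThreadPool


def run_with_imap(func):
    # imap calls inner(args) with one tuple; unpack it for the wrapped method.
    def inner(self, args):
        return func(self, *args)

    return inner


class Engine:
    num_parallel_requests = 4

    @run_with_imap
    def query(self, instance, return_meta_data):
        # stand-in for a blocking API call
        return (instance.upper(), return_meta_data)

    def infer(self, dataset, return_meta_data=False):
        inputs = [(instance, return_meta_data) for instance in dataset]
        with ThreadPool(processes=self.num_parallel_requests) as pool:
            # imap preserves input order while running requests concurrently
            return list(pool.imap(self.query, inputs))


print(Engine().infer(["a", "b", "c"]))  # [('A', False), ('B', False), ('C', False)]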
llm_as_judge.py
CHANGED
@@ -1,6 +1,6 @@
 import itertools
 from difflib import get_close_matches
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 from .api import infer
 from .artifact import fetch_artifact
@@ -145,7 +145,7 @@ class LLMJudge(BulkInstanceMetric):
         )
         return
 
-    def get_contexts(self, task_data:
+    def get_contexts(self, task_data: List[Dict[str, Any]]) -> List[Dict[str, str]]:
         return [
             get_parsed_context(
                 {
@@ -161,7 +161,7 @@ class LLMJudge(BulkInstanceMetric):
         instances: list,
         task: Task,
         template: Template,
-        previous_messages: Optional[
+        previous_messages: Optional[List[Dict[str, str]]] = None,
     ):
         outputs_dataset = infer(
             instances,
@@ -172,11 +172,11 @@ class LLMJudge(BulkInstanceMetric):
             return_data=True,
             previous_messages=previous_messages,
         )
-        prompts:
-        raw_predictions:
+        prompts: List[str] = [instance["source"] for instance in outputs_dataset]
+        raw_predictions: List[str] = [
             instance["raw_prediction"] for instance in outputs_dataset
         ]
-        predictions:
+        predictions: List[str] = [
             instance["prediction"] for instance in outputs_dataset
         ]
         return (prompts, raw_predictions, predictions)
@@ -274,7 +274,7 @@ class LLMJudgeDirect(LLMJudge):
             raise Exception(
                 f"The type of the criteria must be 'CriteriaWithOptions', instead it is of type '{type(self.criteria)}'"
             )
-        criterias:
+        criterias: List[CriteriaWithOptions] = [self.criteria] * eval_count
         unique_criterias = list({criteria.name for criteria in criterias})
         self.logger.info(f"Criteria names are '{', '.join(unique_criterias)}'")
         return criterias
@@ -289,8 +289,8 @@ class LLMJudgeDirect(LLMJudge):
         option_selection_outputs,
         selections,
         evaluations_count,
-        criterias:
-    ) ->
+        criterias: List[CriteriaWithOptions],
+    ) -> List[Dict[str, Any]]:
         positional_bias = None
         if self.check_positional_bias:
             positional_bias = [
@@ -353,9 +353,9 @@ class LLMJudgeDirect(LLMJudge):
 
     def compute(
         self,
-        references:
-        predictions:
-        task_data:
+        references: List[List[str]],
+        predictions: List[str],
+        task_data: List[Dict[str, Any]],
     ) -> dict:
         self.logger.info(
             f'Starting evaluation with evaluator "{self.evaluator_name}" and provider "{self.inference_engine.get_pretty_print_name()}'
@@ -545,7 +545,7 @@ class LLMJudgePairwise(LLMJudge):
                 f"The type of the criteria must be 'Criteria', instead it is of type '{type(self.criteria)}'"
             )
 
-        criterias:
+        criterias: List[Criteria] = [self.criteria] * eval_count
 
         unique_criterias = list({criteria.name for criteria in criterias})
         self.logger.info(f"Criteria names are '{', '.join(unique_criterias)}'")
@@ -553,7 +553,7 @@ class LLMJudgePairwise(LLMJudge):
 
     def get_instance_results(
         self,
-        instance_predictions:
+        instance_predictions: Dict[str, str],
         assessment_prompts,
         assessment_outputs,
         summarization_prompts,
@@ -728,7 +728,7 @@ class LLMJudgePairwise(LLMJudge):
         all_results["criteria"] = criteria.to_json()
         return self.clean_results(all_results)
 
-    def parse_prediction_to_dict(self, prediction: Union[
+    def parse_prediction_to_dict(self, prediction: Union[Dict[str, str], List[str]]):
         if isinstance(prediction, list):
             return {f"{key + 1}": value for key, value in enumerate(prediction)}
 
@@ -740,15 +740,15 @@ class LLMJudgePairwise(LLMJudge):
         )
 
     def convert_predictions_to_dicts(
-        self, predictions: Union[
+        self, predictions: Union[List[Dict[str, str]], List[str]]
     ):
         return [self.parse_prediction_to_dict(prediction) for prediction in predictions]
 
     def compute(
         self,
-        references:
-        predictions: Union[
-        task_data:
+        references: List[List[str]],
+        predictions: Union[List[Dict[str, str]], List[str]],
+        task_data: List[Dict[str, str]],
     ) -> dict:
         self.logger.info(
             f'Starting evaluation with evaluator "{self.evaluator_name}" and provider {self.inference_engine.get_pretty_print_name()}'
@@ -775,8 +775,8 @@ class LLMJudgePairwise(LLMJudge):
             f"The evaluation will perform {sum(contests_count_list) * [1,2][self.check_positional_bias]} ({' + '.join([f'{c * [1,2][self.check_positional_bias]}' for c in contests_count_list])}) pairwise comparisons"
         )
 
-        response_pairs_list:
-        option_pairs_list:
+        response_pairs_list: List[List[List[str]]] = []
+        option_pairs_list: List[List[List[str]]] = []
         predictions_names = set(predictions[0].keys())
         for i, combination_indexes in enumerate(combination_indexes_list):
             instance_predictions = predictions[i]
@@ -786,8 +786,8 @@ class LLMJudgePairwise(LLMJudge):
                 f"The set of prediction names is different between instance 0 and instance {i}. In prediction 0, it is {sorted(predictions_names)}. In prediction {i}, it is {sorted(instance_predictions_names)}. Make sure the same number of predictions is passed for all instances."
             )
 
-            response_pairs:
-            option_pairs:
+            response_pairs: List[List[str]] = []
+            option_pairs: List[List[str]] = []
             for combination in combination_indexes:
                 (idx_1, idx_2) = combination
                 response_name_1 = instance_predictions_names[idx_1]
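For reference, the dict conversion that `parse_prediction_to_dict` applies to list-shaped predictions (taken directly from the comprehension in the diff above) behaves like this:

# A list of per-system predictions becomes a dict keyed "1", "2", ...
# so pairwise comparisons can refer to systems by name.
predictions = ["response from system A", "response from system B"]
as_dict = {f"{key + 1}": value for key, value in enumerate(predictions)}
print(as_dict)  # {'1': 'response from system A', '2': 'response from system B'}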
llm_as_judge_constants.py
CHANGED
@@ -1,6 +1,6 @@
 import json
 from enum import Enum
-from typing import Optional
+from typing import Dict, List, Optional
 
 from .artifact import Artifact
 from .inference import (
@@ -36,15 +36,15 @@ class Criteria(Artifact):
 
 
 class CriteriaWithOptions(Criteria):
-    options:
-    option_map: Optional[
+    options: List[CriteriaOption]
+    option_map: Optional[Dict[str, float]] = None
 
     @staticmethod
     def from_jsons(s: str):
         return CriteriaWithOptions.from_obj(json.loads(s))
 
     @staticmethod
-    def from_obj(criteria_dict:
+    def from_obj(criteria_dict: Dict):
         return CriteriaWithOptions(
             name=criteria_dict["name"],
             description=criteria_dict["description"],
@@ -132,7 +132,7 @@ PROVIDER_TO_STRATEGY = {
 
 class EvaluatorMetadata:
     name: EvaluatorNameEnum
-    providers:
+    providers: List[ModelProviderEnum]
 
     def __init__(self, name, providers):
         self.name = name
llm_as_judge_utils.py
CHANGED
@@ -1,3 +1,5 @@
+from typing import Dict
+
 from .llm_as_judge_constants import (
     EVALUATORS_METADATA,
     MODEL_RENAMINGS,
@@ -7,7 +9,7 @@ from .llm_as_judge_constants import (
 )
 
 
-def get_parsed_context(context:
+def get_parsed_context(context: Dict[str, str]):
     return (
         "\n".join([f"{key}: {value}" for key, value in context.items()])
         if len(context) > 1
loaders.py
CHANGED
@@ -41,6 +41,7 @@ from tempfile import TemporaryDirectory
 from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Union
 
 import pandas as pd
+from datasets import IterableDatasetDict
 from datasets import load_dataset as hf_load_dataset
 from huggingface_hub import HfApi
 from tqdm import tqdm
@@ -51,7 +52,7 @@ from .logging_utils import get_logger
 from .operator import SourceOperator
 from .operators import Set
 from .settings_utils import get_settings
-from .stream import DynamicStream, MultiStream
+from .stream import MultiStream
 from .type_utils import isoftype
 from .utils import LRUCache
 
@@ -122,7 +123,7 @@ class Loader(SourceOperator):
         )
         return operator(multi_stream)
 
-    def
+    def set_default_data_classification(
         self, default_data_classification_policy, additional_info
     ):
         if self.data_classification_policy is None:
@@ -162,23 +163,24 @@ class LoadHF(Loader):
     and it can filter datasets upon loading.
 
     Args:
-    path:
-
-    name:
-
-    data_dir:
-
-    split:
-
-    data_files:
-
-    revision:
-
-    streaming (bool):
-
-    filtering_lambda
-
-    num_proc (int):
+        path:
+            The path or identifier of the dataset on the HuggingFace Hub.
+        name:
+            An optional dataset name.
+        data_dir:
+            Optional directory to store downloaded data.
+        split:
+            Optional specification of which split to load.
+        data_files:
+            Optional specification of particular data files to load.
+        revision:
+            Optional. The revision of the dataset. Often the commit id. Use in case you want to set the dataset version.
+        streaming (bool):
+            indicating if streaming should be used.
+        filtering_lambda (str, optional):
+            A lambda function for filtering the data after loading.
+        num_proc (int, optional):
+            Specifies the number of processes to use for parallel dataset loading.
 
     Example:
         Loading glue's mrpc dataset
@@ -278,40 +280,22 @@ class LoadHF(Loader):
             for split in dataset.keys():
                 dataset[split] = dataset[split].to_iterable_dataset()
         else:
-            dataset = {self.split: dataset}
-
-        if self.filtering_lambda is not None:
-            dataset = self.filter_load(dataset)
+            dataset = {self.split: dataset.to_iterable_dataset()}
 
         return dataset
 
-    def split_limited_load(self, dataset, split_name):
-        yield from itertools.islice(dataset[split_name], self.get_limit())
-
-    def limited_load(self, dataset):
-        self.log_limited_loading()
-        return MultiStream(
-            {
-                name: DynamicStream(
-                    generator=self.split_limited_load,
-                    gen_kwargs={"dataset": dataset, "split_name": name},
-                )
-                for name in dataset.keys()
-            }
-        )
-
     def _maybe_set_classification_policy(self):
         if os.path.exists(self.path):
-            self.
+            self.set_default_data_classification(
                 ["proprietary"], "when loading from local files"
             )
         else:
-            self.
+            self.set_default_data_classification(
                 ["public"],
                 None,  # No warning when loading from public hub
             )
 
-    def load_iterables(self):
+    def load_iterables(self) -> IterableDatasetDict:
         try:
             dataset = self.stream_dataset()
         except (
@@ -319,8 +303,15 @@ class LoadHF(Loader):
         ):  # streaming is not supported for zipped files so we load without streaming
             dataset = self.load_dataset()
 
+        if self.filtering_lambda is not None:
+            dataset = self.filter_load(dataset)
+
         if self.get_limit() is not None:
-
+            self.log_limited_loading()
+            return {
+                split_name: dataset[split_name].take(self.get_limit())
+                for split_name in dataset
+            }
 
         return dataset
 
@@ -352,7 +343,7 @@ class LoadCSV(Loader):
     sep: str = ","
 
     def _maybe_set_classification_policy(self):
-        self.
+        self.set_default_data_classification(
             ["proprietary"], "when loading from local files"
         )
 
@@ -365,9 +356,7 @@ class LoadCSV(Loader):
                     file_path, nrows=self.get_limit(), sep=self.sep
                 ).to_dict("records")
             else:
-                iterables[split_name] = pd.read_csv(file_path
-                    "records"
-                )
+                iterables[split_name] = pd.read_csv(file_path).to_dict("records")
         return iterables
 
 
@@ -475,14 +464,22 @@ class LoadFromIBMCloud(Loader):
     3. Mapping: split -> file_names, e.g. {"test" : ["test1.json", "test2.json"], "train": ["train.json"]}
 
     Args:
-    endpoint_url_env:
-
-
-
-
-
-
-
+        endpoint_url_env:
+            Environment variable name for the IBM Cloud endpoint URL.
+        aws_access_key_id_env:
+            Environment variable name for the AWS access key ID.
+        aws_secret_access_key_env:
+            Environment variable name for the AWS secret access key.
+        bucket_name:
+            Name of the S3 bucket from which to load data.
+        data_dir:
+            Optional directory path within the bucket.
+        data_files:
+            Union type allowing either a list of file names or a mapping of splits to file names.
+        data_field:
+            The dataset key for nested JSON file, i.e. when multiple datasets are nested in the same file
+        caching (bool):
+            indicating if caching is enabled to avoid re-downloading data.
 
     Example:
         Loading from IBM Cloud
@@ -578,7 +575,7 @@ class LoadFromIBMCloud(Loader):
         raise NotImplementedError("LoadFromKaggle cannot load with streaming.")
 
     def _maybe_set_classification_policy(self):
-        self.
+        self.set_default_data_classification(
             ["proprietary"], "when loading from IBM COS"
         )
 
@@ -729,7 +726,7 @@ class LoadFromDictionary(Loader):
         )
 
     def _maybe_set_classification_policy(self):
-        self.
+        self.set_default_data_classification(
            ["proprietary"], "when loading from python dictionary"
        )
 
@@ -744,25 +741,24 @@ class LoadFromHFSpace(LoadHF):
     from the given space and then reads them as a HuggingFace Dataset.
 
     Args:
-    space_name (str):
-
-    data_files (str | Sequence[str] | Mapping[str, str | Sequence[str]]):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        authentication when accessing the HuggingFace Space - if necessary.
+        space_name (str):
+            Name of the HuggingFace Space to be accessed.
+        data_files (str | Sequence[str] | Mapping[str, str | Sequence[str]]):
+            Relative paths to files within a given repository. If given as a mapping,
+            paths should be values, while keys should represent the type of respective files
+            (training, testing etc.).
+        path (str, optional):
+            Absolute path to a directory where data should be downloaded.
+        revision (str, optional):
+            ID of a Git branch or commit to be used. By default, it is set to None,
+            thus data is downloaded from the main branch of the accessed repository.
+        use_token (bool, optional):
+            Whether a token is used for authentication when accessing
+            the HuggingFace Space. If necessary, the token is read from the HuggingFace
+            config folder.
+        token_env (str, optional):
+            Key of an env variable which value will be used for
+            authentication when accessing the HuggingFace Space - if necessary.
 
     Example:
         Loading from a HuggingFace Space
@@ -910,7 +906,7 @@ class LoadFromHFSpace(LoadHF):
         )
 
     def _maybe_set_classification_policy(self):
-        self.
+        self.set_default_data_classification(
             ["public"], "when loading from Huggingface spaces"
         )
 
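The new `load_iterables` applies the loading limit with the `datasets` library's streaming `take()` instead of the removed `DynamicStream` wrapper. A minimal sketch of that pattern outside unitxt (the GLUE/MRPC dataset is illustrative):

from datasets import load_dataset

# Stream the dataset and keep only the first N examples per split,
# mirroring the take()-based limiting introduced in load_iterables above.
limit = 5
dataset = load_dataset("glue", "mrpc", streaming=True)
limited = {split_name: dataset[split_name].take(limit) for split_name in dataset}
print([example["sentence1"][:40] for example in limited["train"]])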
metric_utils.py
CHANGED
@@ -353,13 +353,11 @@ UNITXT_METRIC_SCHEMA = Features(
 class GlobalScores(dict):
     """GlobalScores is a dictionary-based class designed to handle and transform metric results into a structured format.
 
-
-    score (float):
-
-
-
-    to_df():
-        Transforms the dictionary of results into a pandas DataFrame with score_name as the index,
+    Args:
+        score (float):
+            The main score value.
+        score_name (str):
+            The name of the main score.
     """
 
     @property
@@ -550,12 +548,11 @@ class GroupsScores(dict):
     This class provides a property to summarize the scores and a custom
     string representation for pretty-printing.
 
-    Attributes:
-        summary (property): A property to get a summary of the group scores.
     """
 
     @property
     def summary(self):
+        """A property to get a summary of the group scores."""
         data = self
         # Desired metric columns
         metric_cols = [
metrics.py
CHANGED
@@ -48,7 +48,7 @@ from .random_utils import get_seed
 from .settings_utils import get_settings
 from .stream import MultiStream, Stream
 from .type_utils import Type, isoftype, parse_type_string, to_type_string
-from .utils import deep_copy
+from .utils import deep_copy, recursive_copy
 
 logger = get_logger()
 settings = get_settings()
@@ -992,7 +992,17 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
     reference_field: str = NonPositionalField(default="references")
     prediction_field: str = NonPositionalField(default="prediction")
 
-    def
+    def _validate_group_mean_task_data(self, instance):
+        # instances need to all have task_data field with field group_id
+        assert "task_data" in instance, "each instance must have an task_data field"
+        assert isinstance(
+            instance["task_data"], dict
+        ), "each instance must have an task_data field that is a dict"
+        assert (
+            "group_id" in instance["task_data"]
+        ), "each instance task_data dict must have a key group_id"
+
+    def _validate_group_mean_reduction(self):
         """Ensure that group_mean reduction_map is properly formatted.
 
         Example: Apply the variance (np.var) to group Accuracy instance scores. This class would be specified as follows:
@@ -1042,17 +1052,6 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
         1  'How do I repair my engine?'    'paraphrase'
         2  'Why are ants eating my food?'  'original'
         """
-        # instances need to all have task_data field with field group_id
-        assert all(
-            "task_data" in instance for instance in instances
-        ), "each instance must have an task_data field"
-        assert all(
-            isinstance(instance["task_data"], dict) for instance in instances
-        ), "each instance must have an task_data field that is a dict"
-        assert all(
-            "group_id" in instance["task_data"] for instance in instances
-        ), "each instance task_data dict must have a key group_id"
-
         # validate the reduction_map
         assert (
             "group_mean" in self.reduction_map
@@ -1081,16 +1080,9 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
         if "score_fields" in fields:
             assert isinstance(fields["score_fields"], list)
 
-        # for aggregation functions that use the subgroup_column (expect a dict of lists), check that
-        # this field exists
-        if self.subgroup_column is not None:
-            assert all(
-                self.subgroup_column in instance["task_data"] for instance in instances
-            ), f"each instance task_data dict must have a key {self.subgroup_column}"
-
     def process(self, stream: Stream, stream_name: Optional[str] = None) -> Generator:
-
-        global_score = {"num_of_instances": len(
+        instance_scores = self.compute_instance_scores(stream)
+        global_score = {"num_of_instances": len(instance_scores)}
         for reduction_type, reduction_params in self.reduction_map.items():
             assert (
                 reduction_type in self.implemented_reductions
@@ -1103,15 +1095,15 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
                 aggregation_function = self.average_item_scores
                 reduction_fields = list(set(reduction_params))
                 # no group reduction, so resample instances individually
-                scores_to_resample =
+                scores_to_resample = instance_scores
             elif reduction_type == "max":
                 aggregation_function = self.max_item_scores
                 reduction_fields = list(set(reduction_params))
                 # no group reduction, so resample instances individually
-                scores_to_resample =
+                scores_to_resample = instance_scores
             elif reduction_type == "group_mean":
                 aggregation_function = self.average_item_scores
-                self._validate_group_mean_reduction(
+                self._validate_group_mean_reduction()
                 reduction_fields = (
                     [self.main_score]
                     if "score_fields" not in reduction_params
@@ -1127,7 +1119,7 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
                     scores_to_resample,
                     aggregation_function,
                 ) = self._set_up_group_mean_aggregation(
-
                     reduction_params,
                     reduction_fields,
                 )
@@ -1168,18 +1160,32 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
             )
             global_score.update(confidence_interval)
 
-        for instance in
             self.update_and_adjust_global_score(instance, global_score)
-
 
     def compute_instance_scores(
         self, stream: Stream, stream_name: Optional[str] = None
     ):
-
 
        for instance in stream:
            instance = self.verify_instance(instance)
 
            task_data = instance["task_data"] if "task_data" in instance else {}
 
            if self.reference_field == "references":
@@ -1214,9 +1220,18 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
                     instance_score, instance["score"]["instance"]
                 )
             )
-
 
-
 
     def get_group_scores(
         self,
@@ -1228,12 +1243,16 @@ class InstanceMetric(StreamOperator, MetricWithConfidenceInterval):
         """Group scores by the group_id and subgroup_type fields of each instance, and compute group_aggregation_func by group.
 
         Args:
-            instances
-
-
             or, if self.subgroup_column is not None, a dict of subgroup types scores by subgroup_column value.
             callable function returns a single score for the group
-            prepend_score_prefix:
             if down the stream such a prepending is expected.
 
         Returns:
@@ -4910,14 +4929,18 @@ class IsCodeMixed(BulkInstanceMetric):
 class MetricsEnsemble(InstanceMetric, ArtifactFetcherMixin):
     """Metrics Ensemble class for creating ensemble of given metrics.
 
-
-    main_score (str):
-
-
-
-
-
-
 
     """
 
|
1122 |
+
instance_scores,
|
1123 |
reduction_params,
|
1124 |
reduction_fields,
|
1125 |
)
|
|
|
1160 |
)
|
1161 |
global_score.update(confidence_interval)
|
1162 |
|
1163 |
+
for instance in instance_scores:
|
1164 |
self.update_and_adjust_global_score(instance, global_score)
|
1165 |
+
|
1166 |
+
for i, instance in enumerate(stream):
|
1167 |
+
instance["score"] = recursive_copy(instance_scores[i]["score"])
|
1168 |
+
yield instance
|
1169 |
|
1170 |
def compute_instance_scores(
|
1171 |
self, stream: Stream, stream_name: Optional[str] = None
|
1172 |
):
|
1173 |
+
instance_scores = []
|
1174 |
|
1175 |
for instance in stream:
|
1176 |
instance = self.verify_instance(instance)
|
1177 |
|
1178 |
+
if "group_mean" in self.reduction_map:
|
1179 |
+
self._validate_group_mean_task_data(instance)
|
1180 |
+
|
1181 |
+
# for aggregation functions that use the subgroup_column (expect a dict of lists), check that
|
1182 |
+
# this field exists
|
1183 |
+
if self.subgroup_column is not None:
|
1184 |
+
assert (
|
1185 |
+
"task_data" in instance
|
1186 |
+
and self.subgroup_column in instance["task_data"]
|
1187 |
+
), f"each instance task_data dict must have a key {self.subgroup_column}"
|
1188 |
+
|
1189 |
task_data = instance["task_data"] if "task_data" in instance else {}
|
1190 |
|
1191 |
if self.reference_field == "references":
|
|
|
1220 |
instance_score, instance["score"]["instance"]
|
1221 |
)
|
1222 |
)
|
1223 |
+
task_data = {}
|
1224 |
+
if "task_data" in instance:
|
1225 |
+
if "group_id" in instance["task_data"]:
|
1226 |
+
task_data["group_id"] = instance["task_data"]["group_id"]
|
1227 |
+
if self.subgroup_column in instance["task_data"]:
|
1228 |
+
task_data[self.subgroup_column] = instance["task_data"][
|
1229 |
+
self.subgroup_column
|
1230 |
+
]
|
1231 |
|
1232 |
+
instance_scores.append({"score": instance["score"], "task_data": task_data})
|
1233 |
+
|
1234 |
+
return instance_scores
|
1235 |
|
1236 |
def get_group_scores(
|
1237 |
self,
|
|
|
1243 |
"""Group scores by the group_id and subgroup_type fields of each instance, and compute group_aggregation_func by group.
|
1244 |
|
1245 |
Args:
|
1246 |
+
instances (list):
|
1247 |
+
List of observation instances with instance-level scores (fields) computed.
|
1248 |
+
score_names (list):
|
1249 |
+
List of instance score names in each instance to apply the aggregation function.
|
1250 |
+
group_aggregation_func (Callable):
|
1251 |
+
aggregation function accepting a list of numeric scores;
|
1252 |
or, if self.subgroup_column is not None, a dict of subgroup types scores by subgroup_column value.
|
1253 |
callable function returns a single score for the group
|
1254 |
+
prepend_score_prefix (bool):
|
1255 |
+
if True - prepend the score_prefix to the score names in the returned dicts. Set to False
|
1256 |
if down the stream such a prepending is expected.
|
1257 |
|
1258 |
Returns:
|
|
|
4929 |
class MetricsEnsemble(InstanceMetric, ArtifactFetcherMixin):
|
4930 |
"""Metrics Ensemble class for creating ensemble of given metrics.
|
4931 |
|
4932 |
+
Args:
|
4933 |
+
main_score (str):
|
4934 |
+
The main score label used for evaluation.
|
4935 |
+
metrics (List[Union[Metric, str]]):
|
4936 |
+
List of metrics that will be ensemble.
|
4937 |
+
weights (List[float]):
|
4938 |
+
Weight of each the metrics
|
4939 |
+
reduction_map (Dict[str, List[str]]):
|
4940 |
+
Specifies the redaction method of the global score.
|
4941 |
+
InstanceMetric currently allows two reductions
|
4942 |
+
(see it definition at InstanceMetric class).
|
4943 |
+
This class define its default value to reduce by the mean of the main score.
|
4944 |
|
4945 |
"""
|
4946 |
|
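Note: the `group_mean` reduction now validates each instance on its own (`task_data` must be a dict containing `group_id`) and aggregates in two levels: instance scores are grouped by `task_data["group_id"]`, each group is reduced, and the global score averages over groups. A minimal, self-contained sketch of that two-level aggregation, with toy data and standard-library calls only (field values here are illustrative):

```python
from collections import defaultdict
from statistics import mean

# Toy instance scores shaped like the dicts compute_instance_scores now returns:
# {"score": {"instance": {...}}, "task_data": {"group_id": ...}}
instance_scores = [
    {"score": {"instance": {"accuracy": 1.0}}, "task_data": {"group_id": "g1"}},
    {"score": {"instance": {"accuracy": 0.0}}, "task_data": {"group_id": "g1"}},
    {"score": {"instance": {"accuracy": 1.0}}, "task_data": {"group_id": "g2"}},
]

groups = defaultdict(list)
for item in instance_scores:
    groups[item["task_data"]["group_id"]].append(item["score"]["instance"]["accuracy"])

# group_mean: aggregate within each group, then average the group scores
group_scores = [mean(scores) for scores in groups.values()]
print(mean(group_scores))  # 0.75
```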
operator.py
CHANGED
@@ -222,11 +222,11 @@ class SourceOperator(MultiStreamOperator):

     A source operator is responsible for generating the data stream from some source, such as a database or a file.
     This is the starting point of a stream processing pipeline.
-    The
+    The ``SourceOperator`` class is a type of ``MultiStreamOperator``, which is a special type of ``StreamingOperator``
     that generates an output stream but does not take any input streams.

-    When called, a
-    to generate the required
+    When called, a ``SourceOperator`` invokes its ``process`` method, which should be implemented by all subclasses
+    to generate the required ``MultiStream``.

     """

@@ -247,9 +247,14 @@ class SourceOperator(MultiStreamOperator):

 class StreamInitializerOperator(SourceOperator):
     """A class representing a stream initializer operator in the streaming system.

-    A stream initializer operator is a special type of
+    A stream initializer operator is a special type of ``SourceOperator`` that is capable
+    of taking parameters during the stream generation process.
+    This can be useful in situations where the stream generation process needs to be
+    customized or configured based on certain parameters.

-    When called, a
+    When called, a ``StreamInitializerOperator`` invokes its ``process`` method, passing any supplied
+    arguments and keyword arguments. The ``process`` method should be implemented by all subclasses
+    to generate the required ``MultiStream`` based on the given arguments and keyword arguments.

     """

@@ -278,11 +283,12 @@ def instance_result(result_stream):

 class StreamOperator(MultiStreamOperator):
     """A class representing a single-stream operator in the streaming system.

-    A single-stream operator is a type of
-    and applies the
+    A single-stream operator is a type of ``MultiStreamOperator`` that operates on individual
+    ``Stream`` objects within a ``MultiStream``. It iterates through each ``Stream`` in the ``MultiStream``
+    and applies the ``process`` method.
+
+    The ``process`` method should be implemented by subclasses to define the specific operations
+    to be performed on each ``Stream``.

     """

@@ -353,13 +359,15 @@ class SingleStreamOperator(StreamOperator):

 class PagedStreamOperator(StreamOperator):
     """A class representing a paged-stream operator in the streaming system.

-    A paged-stream operator is a type of
-    in a
-    The
+    A paged-stream operator is a type of ``StreamOperator`` that operates on a page of instances
+    in a ``Stream`` at a time, where a page is a subset of instances.
+    The ``process`` method should be implemented by subclasses to define the specific operations
     to be performed on each page.

     Args:
-        page_size (int):
+        page_size (int):
+            The size of each page in the stream. Defaults to 1000.
+
     """

     page_size: int = 1000

@@ -393,7 +401,12 @@ class PagedStreamOperator(StreamOperator):

 class SingleStreamReducer(StreamingOperator):
     """A class representing a single-stream reducer in the streaming system.

-    A single-stream reducer is a type of
+    A single-stream reducer is a type of ``StreamingOperator`` that operates on individual
+    ``Stream`` objects within a ``MultiStream`` and reduces each ``Stream`` to a single output value.
+
+    The ``process`` method should be implemented by subclasses to define the specific reduction operation
+    to be performed on each ``Stream``.
+
     """

     def __call__(self, multi_stream: Optional[MultiStream] = None) -> Dict[str, Any]:

@@ -412,7 +425,10 @@ class SingleStreamReducer(StreamingOperator):

 class InstanceOperator(StreamOperator):
     """A class representing a stream instance operator in the streaming system.

-    A stream instance operator is a type of
+    A stream instance operator is a type of ``StreamOperator`` that operates on individual instances
+    within a ``Stream``. It iterates through each instance in the ``Stream`` and applies the ``process`` method.
+    The ``process`` method should be implemented by subclasses to define the specific operations
+    to be performed on each instance.
     """

     def _process_stream(

@@ -449,7 +465,8 @@ class InstanceOperator(StreamOperator):

 class InstanceOperatorValidator(InstanceOperator):
     """A class representing a stream instance operator validator in the streaming system.

-    A stream instance operator validator is a type of
+    A stream instance operator validator is a type of ``InstanceOperator`` that includes a validation step.
+    It operates on individual instances within a ``Stream`` and validates the result of processing each instance.
     """

     @abstractmethod
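The docstrings above all describe the same contract: subclasses implement `process`, and the framework drives iteration. A minimal illustration of that pattern, using a stand-in base class so the snippet runs on its own (`InstanceOperatorSketch`, `AddPrefix`, and the field name `text` are hypothetical, not unitxt API):

```python
from typing import Any, Dict, Optional

class InstanceOperatorSketch:
    """Stand-in for unitxt's InstanceOperator: applies process() to each instance."""

    def process(self, instance: Dict[str, Any], stream_name: Optional[str] = None) -> Dict[str, Any]:
        raise NotImplementedError

    def __call__(self, stream):
        for instance in stream:
            yield self.process(instance)

class AddPrefix(InstanceOperatorSketch):
    def process(self, instance, stream_name=None):
        instance["text"] = "prefix: " + instance["text"]
        return instance

print(list(AddPrefix()([{"text": "hello"}])))  # [{'text': 'prefix: hello'}]
```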
operators.py
CHANGED
@@ -66,6 +66,7 @@ from .artifact import Artifact, fetch_artifact

 from .dataclass import NonPositionalField, OptionalField
 from .deprecation_utils import deprecation
 from .dict_utils import dict_delete, dict_get, dict_set, is_subpath
+from .generator_utils import ReusableGenerator
 from .operator import (
     InstanceOperator,
     MultiStream,

@@ -81,7 +82,7 @@ from .operator import (

 )
 from .random_utils import new_random_generator
 from .settings_utils import get_settings
-from .stream import DynamicStream,
+from .stream import DynamicStream, Stream
 from .text_utils import nested_tuple_to_string
 from .type_utils import isoftype
 from .utils import (

@@ -132,23 +133,24 @@ class IterableSource(SourceOperator):

 class MapInstanceValues(InstanceOperator):
     """A class used to map instance values into other values.

-    This class is a type of InstanceOperator
+    This class is a type of ``InstanceOperator``,
     it maps values of instances in a stream using predefined mappers.

-    mappers (Dict[str, Dict[str, Any]]):
-    strict (bool):
-    process_every_value (bool):
+    Args:
+        mappers (Dict[str, Dict[str, Any]]):
+            The mappers to use for mapping instance values.
+            Keys are the names of the fields to undergo mapping, and values are dictionaries
+            that define the mapping from old values to new values.
+            Note that mapped values are defined by their string representation, so mapped values
+            are converted to strings before being looked up in the mappers.
+        strict (bool):
+            If True, the mapping is applied strictly. That means if a value
+            does not exist in the mapper, it will raise a KeyError. If False, values
+            that are not present in the mapper are kept as they are.
+        process_every_value (bool):
+            If True, all fields to be mapped should be lists, and the mapping
+            is to be applied to their individual elements.
+            If False, mapping is only applied to a field containing a single value.

     Examples:
         ``MapInstanceValues(mappers={"a": {"1": "hi", "2": "bye"}})``
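A minimal standalone sketch of the string-keyed lookup described above (toy data; the real operator additionally handles nested field paths and `process_every_value`):

```python
mappers = {"a": {"1": "hi", "2": "bye"}}

def map_instance_values(instance, mappers, strict=True):
    for field, mapper in mappers.items():
        key = str(instance[field])  # mapped values are looked up by string representation
        if key in mapper:
            instance[field] = mapper[key]
        elif strict:
            raise KeyError(f"value {key!r} of field {field!r} not found in mapper")
    return instance

print(map_instance_values({"a": 1}, mappers))  # {'a': 'hi'}
```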
@@ -335,23 +337,23 @@ class InstanceFieldOperator(InstanceOperator):

     """A general stream instance operator that processes the values of a field (or multiple ones).

     Args:
-        field (Optional[str]):
-        to_field (Optional[str]):
-        field_to_field (Optional[Union[List[List[str]], Dict[str, str]]]):
+        field (Optional[str]):
+            The field to process, if only a single one is passed. Defaults to None
+        to_field (Optional[str]):
+            Field name to save result into, if only one field is processed, if None is passed the
+            operation would happen in-place and its result would replace the value of ``field``. Defaults to None
+        field_to_field (Optional[Union[List[List[str]], Dict[str, str]]]):
+            Mapping from names of fields to process,
+            to names of fields to save the results into. Inner List, if used, should be of length 2.
+            A field is processed by feeding its value into method ``process_value`` and storing the result in ``to_field`` that
+            is mapped to the field. When the type of argument ``field_to_field`` is List, the order by which the fields are processed is their order
+            in the (outer) List. But when the type of argument ``field_to_field`` is Dict, there is no uniquely determined
+            order. The end result might depend on that order if either (1) two different fields are mapped to the same
+            to_field, or (2) a field shows both as a key and as a value in different mappings.
+            The operator throws an AssertionError in either of these cases. ``field_to_field``
+            defaults to None.
+        process_every_value (bool):
+            Processes the values in a list instead of the list as a value, similar to python's ``*var``. Defaults to False

     Note: if ``field`` and ``to_field`` (or both members of a pair in ``field_to_field`` ) are equal (or share a common
     prefix if ``field`` and ``to_field`` contain a / ), then the result of the operation is saved within ``field`` .

@@ -806,10 +808,16 @@ class TakeByField(InstanceOperator):

 class Perturb(FieldOperator):
-    """Slightly perturbs the contents of
+    """Slightly perturbs the contents of ``field``. Can be handy for imitating a prediction from a given target.

-    When task was classification, argument
+    When the task is classification, argument ``select_from`` can be used to list the other potential classes, as a
     relevant perturbation
+
+    Args:
+        percentage_to_perturb (int):
+            the percentage of the instances for which to apply this perturbation. Defaults to 1 (1 percent)
+        select_from (List[Any]):
+            a list of values to select from, as a perturbation of the field's value. Defaults to [].
     """

     select_from: List[Any] = []
@@ -937,12 +945,13 @@ class CastFields(InstanceOperator):

     """Casts specified fields to specified types.

     Args:
-        fields (Dict[str, str]):
-        defaults (Dict[str, object]):
-        process_every_value (bool):
+        fields (Dict[str, str]):
+            A dictionary mapping field names to the names of the types to cast the fields to.
+            e.g: "int", "str", "float", "bool". Basic names of types
+        defaults (Dict[str, object]):
+            A dictionary mapping field names to default values for cases of casting failure.
+        process_every_value (bool):
+            If true, all fields involved must contain lists, and each value in the list is then casted. Defaults to False.

     Example:
         .. code-block:: python
@@ -1268,16 +1277,19 @@ class FilterByExpression(StreamOperator, ComputeExpressionMixin):

     Raises an error if a field participating in the specified condition is missing from the instance

     Args:
+        expression (str):
+            a condition over fields of the instance, to be processed by python's eval()
+        imports_list (List[str]):
+            names of imports needed for the eval of the query (e.g. 're', 'json')
+        error_on_filtered_all (bool, optional):
+            If True, raises an error if all instances are filtered out. Defaults to True.

     Examples:
+        | ``FilterByExpression(expression = "a > 4")`` will yield only instances where "a">4
+        | ``FilterByExpression(expression = "a <= 4 and b > 5")`` will yield only instances where the value of field "a" is not exceeding 4 and in field "b" -- greater than 5
+        | ``FilterByExpression(expression = "a in [4, 8]")`` will yield only instances where "a" is 4 or 8
+        | ``FilterByExpression(expression = "a not in [4, 8]")`` will yield only instances where "a" is neither 4 nor 8
+        | ``FilterByExpression(expression = "a['b'] not in [4, 8]")`` will yield only instances where "a" is a dict in which key 'b' is mapped to a value that is neither 4 nor 8
     """

     error_on_filtered_all: bool = True
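The essence of this operator is evaluating a boolean expression against each instance's fields, with the fields acting as variables, exactly as the Args describe. A standalone sketch (note that `eval` executes arbitrary code, so expressions must be trusted):

```python
instances = [{"a": 1}, {"a": 5}, {"a": 8}]

def filter_by_expression(stream, expression):
    for instance in stream:
        if eval(expression, {}, instance):  # instance fields act as variables
            yield instance

print(list(filter_by_expression(instances, "a > 4")))  # [{'a': 5}, {'a': 8}]
```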
@@ -1635,23 +1647,17 @@ class ApplyMetric(StreamOperator, ArtifactFetcherMixin):

     def process(self, stream: Stream, stream_name: Optional[str] = None) -> Generator:
         from .metrics import Metric, MetricsList

-        # so that the evaluation of one does not affect the evaluation of another
-        # (typically, affecting via change of instance as part of
-        # preprocess_steps of MetricPipeline, as illustrated in docs/adding_metrics/Using Metric Pipelines).
-        instances_upon_entrance_to_metrics_evaluations = []
-        for instance in stream:
-            instances_upon_entrance_to_metrics_evaluations.append(
-                recursive_copy(instance)
-            )
+        def update_scores_of_stream_instances(
+            stream: Stream, scores: List[dict]
+        ) -> Generator:
+            for instance, score in zip(stream, scores):
+                instance["score"] = recursive_copy(score)
+                yield instance
+
+        # to be populated only when two or more metrics
+        accumulated_scores = []
+
+        first_instance = stream.peek()

         metric_names = first_instance.get(self.metric_field, [])
         if not metric_names:

@@ -1680,26 +1686,28 @@ class ApplyMetric(StreamOperator, ArtifactFetcherMixin):

         # by the first listed metric (as desired).
         metrics_list = list(reversed(metrics_list))

-        for metric in metrics_list:
+        for metric_no, metric in enumerate(metrics_list):
             if not self.calc_confidence_intervals:
                 metric.disable_confidence_interval_calculation()
-            }
-            )
-            multi_stream = metric(multi_stream)
-            for evaluated_instance, freezed_instance in zip(
-                multi_stream["tmp"], instances_upon_entrance_to_metrics_evaluations
-            ):
-                freezed_instance["score"] = recursive_shallow_copy(
-                    evaluated_instance["score"]
-                )
+
+            if metric_no > 0:
+                # update input stream with accumulated scores
+                reusable_generator = ReusableGenerator(
+                    generator=update_scores_of_stream_instances,
+                    gen_kwargs={"stream": stream, "scores": accumulated_scores},
+                )
+                multi_stream = MultiStream.from_generators({"tmp": reusable_generator})
+            else:
+                multi_stream = MultiStream.from_iterables({"tmp": stream})
+            multi_stream = metric(multi_stream)
+            if metric_no < len(metrics_list) - 1:
+                # not the last metric, so prepare for the next metric by
+                # updating accumulated_scores
+                accumulated_scores = []
+                for inst in multi_stream["tmp"]:
+                    accumulated_scores.append(recursive_copy(inst["score"]))

-        yield from
+        yield from multi_stream["tmp"]


 class MergeStreams(MultiStreamOperator):
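The new control flow threads each metric's scores into the stream consumed by the next metric, instead of freezing full instance copies up front. A toy illustration of that hand-off pattern (standard library only; `apply_metric` is a stand-in for a real metric, not unitxt API):

```python
from copy import deepcopy

def update_scores(stream, scores):
    # re-attach previously accumulated scores before the next metric runs
    for instance, score in zip(stream, scores):
        instance["score"] = deepcopy(score)
        yield instance

def apply_metric(stream, name):
    # stand-in metric: adds its own entry to the instance's score dict
    for instance in stream:
        instance.setdefault("score", {})[name] = 1.0
        yield instance

instances = [{"x": 1}, {"x": 2}]
accumulated = []
for metric_no, name in enumerate(["rouge", "accuracy"]):
    source = instances if metric_no == 0 else update_scores(instances, accumulated)
    out = list(apply_metric(source, name))
    accumulated = [deepcopy(inst["score"]) for inst in out]

print(accumulated)  # [{'rouge': 1.0, 'accuracy': 1.0}, {'rouge': 1.0, 'accuracy': 1.0}]
```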
@@ -1872,13 +1880,15 @@ class StreamRefiner(StreamOperator):

     input stream. And if the input stream consists of more than 'max_instances' instances, the resulting stream only consists
     of the leading 'max_instances' of the input stream.

+    Args:
+        max_instances (int):
+        apply_to_streams (optional, list(str)):
+            names of streams to refine.

     Examples:
-        when input = [{"a": 1},{"a": 2},{"a": 3},{"a": 4},{"a": 5},{"a": 6}] is fed into
-        StreamRefiner(max_instances=4)
-        the resulting stream is [{"a": 1},{"a": 2},{"a": 3},{"a": 4}]
+        when input = ``[{"a": 1},{"a": 2},{"a": 3},{"a": 4},{"a": 5},{"a": 6}]`` is fed into
+        ``StreamRefiner(max_instances=4)``
+        the resulting stream is ``[{"a": 1},{"a": 2},{"a": 3},{"a": 4}]``
     """

     max_instances: int = None

@@ -1899,18 +1909,20 @@ class DeterministicBalancer(StreamRefiner):

     When also input 'max_instances' is specified, DeterministicBalancer maintains a total instance count not exceeding
     'max_instances'. The total number of discarded instances is as few as possible.

-    fields (List[str]):
+    Args:
+        fields (List[str]):
+            A list of field names to be used in producing the instance's signature.
+        max_instances (Optional, int):
+            overall max.

     Usage:
-       balancer = DeterministicBalancer(fields=["field1", "field2"], max_instances=200)
-       balanced_stream = balancer.process(stream)
+       ``balancer = DeterministicBalancer(fields=["field1", "field2"], max_instances=200)``
+       ``balanced_stream = balancer.process(stream)``

     Example:
-        When input [{"a": 1, "b": 1},{"a": 1, "b": 2},{"a": 2},{"a": 3},{"a": 4}] is fed into
-        DeterministicBalancer(fields=["a"])
-        the resulting stream will be: [{"a": 1, "b": 1},{"a": 2},{"a": 3},{"a": 4}]
+        When input ``[{"a": 1, "b": 1},{"a": 1, "b": 2},{"a": 2},{"a": 3},{"a": 4}]`` is fed into
+        ``DeterministicBalancer(fields=["a"])``
+        the resulting stream will be: ``[{"a": 1, "b": 1},{"a": 2},{"a": 3},{"a": 4}]``
     """

     fields: List[str]

@@ -1947,24 +1959,28 @@ class MinimumOneExamplePerLabelRefiner(StreamRefiner):

 class MinimumOneExamplePerLabelRefiner(StreamRefiner):
     """A class used to return a specified number of instances ensuring at least one example per label.

-    For each instance, a signature value is constructed from the values of the instance in specified input
-    MinimumOneExamplePerLabelRefiner takes first instance that appears from each label (each unique signature), and then adds more elements up to the max_instances limit. In general, the refiner takes the first elements in the stream that meet the required conditions.
-    MinimumOneExamplePerLabelRefiner then shuffles the results to avoid having one instance
+    For each instance, a signature value is constructed from the values of the instance in specified input ``fields``.
+    ``MinimumOneExamplePerLabelRefiner`` takes the first instance that appears from each label (each unique signature), and then adds more elements up to the max_instances limit. In general, the refiner takes the first elements in the stream that meet the required conditions.
+    ``MinimumOneExamplePerLabelRefiner`` then shuffles the results to avoid having one instance
     from each class first and then the rest. If max_instances is not set, the original stream will be used

+    Args:
+        fields (List[str]):
+            A list of field names to be used in producing the instance's signature.
+        max_instances (Optional, int):
+            Number of elements to select. Note that max_instances of StreamRefiners
+            that are passed to the recipe (e.g. ``train_refiner``, ``test_refiner``) are overridden
+            by the recipe parameters ( ``max_train_instances``, ``max_test_instances``)

     Usage:
-        balancer = MinimumOneExamplePerLabelRefiner(fields=["field1", "field2"], max_instances=200)
-        balanced_stream = balancer.process(stream)
+        | ``balancer = MinimumOneExamplePerLabelRefiner(fields=["field1", "field2"], max_instances=200)``
+        | ``balanced_stream = balancer.process(stream)``

     Example:
-        When input [{"a": 1, "b": 1},{"a": 1, "b": 2},{"a": 1, "b": 3},{"a": 1, "b": 4},{"a": 2, "b": 5}] is fed into
-        MinimumOneExamplePerLabelRefiner(fields=["a"], max_instances=3)
+        When input ``[{"a": 1, "b": 1},{"a": 1, "b": 2},{"a": 1, "b": 3},{"a": 1, "b": 4},{"a": 2, "b": 5}]`` is fed into
+        ``MinimumOneExamplePerLabelRefiner(fields=["a"], max_instances=3)``
         the resulting stream will be:
-        [{'a': 1, 'b': 1}, {'a': 1, 'b': 2}, {'a': 2, 'b': 5}] (order may be different)
+        ``[{'a': 1, 'b': 1}, {'a': 1, 'b': 2}, {'a': 2, 'b': 5}]`` (order may be different)
     """

     fields: List[str]

@@ -2022,20 +2038,19 @@ class LengthBalancer(DeterministicBalancer):

     """Balances by a signature that reflects the total length of the fields' values, quantized into integer segments.

     Args:
-        segments_boundaries (List[int]):
-        fields (Optional, List[str])
+        segments_boundaries (List[int]):
+            distinct integers sorted in increasing order, that map a given total length
+            into the index of the least of them that exceeds the given total length.
+            (If none exceeds -- into one index beyond, namely, the length of segments_boundaries)
+        fields (Optional, List[str]):
+            the total length of the values of these fields goes through the quantization described above

     Example:
-        when input [{"a": [1, 3], "b": 0, "id": 0}, {"a": [1, 3], "b": 0, "id": 1}, {"a": [], "b": "a", "id": 2}]
-        LengthBalancer(fields=["a"], segments_boundaries=[1])
-        input instances will be counted and balanced against two categories: empty total length (less than 1), and non-empty.
+        when input ``[{"a": [1, 3], "b": 0, "id": 0}, {"a": [1, 3], "b": 0, "id": 1}, {"a": [], "b": "a", "id": 2}]``
+        is fed into ``LengthBalancer(fields=["a"], segments_boundaries=[1])``,
+        input instances will be counted and balanced against two categories:
+        empty total length (less than 1), and non-empty.
     """

     segments_boundaries: List[int]
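The boundary-to-segment mapping described for `segments_boundaries` is exactly a right-bisect lookup; a small self-contained sketch:

```python
from bisect import bisect_right

def length_segment(total_length, segments_boundaries):
    # index of the least boundary exceeding total_length;
    # len(segments_boundaries) if none exceeds it
    return bisect_right(segments_boundaries, total_length)

print(length_segment(0, [1]))  # 0 -> "empty" bucket
print(length_segment(2, [1]))  # 1 -> "non-empty" bucket
```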
@@ -2067,9 +2082,11 @@ class UnexpectedHttpCodeError(Exception):

 class DownloadOperator(SideEffectOperator):
     """Operator for downloading a file from a given URL to a specified local path.

-    source (str):
+    Args:
+        source (str):
+            URL of the file to be downloaded.
+        target (str):
+            Local path where the downloaded file should be saved.
     """

     source: str

@@ -2089,9 +2106,11 @@ class ExtractZipFile(SideEffectOperator):

 class ExtractZipFile(SideEffectOperator):
     """Operator for extracting files from a zip archive.

-    zip_file (str):
+    Args:
+        zip_file (str):
+            Path of the zip file to be extracted.
+        target_dir (str):
+            Directory where the contents of the zip file will be extracted.
     """

     zip_file: str

@@ -2105,8 +2124,9 @@ class DuplicateInstances(StreamOperator):

 class DuplicateInstances(StreamOperator):
     """Operator which duplicates each instance in stream a given number of times.

-    num_duplications (int):
+    Args:
+        num_duplications (int):
+            How many times each instance should be duplicated (1 means no duplication).
     duplication_index_field (Optional[str]):
         If given, then additional field with specified name is added to each duplicated instance,
         which contains id of a given duplication. Defaults to None, so no field is added.
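A standalone sketch of the duplication behavior just documented, including the optional index field:

```python
def duplicate_instances(stream, num_duplications, duplication_index_field=None):
    for instance in stream:
        for i in range(num_duplications):
            duplicate = dict(instance)  # shallow copy per duplication
            if duplication_index_field:
                duplicate[duplication_index_field] = i
            yield duplicate

print(list(duplicate_instances([{"a": 1}], 2, "dup_id")))
# [{'a': 1, 'dup_id': 0}, {'a': 1, 'dup_id': 1}]
```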
processors.py
CHANGED
@@ -132,6 +132,14 @@ class TakeFirstNonEmptyLine(FieldOperator):

         return parts[0].strip()


+class TakeLastNonEmptyLine(FieldOperator):
+    def process_value(self, text: Any) -> Any:
+        parts = str(text).strip().split("\n")
+        if len(parts) == 0:
+            return ""
+        return parts[-1].strip()
+
+
 class ConvertToBoolean(FieldOperator):
     def process_value(self, text: Any) -> Any:
         clean_instance = str(text).strip().lower()

@@ -157,6 +165,11 @@ class Lower(FieldOperator):

         return text.lower()


+class Upper(FieldOperator):
+    def process_value(self, text: Any) -> Any:
+        return str(text).upper()
+
+
 @deprecation("2.0.0", alternative=Lower)
 class LowerCase(Lower):
     pass
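For reference, the value-level behavior of the two new processors, as standalone functions (toy input):

```python
def take_last_non_empty_line(text) -> str:
    parts = str(text).strip().split("\n")
    return parts[-1].strip() if parts else ""

def upper(text) -> str:
    return str(text).upper()

print(take_last_non_empty_line("some reasoning\n\nfinal answer"))  # "final answer"
print(upper("yes"))  # "YES"
```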
schema.py
CHANGED
@@ -143,6 +143,9 @@ class FinalizeDataset(InstanceOperatorValidator):

         )

         task_data["metadata"]["num_demos"] = instance["recipe_metadata"]["num_demos"]
+        task_data["metadata"]["demos_pool_size"] = instance["recipe_metadata"][
+            "demos_pool_size"
+        ]
         task_data["metadata"]["template"] = self.artifact_to_jsonable(
             instance["recipe_metadata"]["template"]
         )
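After this change, the per-instance metadata records the demos pool size alongside the demo count. A toy example of the resulting shape (all values illustrative):

```python
task_data = {
    "metadata": {
        "num_demos": 3,
        "demos_pool_size": 100,  # newly recorded by FinalizeDataset
        "template": "templates.classification.multi_class.default",
    }
}
```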
settings_utils.py
CHANGED
@@ -138,7 +138,7 @@ if Settings.is_uninitilized():

     settings.max_log_message_size = (int, 100000)
     settings.catalogs = None
     settings.artifactories = None
-    settings.default_recipe = "
+    settings.default_recipe = "dataset_recipe"
     settings.default_verbosity = "info"
     settings.use_eager_execution = False
     settings.remote_metrics = []

@@ -186,6 +186,7 @@ if Constants.is_uninitilized():

     constants.inference_stream = "__INFERENCE_STREAM__"
     constants.instance_stream = "__INSTANCE_STREAM__"
     constants.image_tag = "unitxt-img"
+    constants.demos_pool_field = "_demos_pool_"


 def get_settings() -> Settings:
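A hedged sketch of reading the updated default and the new constant, assuming the library is importable as `unitxt` and that `Settings`/`Constants` expose attribute access as the assignments above suggest:

```python
# assumes unitxt is installed; attribute names taken from the assignments above
from unitxt.settings_utils import get_constants, get_settings

print(get_settings().default_recipe)      # "dataset_recipe"
print(get_constants().demos_pool_field)   # "_demos_pool_"
```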
span_lableing_operators.py
CHANGED
@@ -6,19 +6,18 @@ from .operator import InstanceOperator

 class IobExtractor(InstanceOperator):
     """A class designed to extract entities from sequences of text using the Inside-Outside-Beginning (IOB) tagging convention. It identifies entities based on IOB tags and categorizes them into predefined labels such as Person, Organization, and Location.

-    labels (List[str]):
-    begin_labels (List[str]):
-    inside_labels (List[str]):
-    outside_label (str):
+    Args:
+        labels (List[str]):
+            A list of entity type labels, e.g., ["Person", "Organization", "Location"].
+        begin_labels (List[str]):
+            A list of labels indicating the beginning of an entity, e.g., ["B-PER", "B-ORG", "B-LOC"].
+        inside_labels (List[str]):
+            A list of labels indicating the continuation of an entity, e.g., ["I-PER", "I-ORG", "I-LOC"].
+        outside_label (str):
+            The label indicating tokens outside of any entity, typically "O".

     The extraction process identifies spans of text corresponding to entities and labels them according to their entity type. Each span is annotated with a start and end character offset, the entity text, and the corresponding label.

     Example of instantiation and usage:

     .. code-block:: python
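A minimal IOB decoding sketch, independent of unitxt and ignoring character offsets for brevity, mirroring the convention the docstring describes: a `B-` tag opens a span, a matching `I-` tag extends it, and anything else closes it:

```python
def decode_iob(tokens, tags):
    spans, current = [], None
    for token, tag in zip(tokens, tags):
        if tag.startswith("B-"):
            if current:
                spans.append(current)
            current = (tag[2:], [token])
        elif tag.startswith("I-") and current and current[0] == tag[2:]:
            current[1].append(token)
        else:
            if current:
                spans.append(current)
            current = None
    if current:
        spans.append(current)
    return [(label, " ".join(toks)) for label, toks in spans]

print(decode_iob(["John", "Smith", "works"], ["B-PER", "I-PER", "O"]))
# [('PER', 'John Smith')]
```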
splitters.py
CHANGED
@@ -1,11 +1,11 @@

 import itertools
 from abc import abstractmethod
 from difflib import get_close_matches
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional

 from .artifact import Artifact
 from .dict_utils import dict_get
-from .operator import
+from .operator import InstanceOperator, MultiStreamOperator
 from .random_utils import new_random_generator
 from .split_utils import (
     parse_random_mix_string,

@@ -14,7 +14,7 @@ from .split_utils import (

     rename_split,
     slice_streams,
 )
-from .stream import
+from .stream import MultiStream
 from .type_utils import isoftype
 from .utils import recursive_copy

@@ -118,14 +118,14 @@ class Sampler(Artifact):

     def sample(
         self,
         sample_size: int,
-        instances_pool: List[Dict[str,
-        instance: Dict[str,
-    ) -> List[Dict[str,
+        instances_pool: List[Dict[str, Any]],
+        instance: Dict[str, Any],
+    ) -> List[Dict[str, Any]]:
         pass

     def filter_source_by_instance(
-        self, instances_pool: List[Dict[str,
-    ) -> List[Dict[str,
+        self, instances_pool: List[Dict[str, Any]], instance: Dict[str, Any]
+    ) -> List[Dict[str, Any]]:
         if "input_fields" not in instance:
             raise ValueError(f"'input_fields' field is missing from '{instance}'.")
         try:

@@ -336,10 +336,11 @@ class DiverseLabelsSampler(Sampler):

         return result


-class Sample(InstanceOperatorWithMultiStreamAccess):
+class AssignDemosToInstance(InstanceOperator):
+    from_field: str
     to_field: str
     sampler: Sampler
+    skip_demoed_instances: bool = False

     def prepare(self):
         self.local_cache = None

@@ -350,40 +351,36 @@ class Sample(InstanceOperatorWithMultiStreamAccess):

         pass

     def process(
-        self, instance: Dict[str,
-    ) -> Dict[str,
-            f"Size of population to sample from: {len(source_stream)} is smaller than the needed sample_size: {self.sampler.sample_size}."
-        )
-        sampled_instances = self.sampler.sample(
-            sample_size=sample_size, instances_pool=source_stream, instance=instance
-        )
+        self, instance: Dict[str, Any], multi_stream: MultiStream
+    ) -> Dict[str, Any]:
+        if self.skip_demoed_instances and self.to_field in instance:
+            if self.from_field in instance:
+                instance.pop(self.from_field)
+            return instance
+
+        demos_pool = instance[self.from_field]
+        sample_size = self.get_sample_size(instance)
+        source_stream = self.sampler.filter_source_by_instance(demos_pool, instance)
+        if len(source_stream) < sample_size:
+            raise ValueError(
+                f"Size of population to sample from: {len(source_stream)} is smaller than the needed sample_size: {sample_size}. Please consider increasing the demos pool, for which you may need to increase loader_limit or employ a less strict stream filtering."
+            )
+        sampled_instances = self.sampler.sample(
+            sample_size=sample_size, instances_pool=source_stream, instance=instance
+        )
+        instance[self.to_field] = recursive_copy(sampled_instances)
+        instance.pop(self.from_field)  # pop the field pointing to the demos_pool
+        return instance


-class ConstantSizeSample(
+class ConstantSizeSample(AssignDemosToInstance):
     sample_size: int

     def get_sample_size(self, instance) -> int:
         return self.sample_size


-class RandomSizeSample(
+class RandomSizeSample(AssignDemosToInstance):
     sample_sizes: List[int]

     def get_sample_size(self, instance) -> int:
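A hedged sketch of what `AssignDemosToInstance` does per instance, reduced to its essence: the demos pool travels on the instance under `from_field`, a sample size is chosen, the pool is sampled, and the pool field is popped. Names and the plain `random.sample` call are illustrative stand-ins for the `Sampler` machinery:

```python
import random

def assign_demos(instance, from_field="_demos_pool_", to_field="demos",
                 k=2, rng=random.Random(0)):
    pool = instance.pop(from_field)  # pop the field pointing to the demos pool
    if len(pool) < k:
        raise ValueError(f"pool of {len(pool)} smaller than sample_size {k}")
    instance[to_field] = rng.sample(pool, k)
    return instance

inst = {"_demos_pool_": [{"q": 1}, {"q": 2}, {"q": 3}], "question": "..."}
print(assign_demos(inst)["demos"])  # two sampled demo dicts
```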
standard.py
CHANGED
@@ -1,26 +1,35 @@
|
|
1 |
-
|
|
|
|
|
|
|
2 |
|
3 |
from .artifact import fetch_artifact
|
4 |
from .augmentors import Augmentor, NullAugmentor
|
5 |
from .card import TaskCard
|
6 |
from .collections_operators import GetLength
|
7 |
from .dataclass import Field, InternalField, NonPositionalField, OptionalField
|
|
|
8 |
from .error_utils import UnitxtError
|
9 |
from .formats import Format, SystemFormat
|
|
|
10 |
from .logging_utils import get_logger
|
11 |
-
from .operator import
|
|
|
|
|
|
|
|
|
|
|
12 |
from .operators import Set, StreamRefiner
|
13 |
-
from .recipe import Recipe
|
14 |
from .schema import FinalizeDataset
|
15 |
from .serializers import SingleTypeSerializer
|
16 |
from .settings_utils import get_constants, get_settings
|
17 |
-
from .splitters import ConstantSizeSample, RandomSizeSample, Sampler
|
18 |
from .stream import MultiStream
|
19 |
from .system_prompts import EmptySystemPrompt, SystemPrompt
|
20 |
from .task import Task
|
21 |
from .templates import ApplyRandomTemplate, ApplySingleTemplate, Template, TemplatesList
|
22 |
from .type_utils import isoftype
|
23 |
-
from .utils import LRUCache
|
24 |
|
25 |
constants = get_constants()
|
26 |
settings = get_settings()
|
@@ -28,11 +37,205 @@ logger = get_logger()
|
|
28 |
|
29 |
|
30 |
# Used to give meaningful name to recipe steps
|
31 |
-
class CreateDemosPool(
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
-
class BaseRecipe(Recipe, SourceSequentialOperator):
|
36 |
# Base parameters
|
37 |
card: TaskCard = None
|
38 |
task: Task = None
|
@@ -59,14 +262,18 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
|
|
59 |
test_refiner: StreamRefiner = OptionalField(default_factory=StreamRefiner)
|
60 |
|
61 |
demos_pool_size: int = None
|
|
|
62 |
num_demos: Optional[Union[int, List[int]]] = 0
|
63 |
demos_removed_from_data: bool = True
|
|
|
64 |
|
65 |
-
demos_pool_name: str = "demos_pool"
|
66 |
demos_taken_from: str = "train"
|
67 |
demos_field: str = "demos"
|
68 |
sampler: Sampler = None
|
69 |
|
|
|
|
|
|
|
70 |
augmentor: Union[Augmentor, List[Augmentor]] = OptionalField(default=None)
|
71 |
|
72 |
steps: List[StreamingOperator] = InternalField(default_factory=list)
|
@@ -101,11 +308,16 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
|
|
101 |
raise ValueError(
|
102 |
"When using demonstrations both num_demos and demos_pool_size should be assigned with positive integers."
|
103 |
)
|
104 |
-
if self.demos_pool_size < self.max_demos_size:
|
105 |
raise ValueError(
|
106 |
-
f"num_demos (got: {self.max_demos_size}) should not exceed demos_pool_size (got: {self.demos_pool_size})"
|
107 |
)
|
108 |
-
if
|
|
|
|
|
|
|
|
|
|
|
109 |
raise ValueError(
|
110 |
f"demos_pool_size should not exceed loader_limit ({self.loader_limit}), Got demos_pool_size={self.demos_pool_size}"
|
111 |
)
|
@@ -220,29 +432,21 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
|
|
220 |
self.loading,
|
221 |
self.metadata,
|
222 |
self.standardization,
|
223 |
-
self.processing,
|
224 |
]
|
225 |
|
226 |
self.inference = SequentialOperator()
|
227 |
|
228 |
-
self.inference.steps = [self.
|
229 |
|
230 |
def production_preprocess(self, task_instances):
|
231 |
ms = MultiStream.from_iterables({constants.inference_stream: task_instances})
|
232 |
-
return list(self.
|
233 |
-
|
234 |
-
def production_demos_pool(self):
|
235 |
-
if self.use_demos:
|
236 |
-
demos_pool = self.__class__._demos_pool_cache.get(str(self), None)
|
237 |
-
if demos_pool is None:
|
238 |
-
demos_pool = list(self.inference_demos()[self.demos_pool_name])
|
239 |
-
self.__class__._demos_pool_cache[str(self)] = demos_pool
|
240 |
-
return demos_pool
|
241 |
-
return []
|
242 |
|
243 |
@property
|
244 |
def has_custom_demos_pool(self):
|
245 |
-
return self.demos_pool_size is not None and
|
|
|
|
|
246 |
|
247 |
@property
|
248 |
def use_demos(self):
@@ -251,13 +455,22 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
    def produce(self, task_instances):
        """Use the recipe in production to produce model ready query from standard task instance."""
        self.before_process_multi_stream()
-
-
-
-        if self.
-
-
-
        return list(multi_stream[constants.inference_stream])

    def reset(self):
@@ -321,15 +534,29 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
        augmentor.set_fields(self.card.task.augmentable_inputs)
        self.processing.steps.append(augmentor)

        if self.has_custom_demos_pool:
-            self.
-
-
-
-
-
                )
-            )

        if self.use_demos:
            if self.sampler is None:
@@ -346,28 +573,41 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
        if isinstance(self.num_demos, int):
            self.verbalization.steps.append(
                ConstantSizeSample(
-
                    to_field=self.demos_field,
                    sampler=self.sampler,
                    sample_size=self.num_demos,
                )
            )
            self.verbalization.steps.append(
-                Set(
            )

        elif isinstance(self.num_demos, list):
            self.verbalization.steps.append(
                RandomSizeSample(
-
                    to_field=self.demos_field,
                    sampler=self.sampler,
                    sample_sizes=self.num_demos,
                )
            )
            self.verbalization.steps.append(
                GetLength(field="demos", to_field="recipe_metadata/num_demos")
            )
        else:
            raise ValueError("num_demos must be int or List[int]")

@@ -383,9 +623,15 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
                    template=self.template, demos_field=self.demos_field
                )
            )
        else:
            self.verbalization.steps.append(
-                Set(
            )
        if isinstance(self.template, list):
            self.verbalization.steps.append(
@@ -409,15 +655,6 @@ class BaseRecipe(Recipe, SourceSequentialOperator):

        self.finalize.steps.append(FinalizeDataset(group_by=self.group_by))

-    def prepare(self):
-        if isinstance(self.template, TemplatesList):
-            self.template = self.template.items
-        self.reset_pipeline()
-
-
-class StandardRecipeWithIndexes(BaseRecipe):
-    template_card_index: int = None
-
    def prepare(self):
        assert (
            self.template_card_index is None or self.template is None
@@ -464,77 +701,41 @@ class StandardRecipeWithIndexes(BaseRecipe):
            raise ValueError(
                "No template was specified in the the 'template' or 'template_card_index' recipe arguments, and no default templates are defined the card or task"
            )

-


-class StandardRecipe(StandardRecipeWithIndexes):
-    """This class represents a standard recipe for data processing and preparation.

-
-
-

-    Args:
-        card (TaskCard):
-            TaskCard object associated with the recipe.
-        template (Template, optional):
-            Template object to be used for the recipe.
-        system_prompt (SystemPrompt, optional):
-            SystemPrompt object to be used for the recipe.
-        loader_limit (int, optional):
-            Specifies the maximum number of instances per stream to be returned from the loader (used to reduce loading time in large datasets)
-        format (SystemFormat, optional):
-            SystemFormat object to be used for the recipe.
-        metrics (List[str]):
-            list of catalog metrics to use with this recipe.
-        postprocessors (List[str]):
-            list of catalog processors to apply at post processing. (Not recommended to use from here)
-        group_by (List[Union[str, List[str]]]):
-            list of task_data or metadata keys to group global scores by.
-        train_refiner (StreamRefiner, optional):
-            Train refiner to be used in the recipe.
-        max_train_instances (int, optional):
-            Maximum training instances for the refiner.
-        validation_refiner (StreamRefiner, optional):
-            Validation refiner to be used in the recipe.
-        max_validation_instances (int, optional):
-            Maximum validation instances for the refiner.
-        test_refiner (StreamRefiner, optional):
-            Test refiner to be used in the recipe.
-        max_test_instances (int, optional):
-            Maximum test instances for the refiner.
-        demos_pool_size (int, optional):
-            Size of the demos pool.
-        num_demos (int, optional):
-            Number of demos to be used.
-        demos_pool_name (str, optional):
-            Name of the demos pool. Default is "demos_pool".
-        demos_taken_from (str, optional):
-            Specifies from where the demos are taken. Default is "train".
-        demos_field (str, optional):
-            Field name for demos. Default is "demos".
-        demos_removed_from_data (bool, optional):
-            whether to remove the demos from the source data, Default is True
-        sampler (Sampler, optional):
-            The Sampler used to select the demonstrations when num_demos > 0.
-        steps (List[StreamingOperator], optional):
-            List of StreamingOperator objects to be used in the recipe.
-        augmentor (Augmentor) :
-            Augmentor to be used to pseudo randomly augment the source text
-        instruction_card_index (int, optional):
-            Index of instruction card to be used for preparing the recipe.
-        template_card_index (int, optional):
-            Index of template card to be used for preparing the recipe.

-
-
-
-        by arranging all the steps, refiners, and renderers in a sequential manner.

-    Raises:
-        AssertionError:
-            If both template and template_card_index are specified at the same time.
-    """

    pass

+import itertools
+import json
+import sys
+from typing import Any, Dict, Generator, List, Optional, Union

from .artifact import fetch_artifact
from .augmentors import Augmentor, NullAugmentor
from .card import TaskCard
from .collections_operators import GetLength
from .dataclass import Field, InternalField, NonPositionalField, OptionalField
+from .deprecation_utils import deprecation
from .error_utils import UnitxtError
from .formats import Format, SystemFormat
+from .generator_utils import ReusableGenerator
from .logging_utils import get_logger
+from .operator import (
+    MultiStreamOperator,
+    SequentialOperator,
+    SourceSequentialOperator,
+    StreamingOperator,
+)
from .operators import Set, StreamRefiner
from .schema import FinalizeDataset
from .serializers import SingleTypeSerializer
from .settings_utils import get_constants, get_settings
+from .splitters import ConstantSizeSample, RandomSizeSample, Sampler
from .stream import MultiStream
from .system_prompts import EmptySystemPrompt, SystemPrompt
from .task import Task
from .templates import ApplyRandomTemplate, ApplySingleTemplate, Template, TemplatesList
from .type_utils import isoftype
+from .utils import LRUCache, recursive_copy

constants = get_constants()
settings = get_settings()


# Used to give meaningful name to recipe steps
+class CreateDemosPool(MultiStreamOperator):
+    from_stream: str = None
+    demos_pool_size: int = None
+    demos_removed_from_data: bool = None
+    to_field: str = constants.demos_pool_field
+
+    # flake8: noqa: B007
+    def process(self, multi_stream: MultiStream) -> MultiStream:
+        # generate the demos_pool as a selection of demos_pool_size distinct instances
+        # (distinct by their "input_fields" field). The selection is taken from stream named from_stream.
+        # The selected instances are later treated as ordinary instances or not, depending on parameter
+        # demos_removed_from_data.
+        # The selection of instances is done from the first instances of the stream named from_stream.
+        # instances that are not distinct from previously selected demo instances, are kept aside, to be later
+        # treated like all the remaining instances of stream from_stream.
+        if self.from_stream not in multi_stream:
+            raise ValueError(
+                f"Input multi-stream is missing a stream named '{self.from_stream}' to take demo instances from for the demos_pool."
+            )
+        if (
+            self.demos_removed_from_data is not None
+            and self.demos_removed_from_data is True
+            and (self.demos_pool_size == sys.maxsize)
+        ):
+            # going to consume the whole of input stream named self.from_stream for demo instances,
+            # and not let demos instances to behave as regular instances. so self.from_stream
+            # ends here its life as an input stream that is expected to reach the end of the recipe
+            if len(multi_stream) == 1:
+                raise ValueError(
+                    f"The single input stream, '{self.from_stream}' is to be wholly consumed for generating demos, and no instance is left to use these demos."
+                )
+        from_stream = multi_stream[self.from_stream]
+        demos_pool = []
+        input_fields_of_demos_pool = []
+        not_selected_from_from_stream = []
+        for num_scanned, instance in enumerate(from_stream):
+            if "input_fields" not in instance:
+                raise ValueError(f"'input_fields' field is missing from '{instance}'.")
+            input_fields_signature = json.dumps(
+                instance["input_fields"], sort_keys=True
+            )
+            if input_fields_signature in input_fields_of_demos_pool:
+                not_selected_from_from_stream.append(instance)
+                continue
+            demos_pool.append(instance)
+            input_fields_of_demos_pool.append(input_fields_signature)
+            if len(demos_pool) >= self.demos_pool_size:
+                break
+
+        # for backward compatibility, do not throw exception here if demos pool is smaller than expected.
+        # Delay that for the event (if occurs) that Sample is not be able to sample num_demos demos.
+
+        # to avoid endless recursion in case of not demos_removed_from_data
+        demos_pool = recursive_copy(demos_pool)
+
+        set_demos_pool = Set(fields={self.to_field: demos_pool})
+        if (
+            self.demos_removed_from_data is not None
+            and self.demos_removed_from_data is False
+        ):
+            # all input instances go out. No one is "killed" because selected as demo
+            return set_demos_pool(multi_stream)
+
+        if (
+            self.demos_removed_from_data is not None
+            and self.demos_removed_from_data is True
+        ):
+            if self.demos_pool_size == sys.maxsize:
+                # consume the whole of input stream self.from_stream, just for demos, and do not
+                # take any of its instances to behave as a non-demo instance, i.e., a regular instance
+                # that consume the demos
+                out_ms = MultiStream(
+                    {
+                        stream_name: multi_stream[stream_name]
+                        for stream_name in multi_stream
+                        if stream_name != self.from_stream
+                    }
+                )
+                return set_demos_pool(out_ms)
+
+        # self.demos_removed_from_data and not consume the whole of self.from_stream just for demos
+        def from_stream_generator(
+            first_layer: list, ms: MultiStream, stream_name: str, start: int
+        ) -> Generator:
+            yield from first_layer
+            yield from itertools.islice(ms[stream_name], start, None)
+
+        new_streams = {}
+        for stream_name in multi_stream:
+            if stream_name == self.from_stream:
+                new_streams[stream_name] = ReusableGenerator(
+                    generator=from_stream_generator,
+                    gen_kwargs={
+                        "first_layer": not_selected_from_from_stream,
+                        "ms": multi_stream,
+                        "stream_name": self.from_stream,
+                        "start": num_scanned + 1,
+                    },
+                )
+            else:
+                new_streams[stream_name] = ReusableGenerator(
+                    generator=from_stream_generator,
+                    gen_kwargs={
+                        "first_layer": [],
+                        "ms": multi_stream,
+                        "stream_name": stream_name,
+                        "start": 0,
+                    },
+                )
+
+        ms = MultiStream.from_generators(new_streams)
+        return set_demos_pool(ms)
+
+
+class AddDemosPool(MultiStreamOperator):
+    demos_pool: List[Dict[str, Any]]
+    demos_pool_field_name: str = constants.demos_pool_field
+
+    def process(self, multi_stream: MultiStream) -> MultiStream:
+        set_demos_pool = Set(fields={self.demos_pool_field_name: self.demos_pool})
+        return set_demos_pool(multi_stream)
+
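The selection loop in CreateDemosPool dedupes candidate demos by a canonical JSON signature of their "input_fields". A minimal standalone sketch of that idea, independent of unitxt's stream machinery (the function name is illustrative, not part of the library):

```python
import json

def select_distinct_demos(instances, pool_size):
    """Pick the first `pool_size` instances whose "input_fields" are pairwise distinct."""
    pool, signatures, leftovers = [], set(), []
    for instance in instances:
        # sort_keys=True makes the signature independent of dict key order
        signature = json.dumps(instance["input_fields"], sort_keys=True)
        if signature in signatures:
            # duplicates are kept aside and later flow on as regular instances
            leftovers.append(instance)
            continue
        pool.append(instance)
        signatures.add(signature)
        if len(pool) >= pool_size:
            break
    return pool, leftovers
```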
+class DatasetRecipe(SourceSequentialOperator):
+    """This class represents a standard recipe for data processing and preparation.
+
+    This class can be used to prepare a recipe
+    with all necessary steps, refiners and renderers included. It allows one to set various
+    parameters and steps in a sequential manner for preparing the recipe.
+
+    Args:
+        card (TaskCard):
+            TaskCard object associated with the recipe.
+        template (Template, optional):
+            Template object to be used for the recipe.
+        system_prompt (SystemPrompt, optional):
+            SystemPrompt object to be used for the recipe.
+        loader_limit (int, optional):
+            Specifies the maximum number of instances per stream to be returned from the loader (used to reduce loading time in large datasets)
+        format (SystemFormat, optional):
+            SystemFormat object to be used for the recipe.
+        metrics (List[str]):
+            list of catalog metrics to use with this recipe.
+        postprocessors (List[str]):
+            list of catalog processors to apply at post processing. (Not recommended to use from here)
+        group_by (List[Union[str, List[str]]]):
+            list of task_data or metadata keys to group global scores by.
+        train_refiner (StreamRefiner, optional):
+            Train refiner to be used in the recipe.
+        max_train_instances (int, optional):
+            Maximum training instances for the refiner.
+        validation_refiner (StreamRefiner, optional):
+            Validation refiner to be used in the recipe.
+        max_validation_instances (int, optional):
+            Maximum validation instances for the refiner.
+        test_refiner (StreamRefiner, optional):
+            Test refiner to be used in the recipe.
+        max_test_instances (int, optional):
+            Maximum test instances for the refiner.
+        demos_pool_size (int, optional):
+            Size of the demos pool. -1 for taking the whole of stream 'demos_taken_from'.
+        demos_pool (List[Dict[str, Any]], optional):
+            a list of instances to make the demos_pool
+        num_demos (int, optional):
+            Number of demos to add to each instance, to become part of the source to be generated for this instance.
+        demos_taken_from (str, optional):
+            Specifies the stream from where the demos are taken. Default is "train".
+        demos_field (str, optional):
+            Field name for demos. Default is "demos".
+            The num_demos demos selected for an instance are stored in this field of that instance.
+        demos_pool_field_name (str, optional):
+            field name to maintain the demos_pool, until sampled from, in order to make the demos.
+            Defaults to constants.demos_pool_field.
+        demos_removed_from_data (bool, optional):
+            whether to remove the demos taken to demos_pool from the source data. Default is True.
+        sampler (Sampler, optional):
+            The Sampler used to select the demonstrations when num_demos > 0.
+        skip_demoed_instances (bool, optional):
+            whether to skip pushing demos to an instance whose demos_field is
+            already populated. Defaults to False.
+        steps (List[StreamingOperator], optional):
+            List of StreamingOperator objects to be used in the recipe.
+        augmentor (Augmentor):
+            Augmentor to be used to pseudo randomly augment the source text
+        instruction_card_index (int, optional):
+            Index of instruction card to be used for preparing the recipe.
+        template_card_index (int, optional):
+            Index of template card to be used for preparing the recipe.
+
+    Methods:
+        prepare():
+            This overridden method is used for preparing the recipe
+            by arranging all the steps, refiners, and renderers in a sequential manner.
+
+    Raises:
+        AssertionError:
+            If both template and template_card_index are specified at the same time.
+    """

    # Base parameters
    card: TaskCard = None
    task: Task = None

    test_refiner: StreamRefiner = OptionalField(default_factory=StreamRefiner)

    demos_pool_size: int = None
+    demos_pool: List[Dict[str, Any]] = None
    num_demos: Optional[Union[int, List[int]]] = 0
    demos_removed_from_data: bool = True
+    demos_pool_field_name: str = constants.demos_pool_field

    demos_taken_from: str = "train"
    demos_field: str = "demos"
    sampler: Sampler = None

+    # do not push demos to instances whose "demos" field is already populated
+    skip_demoed_instances: bool = False
+
    augmentor: Union[Augmentor, List[Augmentor]] = OptionalField(default=None)

    steps: List[StreamingOperator] = InternalField(default_factory=list)

            raise ValueError(
                "When using demonstrations both num_demos and demos_pool_size should be assigned with positive integers."
            )
+        if self.demos_pool_size < self.max_demos_size + 1:
            raise ValueError(
+                f"num_demos (got: {self.max_demos_size}) should not exceed demos_pool_size - 1 (got: {self.demos_pool_size}), (-1: to always allow filtering of a demo identical to the processed instance)."
            )
+        if (
+            (not self.demos_pool)
+            and (self.demos_pool_size != sys.maxsize)
+            and self.loader_limit
+            and (self.demos_pool_size > self.loader_limit)
+        ):
            raise ValueError(
                f"demos_pool_size should not exceed loader_limit ({self.loader_limit}), Got demos_pool_size={self.demos_pool_size}"
            )

            self.loading,
            self.metadata,
            self.standardization,
        ]

        self.inference = SequentialOperator()

+        self.inference.steps = [self.processing, self.verbalization, self.finalize]

    def production_preprocess(self, task_instances):
        ms = MultiStream.from_iterables({constants.inference_stream: task_instances})
+        return list(self.metadata(ms)[constants.inference_stream])

    @property
    def has_custom_demos_pool(self):
+        return self.demos_pool_size is not None and (
+            self.demos_pool_size > 0 or self.demos_pool_size == -1
+        )

    @property
    def use_demos(self):

    def produce(self, task_instances):
        """Use the recipe in production to produce model ready query from standard task instance."""
        self.before_process_multi_stream()
+
+        ms = MultiStream.from_iterables({constants.inference_stream: task_instances})
+        # does not hurt to set metadata
+        # task_instances are assumed to be as if passed through self.standardization
+        ms = self.metadata(ms)
+        if not self.use_demos:
+            # go with task_instances all the way, it does not need other streams:
+            ms = self.inference(ms)
+            return list(ms[constants.inference_stream])
+
+        streams = self.inference_demos()
+        # streams stopped before processing
+        # ms is ready to join, it will get the demos from streams
+        streams[constants.inference_stream] = ms[constants.inference_stream]
+        # multi_stream = MultiStream(streams)
+        multi_stream = self.inference(streams)
        return list(multi_stream[constants.inference_stream])

    def reset(self):
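Taken together, the pipeline setup and the new produce() imply a usage pattern along these lines. This is a sketch only; the card name is a placeholder and the task fields in the commented call depend on the chosen card:

```python
from unitxt.standard import DatasetRecipe

# Demos come from the first 20 train instances; 3 are sampled per instance.
recipe = DatasetRecipe(
    card="cards.wnli",        # placeholder catalog card
    template_card_index=0,
    num_demos=3,
    demos_pool_size=20,
)
dataset = recipe().to_dataset()   # full pipeline, as a SourceOperator

# In production, render single task instances into model-ready sources:
# rendered = recipe.produce([{"text_a": "...", "text_b": "...", "classes": [...]}])
```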
        augmentor.set_fields(self.card.task.augmentable_inputs)
        self.processing.steps.append(augmentor)

+        # for backward compatibility, consume the demos instances even if not pushed into demos field of the ordinary instances,
+        # in order to use the very same ordinary instances as in back releases.
+        # one example of consume but not used, and indeed skips over a problematic (json-wise) input:
+        # prepare/cards/rag/end_to_end/clapnq.py
        if self.has_custom_demos_pool:
+            if self.demos_pool:
+                self.processing.steps.append(
+                    AddDemosPool(
+                        demos_pool=self.demos_pool,
+                        demos_pool_field_name=self.demos_pool_field_name,
+                    )
+                )
+            else:
+                self.processing.steps.append(
+                    CreateDemosPool(
+                        from_stream=self.demos_taken_from,
+                        demos_pool_size=self.demos_pool_size
+                        if self.demos_pool is None
+                        else None,
+                        demos_removed_from_data=self.demos_removed_from_data,
+                        to_field=self.demos_pool_field_name,
+                    )
                )

        if self.use_demos:
            if self.sampler is None:
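This branch means a fixed, caller-supplied demos_pool takes the AddDemosPool path and bypasses CreateDemosPool entirely. A hedged sketch of that configuration (the card name and instance fields are placeholders for whatever the task defines):

```python
# Hypothetical task instances used verbatim as the demonstration pool.
fixed_demos = [
    {"question": "2+2?", "answer": "4"},
    {"question": "Capital of France?", "answer": "Paris"},
]

recipe = DatasetRecipe(
    card="cards.my_card",      # placeholder
    template_card_index=0,
    num_demos=1,
    demos_pool=fixed_demos,    # AddDemosPool path; demos_pool_size becomes len(fixed_demos)
)
```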
        if isinstance(self.num_demos, int):
            self.verbalization.steps.append(
                ConstantSizeSample(
+                    from_field=self.demos_pool_field_name,
                    to_field=self.demos_field,
                    sampler=self.sampler,
                    sample_size=self.num_demos,
+                    skip_demoed_instances=self.skip_demoed_instances,
                )
            )
            self.verbalization.steps.append(
+                Set(
+                    fields={
+                        "recipe_metadata/num_demos": self.num_demos,
+                        "recipe_metadata/demos_pool_size": self.demos_pool_size,
+                    }
+                )
            )

        elif isinstance(self.num_demos, list):
            self.verbalization.steps.append(
                RandomSizeSample(
+                    from_field=self.demos_pool_field_name,
                    to_field=self.demos_field,
                    sampler=self.sampler,
                    sample_sizes=self.num_demos,
+                    skip_demoed_instances=self.skip_demoed_instances,
                )
            )
            self.verbalization.steps.append(
                GetLength(field="demos", to_field="recipe_metadata/num_demos")
            )
+            self.verbalization.steps.append(
+                Set(
+                    fields={"recipe_metadata/demos_pool_size": self.demos_pool_size}
+                )
+            )
+
        else:
            raise ValueError("num_demos must be int or List[int]")
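When num_demos is a list, RandomSizeSample draws a different demo count per instance, which is why the actual count is recorded per instance via GetLength rather than as a constant. A sketch of that configuration (card name is a placeholder):

```python
recipe = DatasetRecipe(
    card="cards.my_card",     # placeholder
    template_card_index=0,
    num_demos=[0, 1, 3],      # each instance gets 0, 1, or 3 demos, chosen at random
    demos_pool_size=20,
)
# Each rendered instance then carries its own "recipe_metadata/num_demos" value.
```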

                    template=self.template, demos_field=self.demos_field
                )
            )
+
        else:
            self.verbalization.steps.append(
+                Set(
+                    fields={
+                        "recipe_metadata/num_demos": 0,
+                        "recipe_metadata/demos_pool_size": 0,
+                    }
+                )
            )
        if isinstance(self.template, list):
            self.verbalization.steps.append(

        self.finalize.steps.append(FinalizeDataset(group_by=self.group_by))

    def prepare(self):
        assert (
            self.template_card_index is None or self.template is None

            raise ValueError(
                "No template was specified in the the 'template' or 'template_card_index' recipe arguments, and no default templates are defined the card or task"
            )
+        if self.use_demos:
+            assert (
+                self.demos_pool is not None
+                and isoftype(self.demos_pool, List[Dict[str, Any]])
+            ) != (
+                self.demos_taken_from is not None
+                and self.demos_pool_size is not None
+                and self.demos_removed_from_data is not None
+            ), (
+                "The demos_pool must be specified by exactly one of two ways: explicitly, as a list of instances coming through parameter "
+                + "'demos_pool', or via parameters 'demos_taken_from', 'demos_pool_size', and 'demos_removed_from_data', "
+                + "that together direct its production."
+            )

+        # now set self.demos_pool_size for the checks done by verify
+        if self.demos_pool:
+            self.demos_pool_size = len(self.demos_pool)
+        if self.demos_pool_size is not None and self.demos_pool_size == -1:
+            self.demos_pool_size = sys.maxsize

+        if isinstance(self.template, TemplatesList):
+            self.template = self.template.items
+        self.reset_pipeline()


+@deprecation(version="2.0.0", alternative=DatasetRecipe)
+class BaseRecipe(DatasetRecipe):
+    pass


+@deprecation(version="2.0.0", alternative=DatasetRecipe)
+class StandardRecipeWithIndexes(DatasetRecipe):
+    pass


+@deprecation(version="2.0.0", alternative=DatasetRecipe)
+class StandardRecipe(DatasetRecipe):
    pass
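The three legacy names are now thin subclasses of DatasetRecipe decorated with @deprecation, so existing imports keep working while pointing users at the new class. A small check of what that implies (assuming, as the diff suggests, that the decorator returns the class itself):

```python
from unitxt.standard import (
    BaseRecipe,
    DatasetRecipe,
    StandardRecipe,
    StandardRecipeWithIndexes,
)

# The aliases add no behavior of their own; instantiating them should emit a
# deprecation notice (exact mechanism depends on the @deprecation decorator).
for legacy in (BaseRecipe, StandardRecipeWithIndexes, StandardRecipe):
    assert issubclass(legacy, DatasetRecipe)
```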
struct_data_operators.py
CHANGED
@@ -679,7 +679,7 @@ class LoadJson(FieldOperator):
        except json.JSONDecodeError:
            return self.failure_value
        else:
-            return json.loads(value)


class DumpJson(FieldOperator):

        except json.JSONDecodeError:
            return self.failure_value
        else:
+            return json.loads(value, strict=False)


class DumpJson(FieldOperator):
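The strict=False switch makes json.loads accept unescaped control characters (literal tabs or newlines) inside string values instead of raising. A quick illustration with the standard library alone:

```python
import json

raw = '{"text": "line one\nline two"}'  # literal newline inside the string value

try:
    json.loads(raw)                     # default strict=True rejects it
except json.JSONDecodeError as error:
    print("strict parse fails:", error)

print(json.loads(raw, strict=False))    # {'text': 'line one\nline two'}
```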
task.py
CHANGED
@@ -40,25 +40,22 @@ def parse_string_types_instead_of_actual_objects(obj):
class Task(InstanceOperator, ArtifactFetcherMixin):
    """Task packs the different instance fields into dictionaries by their roles in the task.

-
        input_fields (Union[Dict[str, str], List[str]]):
-
-
-
        reference_fields (Union[Dict[str, str], List[str]]):
-
-
-
-
-
        prediction_type (Optional[str]):
-
-
-
        defaults (Optional[Dict[str, Any]]):
-
-
-

    The output instance contains three fields:
    1. "input_fields" whose value is a sub-dictionary of the input instance, consisting of all the fields listed in Arg 'input_fields'.
@@ -119,7 +116,7 @@ class Task(InstanceOperator, ArtifactFetcherMixin):
                self.prediction_type
            )

-    def
        if hasattr(self, "inputs") and self.inputs is not None:
            depr_message = (
                "The 'inputs' field is deprecated. Please use 'input_fields' instead."
@@ -130,6 +127,9 @@ class Task(InstanceOperator, ArtifactFetcherMixin):
            depr_message = "The 'outputs' field is deprecated. Please use 'reference_fields' instead."
            warnings.warn(depr_message, DeprecationWarning, stacklevel=2)

        if self.input_fields is None:
            raise UnitxtError(
                "Missing attribute in task: 'input_fields' not set.",
@@ -155,7 +155,11 @@ class Task(InstanceOperator, ArtifactFetcherMixin):
                f"will raise an exception.",
                Documentation.ADDING_TASK,
            )
-            data
            if io_type == "input_fields":
                self.input_fields = data
            else:
@@ -290,6 +294,9 @@ class Task(InstanceOperator, ArtifactFetcherMixin):
            "media": instance.get("media", {}),
            "recipe_metadata": instance.get("recipe_metadata", {}),
        }

        if stream_name == constants.inference_stream:
            return result

class Task(InstanceOperator, ArtifactFetcherMixin):
    """Task packs the different instance fields into dictionaries by their roles in the task.

+    Args:
        input_fields (Union[Dict[str, str], List[str]]):
+            Dictionary with string names of instance input fields and types of respective values.
+            In case a list is passed, each type will be assumed to be Any.
        reference_fields (Union[Dict[str, str], List[str]]):
+            Dictionary with string names of instance output fields and types of respective values.
+            In case a list is passed, each type will be assumed to be Any.
+        metrics (List[str]):
+            List of names of metrics to be used in the task.
        prediction_type (Optional[str]):
+            Need to be consistent with all used metrics. Defaults to None, which means that it will
+            be set to Any.
        defaults (Optional[Dict[str, Any]]):
+            An optional dictionary with default values for chosen input/output keys. Needs to be
+            consistent with names and types provided in 'input_fields' and/or 'output_fields' arguments.
+            Will not overwrite values if already provided in a given instance.

    The output instance contains three fields:
    1. "input_fields" whose value is a sub-dictionary of the input instance, consisting of all the fields listed in Arg 'input_fields'.

                self.prediction_type
            )

+    def task_deprecations(self):
        if hasattr(self, "inputs") and self.inputs is not None:
            depr_message = (
                "The 'inputs' field is deprecated. Please use 'input_fields' instead."

            depr_message = "The 'outputs' field is deprecated. Please use 'reference_fields' instead."
            warnings.warn(depr_message, DeprecationWarning, stacklevel=2)

+    def verify(self):
+        self.task_deprecations()
+
        if self.input_fields is None:
            raise UnitxtError(
                "Missing attribute in task: 'input_fields' not set.",

                f"will raise an exception.",
                Documentation.ADDING_TASK,
            )
+            if isinstance(data, dict):
+                data = parse_type_dict(to_type_dict(data))
+            else:
+                data = {key: Any for key in data}
+
            if io_type == "input_fields":
                self.input_fields = data
            else:

            "media": instance.get("media", {}),
            "recipe_metadata": instance.get("recipe_metadata", {}),
        }
+        if "demos" in instance:
+            # for the case of recipe.skip_demoed_instances
+            result["demos"] = instance["demos"]

        if stream_name == constants.inference_stream:
            return result
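Per the new normalization, a Task accepts either typed dicts or plain lists for its field specifications; a list is promoted to a dict with every type set to Any. A hedged sketch of the two equivalent declarations (the metric name is from the catalog, the field names are illustrative):

```python
from unitxt.task import Task

# List form: field types default to Any after normalization.
task_from_list = Task(
    input_fields=["question"],
    reference_fields=["answer"],
    prediction_type="str",
    metrics=["metrics.accuracy"],
)

# Dict form: types are stated explicitly and parsed from strings.
task_from_dict = Task(
    input_fields={"question": "str"},
    reference_fields={"answer": "str"},
    prediction_type="str",
    metrics=["metrics.accuracy"],
)
```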
templates.py
CHANGED
@@ -307,26 +307,27 @@ class PairwiseChoiceTemplate(InputOutputTemplate):
    The answer field value should be of type Literal["choice_a", "choice_b", "tie"]

    Args:
-        choice_a_field (str):
-
-        choice_b_field (str):
-
-        answer_field (str):
-
-
-        choice_a_label (str):
-
-        choice_b_label (str):
-
-        choice_tie_label (str):
-
-        shuffle (bool):

    shuffle: 50% of the time:
    1. The values of choice_a_field and choice_b_field will be swapped.
    2. If the values of answer_field is choice_a_label, set it to choice_b_label.
-
-

    """

@@ -636,21 +637,22 @@ class MultipleChoiceTemplate(InputFormatTemplate):
class YesNoTemplate(InputFormatTemplate):
    """A template for generating binary Yes/No questions asking whether an input text is of a specific class.

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
    """

    input_format: str = None

    The answer field value should be of type Literal["choice_a", "choice_b", "tie"]

    Args:
+        choice_a_field (str):
+            The field which contains choice_a value
+        choice_b_field (str):
+            The field which contains choice_b value
+        answer_field (str):
+            The field which contains the answer value.
+            Should be of type Literal["choice_1", "choice_2", "tie"]
+        choice_a_label (str):
+            The label of choice A answer as it is verbalized in the template.
+        choice_b_label (str):
+            The label of choice B answer as it is verbalized in the template.
+        choice_tie_label (str):
+            The label of a tie answer as it should be verbalized in the template.
+        shuffle (bool):
+            whether to shuffle the choices or not. This is done to take into account position bias.

    shuffle: 50% of the time:
    1. The values of choice_a_field and choice_b_field will be swapped.
    2. If the values of answer_field is choice_a_label, set it to choice_b_label.
+        Else if the values of answer_field is choice_b_label, set it to choice_a_label.
+        Else if the value of answer_field is choice_tie_label, do nothing.

    """

class YesNoTemplate(InputFormatTemplate):
    """A template for generating binary Yes/No questions asking whether an input text is of a specific class.

+    Args:
+        input_format:
+            Defines the format of the question.
+        class_field:
+            Defines the field that contains the name of the class that this template
+            asks of.
+        label_field:
+            Defines the field which contains the true label of the input text. If a gold label is equal to the
+            value in class_name, then the correct output is self.yes_answer (by default, "Yes").
+            Otherwise the correct output is self.no_answer (by default, "No").
+        yes_answer:
+            The output value for when the gold label equals self.class_name.
+            Defaults to "Yes".
+        no_answer:
+            The output value for when the gold label differs from self.class_name.
+            Defaults to "No".
    """

    input_format: str = None
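Putting the documented fields together, a YesNoTemplate might be configured as below. This is a sketch based on the Args above; the placeholder names in input_format must match fields the task actually provides:

```python
from unitxt.templates import YesNoTemplate

template = YesNoTemplate(
    input_format="Is the following text about {class}?\nText: {text}",
    class_field="class",    # field holding the class name being asked about
    label_field="labels",   # field holding the gold label(s) to compare against
    yes_answer="Yes",       # default shown for clarity
    no_answer="No",
)
```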
type_utils.py
CHANGED
@@ -552,6 +552,9 @@ def strtype(typing_type) -> str:
    - The function checks the `__origin__` attribute to determine the base type and formats
      the type arguments accordingly.
    """
    if not is_type(typing_type):
        raise UnsupportedTypeError(typing_type)

    - The function checks the `__origin__` attribute to determine the base type and formats
      the type arguments accordingly.
    """
+    if isinstance(typing_type, str):
+        return typing_type
+
    if not is_type(typing_type):
        raise UnsupportedTypeError(typing_type)
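With the new guard, strtype becomes a pass-through for inputs that are already string type names rather than raising UnsupportedTypeError. Expected behavior under this change (a sketch):

```python
from typing import List
from unitxt.type_utils import strtype

assert strtype(List[str]) == "List[str]"
# New: a string type name passes through unchanged instead of raising.
assert strtype("List[str]") == "List[str]"
```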
version.py
CHANGED
@@ -1 +1 @@
-version = "1.16.
+version = "1.16.1"