Spaces:

eagle0504
/

YSA-Larkin-Comm

App Files Files Community

eagle0504 committed on Mar 22

Commit

651347a

•

1 Parent(s): 65ce9a0

Update utils/helper_functions.py

Browse files

Files changed (1) hide show

utils/helper_functions.py +52 -64

utils/helper_functions.py CHANGED Viewed

@@ -238,76 +238,64 @@ def llama2_7b_ysa(prompt: str) -> str:
     return response
def quantize_to_4bit(arr: Union[np.ndarray, Any]) -> np.ndarray:
    """
    Converts an array to a 4-bit representation by normalizing and scaling its values.

    The input is converted to a numpy ndarray if it is not one already, min-max
    normalized to the range [0, 1], scaled to 0-15 and rounded to integers.

    Parameters:
        arr (Union[np.ndarray, Any]): An array or any type that can be converted to a numpy ndarray.

    Returns:
        np.ndarray: An integer ndarray with the same shape as the input and values in 0-15.
            An empty input yields an empty integer array, and an input whose values are
            all equal yields all zeros (there is no range to scale over).

    Examples:
        >>> quantize_to_4bit([0, 128, 255])
        array([ 0,  8, 15])
        >>> quantize_to_4bit([3, 3, 3])
        array([0, 0, 0])
    """
    if not isinstance(arr, np.ndarray):
        arr = np.array(arr)

    # Nothing to reduce over: min()/max() would raise on a zero-size array.
    if arr.size == 0:
        return np.zeros(arr.shape, dtype=int)

    # Promote integer/bool data to float64 so that `max - min` cannot wrap
    # around in narrow dtypes (e.g. int8) and boolean input supports subtraction.
    if arr.dtype.kind in "iub":
        arr = arr.astype(np.float64)

    arr_min = arr.min()
    value_range = arr.max() - arr_min

    # A constant array has zero range; dividing by it would produce NaN, and
    # casting NaN to int yields meaningless values. Map everything to level 0.
    if value_range == 0:
        return np.zeros(arr.shape, dtype=int)

    normalized_arr = (arr - arr_min) / value_range
    return np.round(normalized_arr * 15).astype(int)
def quantized_influence(arr1: np.ndarray, arr2: np.ndarray) -> float:
    """
    Calculates a weighted measure of influence between two arrays based on their quantized (4-bit) versions.

    Both arrays are quantized with `quantize_to_4bit`. For every distinct quantized
    value of `arr1`, the mean of the matching quantized `arr2` entries is compared
    with the global mean of quantized `arr2`; the squared difference is weighted by
    the squared group size. The mean of these weighted terms is divided by the
    standard deviation of quantized `arr2`.

    Parameters:
        arr1 (np.ndarray): The first input numpy array.
        arr2 (np.ndarray): The second input numpy array; it is indexed with a mask
            built from `arr1`, so both must have the same shape.

    Returns:
        float: The calculated influence value. Returns 0.0 when quantized `arr2`
            has zero standard deviation, because every local mean then equals the
            global mean and the ratio would otherwise be 0/0.
    """
    arr1_4bit = quantize_to_4bit(arr1)
    arr2_4bit = quantize_to_4bit(arr2)

    # Guard the final division: a constant quantized arr2 has no spread.
    spread = np.std(arr2_4bit)
    if spread == 0:
        return 0.0

    y_bar_global = np.mean(arr2_4bit)

    weighted_local_averages = []
    for val in np.unique(arr1_4bit):
        # Build the mask once per level instead of once per use.
        group = arr2_4bit[arr1_4bit == val]
        weighted_local_averages.append(
            (np.mean(group) - y_bar_global) ** 2 * len(group) ** 2
        )

    return float(np.mean(weighted_local_averages) / spread)

     return response
def quantize_to_kbit(arr: Union[np.ndarray, Any], k: int = 16) -> np.ndarray:
    """Quantizes an array to k integer levels by min-max normalizing and scaling its values.

    Despite the function name, `k` is the number of quantization levels, not a
    bit width: the default of 16 levels corresponds to 4-bit quantization.

    Args:
        arr (Union[np.ndarray, Any]): The input array (or array-like) to be quantized.
        k (int): The number of levels to quantize to. Defaults to 16 for 4-bit quantization.

    Returns:
        np.ndarray: An integer array with the same shape as the input and values in
            0 to k-1. An empty input yields an empty integer array, and an input
            whose values are all equal yields all zeros.

    Raises:
        ValueError: If `k` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    if not isinstance(arr, np.ndarray):
        arr = np.array(arr)

    # Nothing to reduce over: min()/max() would raise on a zero-size array.
    if arr.size == 0:
        return np.zeros(arr.shape, dtype=int)

    # Promote integer/bool data to float64 so that `max - min` cannot wrap
    # around in narrow dtypes (e.g. int8) and boolean input supports subtraction.
    if arr.dtype.kind in "iub":
        arr = arr.astype(np.float64)

    arr_min = arr.min()
    value_range = arr.max() - arr_min

    # A constant array has zero range; dividing by it would produce NaN, and
    # casting NaN to int yields meaningless values. Map everything to level 0.
    if value_range == 0:
        return np.zeros(arr.shape, dtype=int)

    normalized_arr = (arr - arr_min) / value_range
    return np.round(normalized_arr * (k - 1)).astype(int)
def quantized_influence(
    arr1: np.ndarray,
    arr2: np.ndarray,
    k: int = 16,
    use_dagger: bool = False,
) -> Tuple[float, List[float]]:
    """
    Calculates a weighted measure of influence based on quantized versions of the input arrays.

    Both arrays are quantized to `k` levels with `quantize_to_kbit`. For every
    distinct quantized value of `arr1`, the mean of the matching quantized `arr2`
    entries (the "local estimate") is compared with the global mean of quantized
    `arr2`; the squared difference is weighted by the squared group size. The mean
    of these weighted terms is divided by the standard deviation of quantized `arr2`.

    Args:
        arr1 (np.ndarray): First input array to be quantized and analyzed.
        arr2 (np.ndarray): Second input array to be quantized and used for influence
            measurement; it is indexed with a mask built from `arr1`, so both must
            have the same shape.
        k (int): The number of quantization levels, defaults to 16 for 4-bit quantization.
        use_dagger (bool): If True, each quantized `arr1` value is replaced by the
            local estimate of its level. Defaults to False.

    Returns:
        Tuple[float, List[float]]: The quantized influence measure, and either the
            per-element local estimates (`use_dagger=True`) or the quantized `arr1`
            values as a list (`use_dagger=False`). The measure is 0.0 when quantized
            `arr2` has no positive standard deviation, since the ratio is then undefined.
    """
    arr1_quantized = quantize_to_kbit(arr1, k)
    arr2_quantized = quantize_to_kbit(arr2, k)

    # `inverse` maps every element of arr1_quantized to the index of its level in
    # `unique_values`. Reshape explicitly because the shape returned for
    # multi-dimensional input differs between NumPy versions.
    unique_values, inverse = np.unique(arr1_quantized, return_inverse=True)
    inverse = inverse.reshape(arr1_quantized.shape)

    y_bar_global = np.mean(arr2_quantized)

    # Compute each level's local mean and size once; both the measure and the
    # dagger transformation reuse them.
    local_estimates = []
    group_sizes = []
    for val in unique_values:
        group = arr2_quantized[arr1_quantized == val]
        local_estimates.append(np.mean(group))
        group_sizes.append(len(group))
    local_estimates = np.asarray(local_estimates, dtype=float)
    group_sizes = np.asarray(group_sizes, dtype=float)

    weighted_local_averages = (local_estimates - y_bar_global) ** 2 * group_sizes ** 2

    # Guard the division: a constant quantized arr2 has zero spread.
    spread = np.std(arr2_quantized)
    qim = float(np.mean(weighted_local_averages) / spread) if spread > 0 else 0.0

    if use_dagger:
        # Replace every quantized arr1 value by the local estimate of its level.
        daggered_values = local_estimates[inverse].tolist()
    else:
        daggered_values = arr1_quantized.tolist()

    return qim, daggered_values