Update utils/helper_functions.py
Browse files- utils/helper_functions.py +52 -64
utils/helper_functions.py
CHANGED
@@ -238,76 +238,64 @@ def llama2_7b_ysa(prompt: str) -> str:
|
|
238 |
return response
|
239 |
|
240 |
|
241 |
-
def
|
242 |
-
"""
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
the values of the array to be between 0 and 1. Finally, it scales these
|
248 |
-
normalized values to the range of 0-15, corresponding to 4-bit integers,
|
249 |
-
and returns this array of integers.
|
250 |
-
|
251 |
-
Parameters:
|
252 |
-
arr (Union[np.ndarray, Any]): An array or any type that can be converted to a numpy ndarray.
|
253 |
|
254 |
Returns:
|
255 |
-
np.ndarray:
|
256 |
-
|
257 |
-
Examples:
|
258 |
-
>>> quantize_to_4bit([0, 128, 255])
|
259 |
-
array([ 0, 7, 15])
|
260 |
"""
|
261 |
-
if not isinstance(arr, np.ndarray): # Check if
|
262 |
-
arr = np.array(arr) # Convert to numpy array
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
# Normalize array values to a [0, 1] range
|
268 |
-
normalized_arr = (arr - arr_min) / (arr_max - arr_min)
|
269 |
|
270 |
-
# Scale normalized values to a 0-15 range (4-bit) and convert to integer
|
271 |
-
return np.round(normalized_arr * 15).astype(int)
|
272 |
|
273 |
-
|
274 |
-
def quantized_influence(arr1: np.ndarray, arr2: np.ndarray) -> float:
|
275 |
"""
|
276 |
-
Calculates a weighted measure of influence
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
Parameters:
|
284 |
-
arr1 (np.ndarray): The first input numpy array.
|
285 |
-
arr2 (np.ndarray): The second input numpy array.
|
286 |
|
287 |
Returns:
|
288 |
-
float:
|
289 |
-
|
290 |
-
Note:
|
291 |
-
Both inputs must be numpy ndarrays and it's expected that a function named `quantize_to_4bit`
|
292 |
-
exists for converting an array to its 4-bit representation.
|
293 |
"""
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
# Compute
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
return response
|
239 |
|
240 |
|
241 |
+
def quantize_to_kbit(arr: Union[np.ndarray, Any], k: int = 16) -> np.ndarray:
    """Quantize an array to ``k`` integer levels via min-max normalization.

    The values are rescaled linearly so that the minimum maps to 0 and the
    maximum maps to ``k - 1``, then rounded to the nearest integer with
    ``np.round``.

    Args:
        arr (Union[np.ndarray, Any]): The input array to be quantized, or any
            object that ``np.array`` can convert to an array.
        k (int): The number of quantization levels (not the bit width).
            Defaults to 16, i.e. the 16 levels of a 4-bit integer.

    Returns:
        np.ndarray: Integer array with the same shape as ``arr`` whose values
        are scaled to ``0 .. k - 1``. A constant input (all values equal) has
        no spread to normalize and is mapped to all zeros.

    Raises:
        ValueError: If ``arr`` is empty, because NumPy cannot take the
            minimum/maximum of a zero-size array.
    """
    if not isinstance(arr, np.ndarray):
        arr = np.array(arr)

    arr_min = arr.min()
    arr_max = arr.max()
    value_range = arr_max - arr_min

    if value_range == 0:
        # Every element is identical: dividing by the zero range would yield
        # NaN and an undefined integer cast, so place everything on level 0.
        return np.zeros(arr.shape, dtype=int)

    # Normalize to [0, 1], then stretch to the k available levels.
    normalized_arr = (arr - arr_min) / value_range
    return np.round(normalized_arr * (k - 1)).astype(int)
|
|
|
|
|
257 |
|
|
|
|
|
258 |
|
259 |
+
def quantized_influence(arr1: np.ndarray, arr2: np.ndarray, k: int = 16, use_dagger: bool = False) -> Tuple[float, List[float]]:
    """Compute a quantized influence measure of ``arr1`` on ``arr2``.

    Both arrays are quantized to ``k`` levels with ``quantize_to_kbit``. The
    quantized ``arr2`` values are then grouped by the quantized ``arr1`` value
    at the same position. For every group the squared deviation of the group
    mean from the global mean is weighted by the squared group size; the mean
    of these terms divided by the standard deviation of the quantized ``arr2``
    is the returned measure.

    Args:
        arr1 (np.ndarray): First input array; its quantized values define the
            groups. Assumed to be 1-D and the same length as ``arr2``
            (the grouping uses element-wise boolean masks) -- TODO confirm
            against callers.
        arr2 (np.ndarray): Second input array whose quantized values are
            averaged within each group.
        k (int): The quantization level count, defaults to 16 for 4-bit
            quantization.
        use_dagger (bool): If True, the second return value replaces each
            quantized ``arr1`` element by the mean of its group in the
            quantized ``arr2``. Defaults to False.

    Returns:
        Tuple[float, List[float]]: The quantized influence measure, and either
        the per-element group means (``use_dagger=True``) or the quantized
        ``arr1`` values as a list (``use_dagger=False``). The measure is 0.0
        when the quantized ``arr2`` has zero standard deviation, since every
        group mean then equals the global mean.
    """
    # Quantize both arrays to k levels.
    arr1_quantized = quantize_to_kbit(arr1, k)
    arr2_quantized = quantize_to_kbit(arr2, k)

    # Distinct quantized levels present in arr1; each one defines a group.
    unique_values = np.unique(arr1_quantized)

    # Global average of the quantized arr2.
    y_bar_global = np.mean(arr2_quantized)

    # Single pass over the groups: build each mask once and keep the group
    # mean so the dagger mapping below does not have to recompute it.
    local_estimates = []
    weighted_local_averages = []
    for val in unique_values:
        group = arr2_quantized[arr1_quantized == val]
        local_mean = np.mean(group)
        local_estimates.append(local_mean)
        weighted_local_averages.append(
            (local_mean - y_bar_global) ** 2 * len(group) ** 2
        )

    spread = np.std(arr2_quantized)
    if spread == 0:
        # Constant quantized arr2: numerator and denominator are both zero,
        # which would otherwise produce NaN. No variation means no influence.
        qim = 0.0
    else:
        qim = np.mean(weighted_local_averages) / spread

    if use_dagger:
        # Map every quantized arr1 element to the local estimate of its group.
        daggers = dict(zip(unique_values, local_estimates))
        daggered_values = [daggers[val] for val in arr1_quantized]
    else:
        # Without the transformation, return the quantized arr1 values as-is.
        daggered_values = arr1_quantized.tolist()

    return qim, daggered_values
|