Spaces:
Configuration error

englert committed on
Commit
ba93a7e
·
1 Parent(s): e839e64

update app.py #3

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. fastdist2.py +1 -290
  3. requirements.txt +1 -0
app.py CHANGED
@@ -90,4 +90,4 @@ demo = gr.Interface(
90
  gr.components.Number(label="Downsample size")],
91
  outputs=gr.components.File(label="Zip"))
92
 
93
- demo.launch()
 
90
  gr.components.Number(label="Downsample size")],
91
  outputs=gr.components.File(label="Zip"))
92
 
93
+ demo.launch(debug = True)
fastdist2.py CHANGED
@@ -1,144 +1,9 @@
1
- import math
2
-
3
  import numpy as np
4
- from numba import jit, prange, cuda, float32
5
 
6
 
7
  # https://github.com/talboger/fastdist
8
 
9
- @jit(nopython=True, fastmath=True)
10
- def cosine(u, v, w=None):
11
- """
12
- :purpose:
13
- Computes the cosine similarity between two 1D arrays
14
- Unlike scipy's cosine distance, this returns similarity, which is 1 - distance
15
-
16
- :params:
17
- u, v : input arrays, both of shape (n,)
18
- w : weights at each index of u and v. array of shape (n,)
19
- if no w is set, it is initialized as an array of ones
20
- such that it will have no impact on the output
21
-
22
- :returns:
23
- cosine : float, the cosine similarity between u and v
24
-
25
- :example:
26
- >>> from fastdist import fastdist
27
- >>> import numpy as np
28
- >>> u, v, w = np.random.RandomState(seed=0).rand(10000, 3).T
29
- >>> fastdist.cosine(u, v, w)
30
- 0.7495065944399267
31
- """
32
- n = len(u)
33
- num = 0
34
- u_norm, v_norm = 0, 0
35
- for i in range(n):
36
- num += u[i] * v[i] * w[i]
37
- u_norm += abs(u[i]) ** 2 * w[i]
38
- v_norm += abs(v[i]) ** 2 * w[i]
39
-
40
- denom = (u_norm * v_norm) ** (1 / 2)
41
- return num / denom
42
-
43
-
44
- @jit(nopython=True, fastmath=True)
45
- def cosine_vector_to_matrix(u, m):
46
- """
47
- :purpose:
48
- Computes the cosine similarity between a 1D array and rows of a matrix
49
-
50
- :params:
51
- u : input vector of shape (n,)
52
- m : input matrix of shape (m, n)
53
-
54
- :returns:
55
- cosine vector : np.array, of shape (m,) vector containing cosine similarity between u
56
- and the rows of m
57
-
58
- :example:
59
- >>> from fastdist import fastdist
60
- >>> import numpy as np
61
- >>> u = np.random.RandomState(seed=0).rand(10)
62
- >>> m = np.random.RandomState(seed=0).rand(100, 10)
63
- >>> fastdist.cosine_vector_to_matrix(u, m)
64
- (returns an array of shape (100,))
65
- """
66
- norm = 0
67
- for i in range(len(u)):
68
- norm += abs(u[i]) ** 2
69
- u = u / norm ** (1 / 2)
70
- for i in range(m.shape[0]):
71
- norm = 0
72
- for j in range(len(m[i])):
73
- norm += abs(m[i][j]) ** 2
74
- m[i] = m[i] / norm ** (1 / 2)
75
- return np.dot(u, m.T)
76
-
77
-
78
- @jit(nopython=True, fastmath=True)
79
- def cosine_matrix_to_matrix(a, b):
80
- """
81
- :purpose:
82
- Computes the cosine similarity between the rows of two matrices
83
-
84
- :params:
85
- a, b : input matrices of shape (m, n) and (k, n)
86
- the matrices must share a common dimension at index 1
87
-
88
- :returns:
89
- cosine matrix : np.array, an (m, k) array of the cosine similarity
90
- between the rows of a and b
91
-
92
- :example:
93
- >>> from fastdist import fastdist
94
- >>> import numpy as np
95
- >>> a = np.random.RandomState(seed=0).rand(10, 50)
96
- >>> b = np.random.RandomState(seed=0).rand(100, 50)
97
- >>> fastdist.cosine_matrix_to_matrix(a, b)
98
- (returns an array of shape (10, 100))
99
- """
100
- for i in range(a.shape[0]):
101
- norm = 0
102
- for j in range(len(a[i])):
103
- norm += abs(a[i][j]) ** 2
104
- a[i] = a[i] / norm ** (1 / 2)
105
- for i in range(b.shape[0]):
106
- norm = 0
107
- for j in range(len(b[i])):
108
- norm += abs(b[i][j]) ** 2
109
- b[i] = b[i] / norm ** (1 / 2)
110
- return np.dot(a, b.T)
111
-
112
-
113
- @jit(nopython=True, fastmath=True)
114
- def euclidean(u, v):
115
- """
116
- :purpose:
117
- Computes the Euclidean distance between two 1D arrays
118
-
119
- :params:
120
- u, v : input arrays, both of shape (n,)
121
- w : weights at each index of u and v. array of shape (n,)
122
- if no w is set, it is initialized as an array of ones
123
- such that it will have no impact on the output
124
-
125
- :returns:
126
- euclidean : float, the Euclidean distance between u and v
127
-
128
- :example:
129
- >>> from fastdist import fastdist
130
- >>> import numpy as np
131
- >>> u, v, w = np.random.RandomState(seed=0).rand(10000, 3).T
132
- >>> fastdist.euclidean(u, v, w)
133
- 28.822558591834163
134
- """
135
- n = len(u)
136
- dist = 0
137
- for i in range(n):
138
- dist += abs(u[i] - v[i]) ** 2
139
- return dist ** (1 / 2)
140
-
141
-
142
  @jit(nopython=True, fastmath=True)
143
  def euclidean_vector_to_matrix_distance(u, m):
144
  """
@@ -176,157 +41,3 @@ def euclidean_vector_to_matrix_distance(u, m):
176
  out[i] = dist ** (1 / 2)
177
 
178
  return out
179
-
180
-
181
- @cuda.jit
182
- def gpu_kernel_euclidean_vector_to_matrix_distance(u, m, u_dim0, m_dim0, out):
183
- # Thread id in a 1D block
184
- tx = cuda.threadIdx.x
185
- # Block id in a 1D grid
186
- ty = cuda.blockIdx.x
187
- # Block width, i.e. number of threads per block
188
- bw = cuda.blockDim.x
189
- # Compute flattened index inside the array
190
- pos = tx + ty * bw
191
- if pos < m_dim0: # Check array boundaries
192
- dist = 0
193
- for l in range(u_dim0):
194
- d = abs(u[l] - m[pos][l])
195
- dist += d * d
196
- out[pos] = dist ** (1 / 2)
197
-
198
-
199
- def euclidean_vector_to_matrix_distance_gpu(u, m):
200
- m_dim0 = m.shape[0]
201
- u_dim0 = u.shape[0]
202
- out = np.zeros((m_dim0), dtype=np.float32)
203
-
204
- threadsperblock = 16
205
- blockspergrid = (m_dim0 + (threadsperblock - 1)) // threadsperblock
206
- gpu_kernel_euclidean_vector_to_matrix_distance[blockspergrid, threadsperblock](u, m, u_dim0, m_dim0, out)
207
-
208
- return out
209
-
210
-
211
- # https://numba.readthedocs.io/en/stable/cuda/examples.html
212
- @cuda.jit
213
- def gpu_kernel_euclidean_matrix_to_matrix_distance_fast(A, B, C):
214
- TPB = 16
215
-
216
- # Define an array in the shared memory
217
- # The size and type of the arrays must be known at compile time
218
- sA = cuda.shared.array(shape=(TPB, TPB), dtype=float32)
219
-
220
- sB = cuda.shared.array(shape=(TPB, TPB), dtype=float32)
221
-
222
- x, y = cuda.grid(2)
223
-
224
- tx = cuda.threadIdx.x
225
-
226
- ty = cuda.threadIdx.y
227
-
228
- bpg = cuda.gridDim.x # blocks per grid
229
-
230
- # Each thread computes one element in the result matrix.
231
-
232
- # The dot product is chunked into dot products of TPB-long vectors.
233
-
234
- tmp = float32(0.)
235
-
236
- for i in range(bpg):
237
-
238
- # Preload data into shared memory
239
-
240
- sA[ty, tx] = 0
241
-
242
- sB[ty, tx] = 0
243
-
244
- if y < A.shape[0] and (tx + i * TPB) < A.shape[1]:
245
- sA[ty, tx] = A[y, tx + i * TPB]
246
-
247
- if x < B.shape[1] and (ty + i * TPB) < B.shape[0]:
248
- sB[ty, tx] = B[ty + i * TPB, x]
249
-
250
- # Wait until all threads finish preloading
251
-
252
- cuda.syncthreads()
253
-
254
- # Computes partial product on the shared memory
255
-
256
- for j in range(TPB):
257
- d = abs(sA[ty, j] - sB[j, tx])
258
- tmp += d * d
259
- # Wait until all threads finish computing
260
-
261
- cuda.syncthreads()
262
-
263
- if y < C.shape[0] and x < C.shape[1]:
264
- C[y, x] = tmp ** (1 / 2)
265
-
266
-
267
- def euclidean_matrix_to_matrix_distance_gpu_fast(u, m):
268
- u_dim0 = u.shape[0]
269
- m_dim1 = m.shape[1]
270
-
271
- # vec_dim = u.shape[1]
272
- # assert vec_dim == m.shape[1]
273
- out = np.zeros((u_dim0, m_dim1), dtype=np.float32)
274
-
275
- threadsperblock = (16, 16)
276
- grid_y_max = max(u.shape[0], m.shape[0])
277
- grid_x_max = max(u.shape[1], m.shape[1])
278
- blockspergrid_x = math.ceil(grid_x_max / threadsperblock[0])
279
- blockspergrid_y = math.ceil(grid_y_max / threadsperblock[1])
280
-
281
- blockspergrid = (blockspergrid_x, blockspergrid_y)
282
-
283
- u_d = cuda.to_device(u)
284
- m_d = cuda.to_device(m)
285
- out_d = cuda.to_device(out)
286
-
287
- gpu_kernel_euclidean_matrix_to_matrix_distance_fast[blockspergrid, threadsperblock](u_d, m_d, out_d)
288
- out = out_d.copy_to_host()
289
- return out
290
-
291
-
292
- @jit(cache=True, nopython=True, parallel=True, fastmath=True, boundscheck=False, nogil=True)
293
- def euclidean_matrix_to_matrix_distance(a, b):
294
- """
295
- :purpose:
296
- Computes the distance between the rows of two matrices using any given metric
297
-
298
- :params:
299
- a, b : input matrices either of shape (m, n) and (k, n)
300
- the matrices must share a common dimension at index 1
301
- metric : the function used to calculate the distance
302
- metric_name : str of the function name. this is only used for
303
- the if statement because cosine similarity has its
304
- own function
305
-
306
- :returns:
307
- distance matrix : np.array, an (m, k) array of the distance
308
- between the rows of a and b
309
-
310
- :example:
311
- >>> from fastdist import fastdist
312
- >>> import numpy as np
313
- >>> a = np.random.RandomState(seed=0).rand(10, 50)
314
- >>> b = np.random.RandomState(seed=0).rand(100, 50)
315
- >>> fastdist.matrix_to_matrix_distance(a, b, fastdist.cosine, "cosine")
316
- (returns an array of shape (10, 100))
317
-
318
- :note:
319
- the cosine similarity uses its own function, cosine_matrix_to_matrix.
320
- this is because normalizing the rows and then taking the dot product
321
- of the two matrices heavily optimizes the computation. the other similarity
322
- metrics do not have such an optimization, so we loop through them
323
- """
324
- n, m = a.shape[0], b.shape[0]
325
- out = np.zeros((n, m), dtype=np.float32)
326
- for i in prange(n):
327
- for j in range(m):
328
- dist = 0
329
- for l in range(len(a[i])):
330
- dist += abs(a[i][l] - b[j][l]) ** 2
331
- out[i][j] = dist ** (1 / 2)
332
- return out
 
 
 
1
  import numpy as np
2
+ from numba import jit, prange
3
 
4
 
5
  # https://github.com/talboger/fastdist
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  @jit(nopython=True, fastmath=True)
8
  def euclidean_vector_to_matrix_distance(u, m):
9
  """
 
41
  out[i] = dist ** (1 / 2)
42
 
43
  return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -5,3 +5,4 @@ numpy
5
  opencv-python
6
  umap-learn
7
  numba
 
 
5
  opencv-python
6
  umap-learn
7
  numba
8
+ gradio