marta-marta committed on
Commit
73d009a
·
1 Parent(s): 0d09ea1

Adding type hints to the functions in these scripts

Browse files
Files changed (1) hide show
  1. Data_Plotting/Plot_TSNE.py +20 -13
Data_Plotting/Plot_TSNE.py CHANGED
@@ -2,28 +2,35 @@ from sklearn.manifold import TSNE
2
  import matplotlib.pyplot as plt
3
  import numpy as np
4
 
 
5
  # Latent Feature Cluster for Training Data using T-SNE
6
- def TSNE_reduction(latent_points: list, perplexity=30, learning_rate=20):
7
- latent_dimensionality = len(latent_points[0])
 
 
 
 
 
 
 
 
 
 
8
  model = TSNE(n_components=2, random_state=0, perplexity=perplexity,
9
- learning_rate=learning_rate) # Perplexity(5-50) | learning_rate(10-1000)
10
- embedding = model
11
- # configuring the parameters
12
  # the number of components = dimension of the embedded space
13
- # default perplexity = 30 " Perplexity balances the attention t-SNE gives to local and global aspects of the data.
14
- # It is roughly a guess of the number of close neighbors each point has. ..a denser dataset ... requires higher perplexity value"
15
- # default learning rate = 200 "If the learning rate is too high, the data may look like a ‘ball’ with any point
16
- # approximately equidistant from its nearest neighbours. If the learning rate is too low,
17
- # most points may look compressed in a dense cloud with few outliers."
18
- tsne_data = model.fit_transform(
19
- latent_points) # When there are more data points, trainX should be the first couple hundred points so TSNE doesn't take too long
20
  x = tsne_data[:, 0]
21
  y = tsne_data[:, 1]
22
  title = ("T-SNE of Data")
23
  return x, y, title, embedding
24
 
25
 
26
- def plot_dimensionality_reduction(x, y, label_set, title):
27
  plt.title(title)
28
  # Color points based on their density
29
  if label_set[0].dtype == float:
 
2
  import matplotlib.pyplot as plt
3
  import numpy as np
4
 
5
+
6
  # Latent Feature Cluster for Training Data using T-SNE
7
+ def TSNE_reduction(latent_points: np.ndarray, perplexity=30, learning_rate=20):
8
+ """
9
+ :param latent_points: [ndarray] - an array of arrays that define the points of an object in the latent space
10
+ :param perplexity: [int] - default perplexity = 30. "Perplexity balances the attention t-SNE gives to local and
11
+ global aspects of the data. It is roughly a guess of the number of close neighbors each point has...
12
+ a denser dataset ... requires higher perplexity value" Recommended: Perplexity(5-50)
13
+ :param learning_rate: [int] - defaults to 20 in this function (the quoted scikit-learn text refers to a library default of 200). "If the learning rate is too high, the data may look
14
+ like a ‘ball’ with any point approximately equidistant from its nearest neighbours.
15
+ If the learning rate is too low, most points may look compressed in a dense cloud with few outliers."
16
+ Recommended: learning_rate(10-1000)
17
+ :return: [tuple] - the x and y coordinates of the reduced latent space, a title, and the TSNE model object that was fit (returned as "embedding")
18
+ """
19
  model = TSNE(n_components=2, random_state=0, perplexity=perplexity,
20
+ learning_rate=learning_rate)
 
 
21
  # the number of components = dimension of the embedded space
22
+
23
+ embedding = model
24
+
25
+ tsne_data = model.fit_transform(latent_points)
26
+ # When there are more data points, only use a couple of hundred points so TSNE doesn't take too long
 
 
27
  x = tsne_data[:, 0]
28
  y = tsne_data[:, 1]
29
  title = ("T-SNE of Data")
30
  return x, y, title, embedding
31
 
32
 
33
+ def plot_dimensionality_reduction(x: list, y: list, label_set: list, title: str):
34
  plt.title(title)
35
  # Color points based on their density
36
  if label_set[0].dtype == float: