Commit
·
73d009a
1
Parent(s):
0d09ea1
Adding type hints to the functions in these scripts
Browse files- Data_Plotting/Plot_TSNE.py +20 -13
Data_Plotting/Plot_TSNE.py
CHANGED
@@ -2,28 +2,35 @@ from sklearn.manifold import TSNE
|
|
2 |
import matplotlib.pyplot as plt
|
3 |
import numpy as np
|
4 |
|
|
|
5 |
# Latent Feature Cluster for Training Data using T-SNE
|
6 |
-
def TSNE_reduction(latent_points:
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
model = TSNE(n_components=2, random_state=0, perplexity=perplexity,
|
9 |
-
learning_rate=learning_rate)
|
10 |
-
embedding = model
|
11 |
-
# configuring the parameters
|
12 |
# the number of components = dimension of the embedded space
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
#
|
18 |
-
tsne_data = model.fit_transform(
|
19 |
-
latent_points) # When there are more data points, trainX should be the first couple hundred points so TSNE doesn't take too long
|
20 |
x = tsne_data[:, 0]
|
21 |
y = tsne_data[:, 1]
|
22 |
title = ("T-SNE of Data")
|
23 |
return x, y, title, embedding
|
24 |
|
25 |
|
26 |
-
def plot_dimensionality_reduction(x, y, label_set, title):
|
27 |
plt.title(title)
|
28 |
# Color points based on their density
|
29 |
if label_set[0].dtype == float:
|
|
|
2 |
import matplotlib.pyplot as plt
|
3 |
import numpy as np
|
4 |
|
5 |
+
|
6 |
# Latent Feature Cluster for Training Data using T-SNE
|
7 |
+
def TSNE_reduction(latent_points: np.ndarray, perplexity=30, learning_rate=20):
    """
    Reduce latent-space points to two dimensions with T-SNE.

    :param latent_points: [ndarray] - the latent-space points to reduce; passed unchanged to TSNE.fit_transform
    :param perplexity: [int] - default perplexity = 30. Forwarded to TSNE. "Perplexity balances the attention t-SNE
        gives to local and global aspects of the data. It is roughly a guess of the number of close neighbors each
        point has... a denser dataset ... requires higher perplexity value" Recommended: Perplexity(5-50)
    :param learning_rate: [int] - default learning rate = 20. Forwarded to TSNE. "If the learning rate is too high,
        the data may look like a 'ball' with any point approximately equidistant from its nearest neighbours.
        If the learning rate is too low, most points may look compressed in a dense cloud with few outliers."
        Recommended: learning_rate(10-1000)
    :return: [tuple] - (x, y, title, embedding): column 0 and column 1 of the T-SNE output, the title string
        "T-SNE of Data", and the TSNE model object that fit_transform was called on
    """
    # n_components = dimension of the embedded space; random_state is pinned to 0
    reducer = TSNE(
        n_components=2,
        random_state=0,
        perplexity=perplexity,
        learning_rate=learning_rate,
    )

    # When there are more data points, only pass a couple of hundred points so TSNE doesn't take too long
    projected = reducer.fit_transform(latent_points)

    first_axis, second_axis = projected[:, 0], projected[:, 1]
    return first_axis, second_axis, "T-SNE of Data", reducer
|
31 |
|
32 |
|
33 |
+
def plot_dimensionality_reduction(x: list, y: list, label_set: list, title: str):
|
34 |
plt.title(title)
|
35 |
# Color points based on their density
|
36 |
if label_set[0].dtype == float:
|