shubham142000
committed on
Commit
•
4536a1a
1
Parent(s):
4b63c97
Update PU.py
Browse files
PU.py
CHANGED
@@ -3,31 +3,31 @@ from sklearn.metrics import accuracy_score
|
|
3 |
import torch
|
4 |
import numpy as np
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
|
10 |
# Convert numpy arrays to PyTorch tensors
|
11 |
-
|
12 |
-
|
13 |
|
14 |
-
|
15 |
-
|
16 |
|
17 |
-
|
18 |
|
19 |
-
|
20 |
-
|
21 |
|
22 |
# Select bottom samples from unlabeled data based on confidence and add them to positive samples
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
|
27 |
-
|
28 |
-
|
29 |
|
30 |
-
|
31 |
-
|
32 |
|
33 |
-
|
|
|
3 |
import torch
|
4 |
import numpy as np
|
5 |
|
6 |
+
def _as_numpy(embeddings):
    """Return *embeddings* as a NumPy array, accepting arrays or torch tensors."""
    if isinstance(embeddings, torch.Tensor):
        # detach/cpu so tensors that track gradients or live on a GPU convert cleanly.
        return embeddings.detach().cpu().numpy()
    return np.asarray(embeddings)


def pu_classification(embeddings_positive, embeddings_unlabeled, iterations=10, selection_ratio=0.3):
    """Iteratively fit a linear SVM on positive (+1) vs. unlabeled (-1) embeddings.

    On every round the classifier is fitted on the current positive set
    (label +1) and the current unlabeled pool (label -1).  The unlabeled
    samples with the lowest predicted probability in column 1 of
    ``predict_proba`` are then moved from the pool into the positive set
    before the next round.

    Args:
        embeddings_positive: 2-D array-like (NumPy array or torch tensor) of
            positive samples, one row per sample.
        embeddings_unlabeled: 2-D array-like of unlabeled samples with the
            same number of columns.
        iterations: Maximum number of fit/select rounds.
        selection_ratio: Fraction of the *current* unlabeled pool moved to the
            positive set after each round.

    Returns:
        The ``SVC`` instance from the last completed fit.  It is unfitted when
        ``iterations`` is 0.

    Raises:
        Whatever ``SVC.fit`` raises for unusable training data (for example
        when the initial unlabeled pool is empty, leaving a single class).
    """
    # Everything passed to scikit-learn is NumPy, so the pool is kept as NumPy
    # arrays throughout; this avoids mixing torch tensors with np.delete.
    positive = _as_numpy(embeddings_positive)
    unlabeled = _as_numpy(embeddings_unlabeled)

    pu_classifier = SVC(kernel='linear', probability=True)

    for round_index in range(iterations):
        features = np.concatenate((positive, unlabeled), axis=0)
        labels = np.concatenate((np.ones(positive.shape[0]), -np.ones(unlabeled.shape[0])))
        pu_classifier.fit(features, labels)

        # The selection below only affects the *next* fit, so it is pointless
        # after the final round.
        if round_index == iterations - 1:
            break

        pool_size = unlabeled.shape[0]
        # Clamp so a ratio outside [0, 1] cannot select a negative count or
        # more samples than exist.
        n_selected = max(0, min(int(selection_ratio * pool_size), pool_size))
        if n_selected == 0:
            # Nothing would move, so every further round would refit on the
            # same data.
            break

        # Labels are -1 and +1, and scikit-learn orders classes_ ascending, so
        # column 1 is the probability of the +1 class.
        # NOTE(review): the lowest-scoring ("bottom") samples are promoted to
        # positives, as the original comment states -- confirm this is the
        # intended direction rather than picking the highest-scoring ones.
        unlabeled_probs = pu_classifier.predict_proba(unlabeled)[:, 1]
        bottom_indices = unlabeled_probs.argsort()[:n_selected]

        positive = np.concatenate((positive, unlabeled[bottom_indices]), axis=0)
        unlabeled = np.delete(unlabeled, bottom_indices, axis=0)

        if unlabeled.shape[0] == 0:
            # With the pool exhausted only one class remains and another fit
            # would fail; keep the classifier from the last successful fit.
            break

    return pu_classifier
|