import numpy as np
import torch
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC


def pu_classification(
    embeddings_positive,
    embeddings_unlabeled,
    iterations=10,
    selection_ratio=0.3,
):
    """Train a linear SVM on positive/unlabeled (PU) data by self-training.

    Each round fits an ``SVC`` that separates the current positive set
    (label ``+1``) from the remaining unlabeled pool (label ``-1``).  Between
    rounds, the unlabeled samples with the highest predicted probability of
    the ``+1`` class are moved from the pool into the positive set.

    Args:
        embeddings_positive: 2-D array-like or ``torch.Tensor`` with one row
            per known-positive sample.
        embeddings_unlabeled: 2-D array-like or ``torch.Tensor`` with one row
            per unlabeled sample (same feature width as the positives).
        iterations: Maximum number of fit rounds.
        selection_ratio: Fraction of the *current* unlabeled pool that is
            promoted to the positive set after each round.

    Returns:
        The ``SVC`` from the last completed fit round.  If ``iterations`` is
        less than 1 the classifier is returned unfitted.

    Note:
        ``SVC.fit`` needs both classes present, so both inputs must be
        non-empty; scikit-learn's own error propagates otherwise.
    """
    # Work in NumPy throughout: scikit-learn consumes NumPy arrays, and
    # np.delete / fancy indexing then behave consistently on every round.
    positives = _as_numpy(embeddings_positive)
    unlabeled = _as_numpy(embeddings_unlabeled)

    pu_classifier = SVC(kernel='linear', probability=True)

    for round_idx in range(iterations):
        features = np.concatenate((positives, unlabeled), axis=0)
        labels = np.concatenate(
            (np.ones(positives.shape[0]), -np.ones(unlabeled.shape[0]))
        )
        pu_classifier.fit(features, labels)

        # Anything selected after the final fit would never be trained on.
        if round_idx == iterations - 1:
            break

        n_select = int(selection_ratio * unlabeled.shape[0])
        # Stop early when nothing would move (further rounds would refit the
        # same data) or when the move would empty the pool (the next fit
        # would see a single class and fail).
        if n_select <= 0 or n_select >= unlabeled.shape[0]:
            break

        # predict_proba columns follow classes_; look up the +1 column rather
        # than assuming its position.
        positive_col = int(np.flatnonzero(pu_classifier.classes_ == 1)[0])
        positive_probs = pu_classifier.predict_proba(unlabeled)[:, positive_col]

        # Promote the samples the model is MOST confident are positive.
        # Taking the lowest-scoring ones would label the likeliest negatives
        # as +1 and corrupt the training set.
        promoted = np.argsort(positive_probs)[-n_select:]

        positives = np.concatenate((positives, unlabeled[promoted]), axis=0)
        unlabeled = np.delete(unlabeled, promoted, axis=0)

    return pu_classifier


def _as_numpy(embeddings):
    """Return ``embeddings`` as a NumPy array, detaching torch tensors first."""
    if isinstance(embeddings, torch.Tensor):
        # detach()/cpu() make this safe for grad-tracking or CUDA tensors.
        return embeddings.detach().cpu().numpy()
    return np.asarray(embeddings)