# PU.py — positive-unlabeled (PU) classification helper (iterative SVC bootstrapping).
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import torch
import numpy as np
def pu_classification(embeddings_positive, embeddings_unlabeled, iterations=10, selection_ratio=0.3):
    """Iteratively train a PU (positive-unlabeled) classifier.

    Repeatedly fits a linear SVC on positives (+1) vs. unlabeled (-1),
    then moves a fraction of the unlabeled samples into the positive set
    and refits, for up to `iterations` rounds.

    Args:
        embeddings_positive: 2-D array-like of known-positive embeddings.
        embeddings_unlabeled: 2-D array-like of unlabeled embeddings.
        iterations: maximum number of fit/relabel rounds.
        selection_ratio: fraction of the remaining unlabeled pool moved to
            the positive set each round (floored to an integer count).

    Returns:
        The fitted `SVC` classifier from the last completed round.
    """
    # Normalize both inputs to tensors so concatenation and row-indexing
    # use one code path throughout the loop.
    embeddings_positive = torch.as_tensor(embeddings_positive)
    embeddings_unlabeled = torch.as_tensor(embeddings_unlabeled)

    pu_classifier = SVC(kernel='linear', probability=True)

    for _ in range(iterations):
        # SVC.fit requires two classes; stop if either pool is exhausted.
        if embeddings_positive.shape[0] == 0 or embeddings_unlabeled.shape[0] == 0:
            break

        X_train_pu = torch.cat((embeddings_positive, embeddings_unlabeled), dim=0)
        y_train_pu = np.hstack((np.ones(embeddings_positive.shape[0]),
                                -np.ones(embeddings_unlabeled.shape[0])))
        pu_classifier.fit(X_train_pu.numpy(), y_train_pu)

        n_select = int(selection_ratio * embeddings_unlabeled.shape[0])
        if n_select == 0:
            break  # nothing would move; further rounds are no-ops

        # Probability of the positive class (+1) for each unlabeled sample.
        unlabeled_probs = pu_classifier.predict_proba(embeddings_unlabeled.numpy())[:, 1]

        # NOTE(review): as in the original, the LOWEST-confidence unlabeled
        # samples are promoted to positives. This looks inverted for typical
        # PU bootstrapping (usually the highest-confidence are promoted) —
        # confirm the intended scheme before changing it.
        bottom_indices = torch.as_tensor(unlabeled_probs.argsort()[:n_select].copy())

        embeddings_positive = torch.cat(
            (embeddings_positive, embeddings_unlabeled[bottom_indices]), dim=0
        )
        # Remove the promoted rows while staying in torch (the original used
        # np.delete here, which silently converted the tensor to an ndarray
        # and crashed on the next iteration's .numpy()/torch.cat calls).
        keep = torch.ones(embeddings_unlabeled.shape[0], dtype=torch.bool)
        keep[bottom_indices] = False
        embeddings_unlabeled = embeddings_unlabeled[keep]

    return pu_classifier