from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import torch
import numpy as np


def pu_classification(embeddings_positive, embeddings_unlabeled, iterations=10, selection_ratio=0.3):
    """Train a PU (positive-unlabeled) classifier by iterative self-labeling.

    Starting from known positives (label +1) and an unlabeled pool (label -1),
    a linear SVC is fit repeatedly; after each fit, a fraction of the unlabeled
    samples is moved into the positive set and the training data is rebuilt.

    Parameters
    ----------
    embeddings_positive : array-like, shape (n_pos, dim)
        Embeddings of known-positive examples (numpy array or torch tensor).
    embeddings_unlabeled : array-like, shape (n_unl, dim)
        Embeddings of unlabeled examples.
    iterations : int, default 10
        Maximum number of fit/relabel rounds.
    selection_ratio : float, default 0.3
        Fraction of the *current* unlabeled pool moved per round.

    Returns
    -------
    SVC
        The classifier from the last completed ``fit`` call.
    """
    y_train_pu = np.ones(embeddings_positive.shape[0])        # +1: positive examples
    y_unlabeled_pu = -np.ones(embeddings_unlabeled.shape[0])  # -1: unlabeled examples

    # Work in torch throughout so row selection/concatenation stay consistent.
    embeddings_positive = torch.as_tensor(embeddings_positive)
    embeddings_unlabeled = torch.as_tensor(embeddings_unlabeled)

    X_train_pu = torch.cat((embeddings_positive, embeddings_unlabeled), dim=0)
    y_train_pu = np.hstack((y_train_pu, y_unlabeled_pu))

    pu_classifier = SVC(kernel='linear', probability=True)

    for _ in range(iterations):
        pu_classifier.fit(X_train_pu.numpy(), y_train_pu)

        n_select = int(selection_ratio * embeddings_unlabeled.shape[0])
        # Stop once nothing more can be moved: further rounds would refit the
        # identical data, and an empty unlabeled pool would make y single-class
        # (which SVC.fit rejects).
        if n_select == 0:
            break

        # classes_ is sorted ([-1, 1]), so column 1 of predict_proba is P(+1).
        unlabeled_probs = pu_classifier.predict_proba(embeddings_unlabeled.numpy())[:, 1]
        # NOTE(review): this promotes the LOWEST-confidence samples into the
        # positive set, matching the original comment; confirm this is the
        # intended PU self-training scheme (often the most-confident samples
        # are promoted instead).
        bottom_indices = unlabeled_probs.argsort()[:n_select]

        selected_samples = embeddings_unlabeled[bottom_indices]
        embeddings_positive = torch.cat((embeddings_positive, selected_samples), dim=0)

        # BUG FIX: the original used np.delete() here, which silently turned
        # embeddings_unlabeled into a numpy array and made the torch.cat /
        # .numpy() calls on the next iteration raise. Remove the selected rows
        # with a boolean mask so the pool stays a tensor.
        keep_mask = torch.ones(embeddings_unlabeled.shape[0], dtype=torch.bool)
        keep_mask[bottom_indices] = False
        embeddings_unlabeled = embeddings_unlabeled[keep_mask]

        X_train_pu = torch.cat((embeddings_positive, embeddings_unlabeled), dim=0)
        y_train_pu = np.hstack((np.ones(embeddings_positive.shape[0]),
                                -np.ones(embeddings_unlabeled.shape[0])))

    return pu_classifier