shubham142000
committed on
Commit
•
5194760
1
Parent(s):
fa45197
Update oneclass.py
Browse files- oneclass.py +21 -0
oneclass.py
CHANGED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sklearn.cluster import AgglomerativeClustering
|
2 |
+
from sklearn.metrics import pairwise_distances
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
|
6 |
+
# Cosine distance clustering with BERT embeddings
# NOTE(review): `bert_embeddings_unlabeled` and `bert_embeddings_positive` are
# not defined in the visible part of this file -- presumably 2-D embedding
# matrices with one row per paper; confirm where they are created.

# Distances from every unlabeled embedding (rows) to every positive embedding
# (columns).
cosine_distances_bert = pairwise_distances(
    bert_embeddings_unlabeled, bert_embeddings_positive, metric='cosine'
)

# Square unlabeled-vs-unlabeled distance matrix; this is what a clustering
# model configured for precomputed distances expects as input to `fit`.
cosine_distances_bert_square = pairwise_distances(
    bert_embeddings_unlabeled, metric='cosine'
)

# scikit-learn 1.2 renamed `affinity` to `metric` and 1.4 removed `affinity`
# altogether, so prefer `metric` and only fall back to the old keyword when
# the installed release rejects it (unknown keyword -> TypeError).
try:
    clustering_cosine_bert = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=0.2,
        metric='precomputed',
        linkage='average',
    )
except TypeError:
    clustering_cosine_bert = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=0.2,
        affinity='precomputed',
        linkage='average',
    )

clustering_cosine_bert.fit(cosine_distances_bert_square)

# One cluster label per row of `bert_embeddings_unlabeled`.
unlabeled_clusters_cosine_bert = clustering_cosine_bert.labels_
|
12 |
+
|
13 |
+
# Function to get recommended paper indices based on clustering
|
14 |
+
def get_recommended_papers_indices(unlabeled_clusters, unlabeled_distances, top_n=20):
    """Pick, for every cluster, the members with the smallest row-minimum distance.

    Args:
        unlabeled_clusters: 1-D array-like of cluster labels, one per item.
        unlabeled_distances: 2-D array-like whose row ``i`` holds the distances
            for item ``i``; the minimum of each row is used as the item's score.
            (Presumably the unlabeled-vs-positive distance matrix -- confirm
            against the caller.)
        top_n: Maximum number of items kept **per cluster** (not overall).

    Returns:
        A flat list of plain ``int`` row indices. Clusters appear in ascending
        label order; within a cluster, indices are ordered by increasing score,
        with ties kept in their original index order.
    """
    # Coerce to arrays so plain Python lists work too: `list == scalar` is a
    # single False rather than an element-wise mask.
    labels = np.asarray(unlabeled_clusters)
    distances = np.asarray(unlabeled_distances)

    recommended_indices = []
    for cluster_id in np.unique(labels):
        cluster_indices = np.where(labels == cluster_id)[0]
        # Score of each member = its smallest distance along the row.
        cluster_distances = distances[cluster_indices].min(axis=1)
        # Stable sort keeps equal-score members in a deterministic order.
        order = np.argsort(cluster_distances, kind="stable")
        sorted_indices = cluster_indices[order]
        # `.tolist()` yields built-in ints instead of NumPy scalars.
        recommended_indices.extend(sorted_indices[:top_n].tolist())
    return recommended_indices
|