shubham142000 committed on
Commit
5194760
1 Parent(s): fa45197

Update oneclass.py

Browse files
Files changed (1) hide show
  1. oneclass.py +21 -0
oneclass.py CHANGED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.cluster import AgglomerativeClustering
2
+ from sklearn.metrics import pairwise_distances
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
# Cosine distance clustering with BERT embeddings.
# NOTE(review): `bert_embeddings_unlabeled` and `bert_embeddings_positive` are
# not defined in the visible code; they must be in scope before this runs.

# Distances from every unlabeled embedding (rows) to every positive embedding
# (columns).
cosine_distances_bert = pairwise_distances(
    bert_embeddings_unlabeled, bert_embeddings_positive, metric='cosine'
)

# Square unlabeled-vs-unlabeled matrix; a precomputed-distance clustering needs
# an (n_samples, n_samples) input.
cosine_distances_bert_square = pairwise_distances(
    bert_embeddings_unlabeled, metric='cosine'
)

# With n_clusters=None the number of clusters is determined by
# distance_threshold: merging stops once the average linkage distance between
# clusters reaches 0.2.
_clustering_kwargs = dict(n_clusters=None, distance_threshold=0.2, linkage='average')
try:
    # scikit-learn >= 1.2 names this parameter `metric`; `affinity` was
    # deprecated in 1.2 and removed in 1.4.
    clustering_cosine_bert = AgglomerativeClustering(
        metric='precomputed', **_clustering_kwargs
    )
except TypeError:
    # Older scikit-learn releases only know the `affinity` keyword.
    clustering_cosine_bert = AgglomerativeClustering(
        affinity='precomputed', **_clustering_kwargs
    )

clustering_cosine_bert.fit(cosine_distances_bert_square)
# One integer cluster label per unlabeled embedding, in input order.
unlabeled_clusters_cosine_bert = clustering_cosine_bert.labels_
12
+
13
+ # Function to get recommended paper indices based on clustering
14
def get_recommended_papers_indices(unlabeled_clusters, unlabeled_distances, top_n=20):
    """Return, cluster by cluster, the items closest to any reference item.

    For every distinct cluster label (visited in ascending order), the
    members of that cluster are ranked by their smallest distance to any
    reference column, and the ``top_n`` best-ranked members are appended to
    the result.

    Args:
        unlabeled_clusters: 1-D array-like of cluster labels, one per item.
        unlabeled_distances: Array-like of distances. Either 2-D, where row
            ``i`` holds the distances from item ``i`` to each reference item
            (presumably the positive examples; confirm against the caller),
            or 1-D, where entry ``i`` is already the minimum distance for
            item ``i``.
        top_n: Maximum number of items to take from each cluster.

    Returns:
        A flat list of integer row indices, grouped by cluster and ordered
        by increasing minimum distance within each cluster. Ties keep their
        original index order.
    """
    cluster_labels = np.asarray(unlabeled_clusters)
    # np.asarray makes plain lists work and guarantees positional row
    # indexing (a DataFrame would otherwise be indexed by column label).
    distances = np.asarray(unlabeled_distances)

    recommended_indices = []
    for cluster_id in np.unique(cluster_labels):
        cluster_indices = np.flatnonzero(cluster_labels == cluster_id)
        member_distances = distances[cluster_indices]
        if member_distances.ndim > 1:
            # Reduce to each member's distance to its nearest reference item.
            member_distances = member_distances.min(axis=1)
        # A stable sort keeps tie-breaking deterministic (original order).
        ranking = np.argsort(member_distances, kind="stable")
        # tolist() yields plain Python ints instead of NumPy scalars.
        recommended_indices.extend(cluster_indices[ranking][:top_n].tolist())
    return recommended_indices