File size: 1,548 Bytes
5194760
 
 
 
768e669
 
 
 
5194760
768e669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5194760
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances
import numpy as np

def get_recommendations_from_clustering(embeddings_unlabeled, embeddings_positive, distance_threshold=0.2, top_n=20):
    """Recommend unlabeled items by clustering them and ranking each cluster.

    The unlabeled embeddings are grouped with average-linkage agglomerative
    clustering on their pairwise cosine distances. Inside every cluster the
    members are ordered by their cosine distance to the closest positive
    embedding, and the first ``top_n`` members of each cluster are kept.

    Args:
        embeddings_unlabeled: Embeddings of the candidate items, one row per
            item (anything ``pairwise_distances`` accepts).
        embeddings_positive: Embeddings of the known-positive items, one row
            per item.
        distance_threshold: Cosine-distance threshold passed to
            ``AgglomerativeClustering``; it controls how many clusters form.
        top_n: Maximum number of items taken from each cluster.

    Returns:
        A list of row indices into ``embeddings_unlabeled``. Clusters appear
        in ascending label order; within a cluster, indices are ordered from
        the smallest to the largest distance to the nearest positive item.
    """
    # np.shape works for lists, arrays and anything exposing ``.shape``.
    n_unlabeled = np.shape(embeddings_unlabeled)[0]
    if n_unlabeled < 2:
        # scikit-learn refuses to cluster fewer than two samples. Zero or one
        # candidate forms at most one trivial cluster, so no distances are
        # needed to decide the outcome.
        return list(np.arange(n_unlabeled)[:top_n])

    # Distance of every unlabeled item to every positive item (ranking) ...
    distances_to_positive = pairwise_distances(embeddings_unlabeled, embeddings_positive, metric='cosine')
    # ... and between the unlabeled items themselves (clustering input).
    distances_between_unlabeled = pairwise_distances(embeddings_unlabeled, metric='cosine')

    clustering_options = dict(
        n_clusters=None,
        distance_threshold=distance_threshold,
        linkage='average',
    )
    try:
        # scikit-learn >= 1.2 names this parameter ``metric``; the old
        # ``affinity`` keyword was removed in 1.4.
        clustering = AgglomerativeClustering(metric='precomputed', **clustering_options)
    except TypeError:
        # Older scikit-learn releases only know ``affinity``.
        clustering = AgglomerativeClustering(affinity='precomputed', **clustering_options)
    cluster_labels = clustering.fit(distances_between_unlabeled).labels_

    # Each item's distance to its closest positive item; computed once and
    # then sliced per cluster.
    nearest_positive_distance = distances_to_positive.min(axis=1)

    recommended_indices = []
    for cluster_id in np.unique(cluster_labels):
        member_indices = np.where(cluster_labels == cluster_id)[0]
        ranking = np.argsort(nearest_positive_distance[member_indices])
        recommended_indices.extend(member_indices[ranking][:top_n])
    return recommended_indices