File size: 1,548 Bytes
5194760 768e669 5194760 768e669 5194760 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances
import numpy as np
def get_recommendations_from_clustering(embeddings_unlabeled, embeddings_positive, distance_threshold=0.2, top_n=20):
    """Recommend unlabeled items by clustering them and ranking each cluster.

    The unlabeled embeddings are grouped with average-linkage agglomerative
    clustering on their pairwise cosine distances. Inside every cluster the
    members are ordered by their cosine distance to the closest positive
    embedding, and at most ``top_n`` of the nearest members are kept.

    Args:
        embeddings_unlabeled: Embeddings of the candidate items, one row per
            item.
        embeddings_positive: Embeddings of the known-positive items, one row
            per item.
        distance_threshold: Passed to ``AgglomerativeClustering`` as its
            ``distance_threshold`` (the number of clusters is left open).
        top_n: Maximum number of items kept *per cluster*, so the result can
            hold more than ``top_n`` indices overall.

    Returns:
        A flat list of row indices into ``embeddings_unlabeled``. Clusters
        appear in ascending label order; within a cluster, indices are sorted
        by increasing distance to the nearest positive embedding. An empty
        list is returned when there are no unlabeled items.
    """
    n_unlabeled = np.shape(embeddings_unlabeled)[0]
    if n_unlabeled == 0:
        # Nothing to cluster or rank.
        return []

    # Cosine distance from every unlabeled item to every positive item.
    distances_to_positive = pairwise_distances(embeddings_unlabeled, embeddings_positive, metric='cosine')

    if n_unlabeled == 1:
        # NOTE(review): AgglomerativeClustering is expected to reject fewer
        # than two samples; a lone item trivially forms its own cluster.
        cluster_labels = np.zeros(1, dtype=int)
    else:
        # Square cosine-distance matrix among the unlabeled items themselves.
        distances_within = pairwise_distances(embeddings_unlabeled, metric='cosine')
        clustering_params = dict(
            n_clusters=None,
            distance_threshold=distance_threshold,
            linkage='average',
        )
        try:
            # `metric` replaced `affinity` in scikit-learn 1.2; `affinity`
            # was removed in 1.4.
            clusterer = AgglomerativeClustering(metric='precomputed', **clustering_params)
        except TypeError:
            # Older scikit-learn releases only know the `affinity` keyword.
            clusterer = AgglomerativeClustering(affinity='precomputed', **clustering_params)
        cluster_labels = clusterer.fit(distances_within).labels_

    # Distance of each unlabeled item to its closest positive item; computed
    # once instead of once per cluster.
    nearest_positive = distances_to_positive.min(axis=1)

    recommended_indices = []
    for cluster_id in np.unique(cluster_labels):
        members = np.flatnonzero(cluster_labels == cluster_id)
        ranked_members = members[np.argsort(nearest_positive[members])]
        recommended_indices.extend(ranked_members[:top_n])
    return recommended_indices