import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances


def _cluster_precomputed_distances(distance_matrix, distance_threshold):
    """Return average-linkage cluster labels for a square distance matrix.

    The number of clusters is not fixed; it is determined by
    ``distance_threshold``.
    """
    common_kwargs = {
        "n_clusters": None,
        "distance_threshold": distance_threshold,
        "linkage": "average",
    }
    # scikit-learn 1.2 renamed ``affinity`` to ``metric`` and 1.4 removed
    # ``affinity``. Only the constructor call is guarded, so a TypeError
    # raised while fitting is never mistaken for a version mismatch.
    try:
        model = AgglomerativeClustering(metric="precomputed", **common_kwargs)
    except TypeError:
        model = AgglomerativeClustering(affinity="precomputed", **common_kwargs)
    model.fit(distance_matrix)
    return model.labels_


def _top_indices_per_cluster(cluster_labels, distances_to_positive, top_n=20):
    """Pick, for every cluster, the ``top_n`` rows closest to a positive.

    A row's score is its minimum distance to any positive example. Clusters
    are visited in ascending label order, and rows within a cluster are
    ordered by ascending score.
    """
    selected = []
    for label in np.unique(cluster_labels):
        members = np.where(cluster_labels == label)[0]
        nearest_positive = distances_to_positive[members].min(axis=1)
        ranked_members = members[np.argsort(nearest_positive)]
        selected.extend(ranked_members[:top_n])
    return selected


def get_recommendations_from_clustering(
    embeddings_unlabeled,
    embeddings_positive,
    distance_threshold: float = 0.2,
    top_n: int = 20,
):
    """Recommend unlabeled items by clustering them and ranking each cluster.

    The unlabeled embeddings are grouped with average-linkage agglomerative
    clustering on their pairwise cosine distances. From every resulting
    cluster, up to ``top_n`` members with the smallest cosine distance to
    any positive embedding are selected.

    Args:
        embeddings_unlabeled: 2-D array-like, one row per unlabeled item.
        embeddings_positive: 2-D array-like, one row per positive item, with
            the same number of columns as ``embeddings_unlabeled``.
        distance_threshold: Linkage distance passed to
            ``AgglomerativeClustering``; it controls how many clusters form.
        top_n: Maximum number of items taken from each cluster.

    Returns:
        A list of row indices into ``embeddings_unlabeled``, grouped by
        cluster label and, within each cluster, ordered by increasing
        distance to the nearest positive embedding.
    """
    n_unlabeled = np.shape(embeddings_unlabeled)[0]
    if n_unlabeled == 0:
        # Nothing to recommend.
        return []

    distances_to_positive = pairwise_distances(
        embeddings_unlabeled, embeddings_positive, metric="cosine"
    )

    if n_unlabeled == 1:
        # AgglomerativeClustering needs at least two samples; a single item
        # trivially forms one cluster.
        cluster_labels = np.zeros(1, dtype=int)
    else:
        unlabeled_distances = pairwise_distances(
            embeddings_unlabeled, metric="cosine"
        )
        cluster_labels = _cluster_precomputed_distances(
            unlabeled_distances, distance_threshold
        )

    return _top_indices_per_cluster(
        cluster_labels, distances_to_positive, top_n
    )