# %%
import torch
import torch.nn.functional as F


def affinity_from_features(
    features,
    features_B=None,
    affinity_focal_gamma=1.0,
    distance="cosine",
    normalize_features=False,
    fill_diagonal=False,
    n_features=1,
):
"""Compute affinity matrix from input features. | |
Args: | |
features (torch.Tensor): input features, shape (n_samples, n_features) | |
feature_B (torch.Tensor, optional): optional, if not None, compute affinity between two features | |
affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the edge weights | |
on weak connections, default 1.0 | |
distance (str): distance metric, 'cosine' (default) or 'euclidean'. | |
apply_normalize (bool): normalize input features before computing affinity matrix, | |
default True | |
Returns: | |
(torch.Tensor): affinity matrix, shape (n_samples, n_samples) | |
""" | |
    # compute affinity matrix from input features
    features = features.clone()
    if features_B is not None:
        features_B = features_B.clone()

    # if features_B is not provided, compute the affinity matrix on features x features;
    # if features_B is provided, compute the affinity matrix on features x features_B
    if features_B is not None:
        assert not fill_diagonal, "fill_diagonal should be False when features_B is not None"
    features_B = features if features_B is None else features_B

    if normalize_features:
        features = F.normalize(features, dim=-1)
        features_B = F.normalize(features_B, dim=-1)
    if distance == "cosine":
        # TODO: make sure features are normalized within each head
        features = F.normalize(features, dim=-1)
        features_B = F.normalize(features_B, dim=-1)
        A = 1 - (features @ features_B.T) / n_features
    elif distance == "euclidean":
        A = torch.cdist(features, features_B, p=2) / n_features
    else:
        raise ValueError("distance should be 'cosine' or 'euclidean'")

    if fill_diagonal:
        A[torch.arange(A.shape[0]), torch.arange(A.shape[0])] = 0

    # torch.exp makes the affinity matrix positive;
    # lower affinity_focal_gamma suppresses the weak edge weights
    A = torch.exp(-A / affinity_focal_gamma)
    return A
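
# %%
# A minimal sanity check of `affinity_from_features` on random features;
# the sizes and gamma below are arbitrary choices for illustration.
example_features = torch.randn(8, 32)
example_A = affinity_from_features(example_features, affinity_focal_gamma=0.5)
print(example_A.shape)  # torch.Size([8, 8])
print((example_A > 0).all().item())  # True: exp(-d / gamma) keeps affinities positive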

# %%
from ncut_pytorch.ncut_pytorch import run_subgraph_sampling, propagate_knn, gram_schmidt
import logging
import torch

def ncut(
    A,
    num_eig=20,
    eig_solver="svd_lowrank",
    make_symmetric=True,
):
    """PyTorch implementation of Normalized Cut without Nystrom-like approximation.

    Args:
        A (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
        num_eig (int): number of eigenvectors to return
        eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh']
        make_symmetric (bool): symmetrize A as (A + A.T) / 2 before normalization, default True

    Returns:
        (torch.Tensor): eigenvectors corresponding to the top eigenvalues, shape (n_samples, num_eig)
        (torch.Tensor): eigenvalues, sorted in descending order, shape (num_eig,)
    """
    if make_symmetric:
        # make sure A is symmetric
        A = (A + A.T) / 2

    # symmetric normalization: A = D^(-1/2) A D^(-1/2)
    D_r = A.sum(dim=0).detach().clone()
    D_c = A.sum(dim=1).detach().clone()
    A /= torch.sqrt(D_r)[:, None]
    A /= torch.sqrt(D_c)[None, :]
    # compute eigenvectors
    if eig_solver == "svd_lowrank":  # default
        # only top q eigenvectors, fastest
        eigen_vector, eigen_value, _ = torch.svd_lowrank(A, q=num_eig)
    elif eig_solver == "lobpcg":
        # only top k eigenvectors, fast
        eigen_value, eigen_vector = torch.lobpcg(A, k=num_eig)
    elif eig_solver == "svd":
        # all eigenvectors, slow
        eigen_vector, eigen_value, _ = torch.svd(A)
    elif eig_solver == "eigh":
        # all eigenvectors, slow
        eigen_value, eigen_vector = torch.linalg.eigh(A)
    else:
        raise ValueError(
            "eig_solver should be 'svd_lowrank', 'lobpcg', 'svd' or 'eigh'"
        )

    # sort eigenvectors by eigenvalue and keep the top num_eig (descending order)
    eigen_value = eigen_value.real
    eigen_vector = eigen_vector.real
    sort_order = torch.argsort(eigen_value, descending=True)[:num_eig]
    eigen_value = eigen_value[sort_order]
    eigen_vector = eigen_vector[:, sort_order]

    if eigen_value.min() < 0:
        logging.warning(
            "negative eigenvalues detected, please make sure the affinity matrix is positive semi-definite"
        )
    return eigen_vector, eigen_value
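
# %%
# A minimal sketch of the pipeline on a toy graph: build an affinity matrix, then
# eigendecompose it with `ncut`; the sizes below are arbitrary for illustration.
toy_features = torch.randn(200, 16)
toy_A = affinity_from_features(toy_features, affinity_focal_gamma=1.0, distance="cosine")
toy_eigvecs, toy_eigvals = ncut(toy_A, num_eig=6)
print(toy_eigvecs.shape, toy_eigvals.shape)  # torch.Size([200, 6]) torch.Size([6])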

def nystrom_ncut(
    features,
    features_B=None,
    num_eig=100,
    num_sample=10000,
    knn=10,
    sample_method="farthest",
    distance="cosine",
    affinity_focal_gamma=1.0,
    indirect_connection=False,
    indirect_pca_dim=100,
    device=None,
    eig_solver="svd_lowrank",
    normalize_features=False,
    matmul_chunk_size=8096,
    make_orthogonal=False,
    verbose=False,
    no_propagation=False,
    make_symmetric=False,
    n_features=1,
):
"""PyTorch implementation of Faster Nystrom Normalized cut. | |
Args: | |
features (torch.Tensor): feature matrix, shape (n_samples, n_features) | |
features_2 (torch.Tensor): feature matrix 2, for asymmetric affinity matrix, shape (n_samples2, n_features) | |
num_eig (int): default 20, number of top eigenvectors to return | |
num_sample (int): default 30000, number of samples for Nystrom-like approximation | |
knn (int): default 3, number of KNN for propagating eigenvectors from subgraph to full graph, | |
smaller knn will result in more sharp eigenvectors, | |
sample_method (str): sample method, 'farthest' (default) or 'random' | |
'farthest' is recommended for better approximation | |
distance (str): distance metric, 'cosine' (default) or 'euclidean' | |
affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the weak edge weights, | |
resulting in more sharp eigenvectors, default 1.0 | |
indirect_connection (bool): include indirect connection in the subgraph, default True | |
indirect_pca_dim (int): default 100, PCA dimension to reduce the node dimension, only applied to | |
the not sampled nodes, not applied to the sampled nodes | |
device (str): device to use for computation, if None, will not change device | |
a good practice is to pass features by CPU since it's usually large, | |
and move subgraph affinity to GPU to speed up eigenvector computation | |
eig_solver (str): eigen decompose solver, 'svd_lowrank' (default), 'lobpcg', 'svd', 'eigh' | |
'svd_lowrank' is recommended for large scale graph, it's the fastest | |
they correspond to torch.svd_lowrank, torch.lobpcg, torch.svd, torch.linalg.eigh | |
normalize_features (bool): normalize input features before computing affinity matrix, | |
default True | |
matmul_chunk_size (int): chunk size for matrix multiplication | |
large matrix multiplication is chunked to reduce memory usage, | |
smaller chunk size will reduce memory usage but slower computation, default 8096 | |
make_orthogonal (bool): make eigenvectors orthogonal after propagation, default True | |
verbose (bool): show progress bar when propagating eigenvectors from subgraph to full graph | |
no_propagation (bool): if True, skip the eigenvector propagation step, only return the subgraph eigenvectors | |
Returns: | |
(torch.Tensor): eigenvectors, shape (n_samples, num_eig) | |
(torch.Tensor): eigenvalues, sorted in descending order, shape (num_eig,) | |
(torch.Tensor): sampled_indices used by Nystrom-like approximation subgraph, shape (num_sample,) | |
""" | |
    # check that the number of nodes is large enough for the chosen eigen solver
    if eig_solver in ["svd_lowrank", "lobpcg"]:
        assert features.shape[0] > (
            num_eig * 2
        ), "number of nodes should be greater than 2*num_eig"
    if eig_solver in ["svd", "eigh"]:
        assert (
            features.shape[0] > num_eig
        ), "number of nodes should be greater than num_eig"

    features = features.clone()
    if normalize_features:
        # features need to be normalized for affinity matrix computation (cosine distance)
        features = torch.nn.functional.normalize(features, dim=-1)

    # if features_B is not provided, sample the second subgraph from features itself
    features_B = features if features_B is None else features_B

    sampled_indices = run_subgraph_sampling(
        features,
        num_sample=num_sample,
        sample_method=sample_method,
    )
    sampled_indices_B = run_subgraph_sampling(
        features_B,
        num_sample=num_sample,
        sample_method=sample_method,
    )
    sampled_features = features[sampled_indices]
    sampled_features_B = features_B[sampled_indices_B]

    # move the subgraph to GPU to speed up eigenvector computation
    original_device = sampled_features.device
    device = original_device if device is None else device
    sampled_features = sampled_features.to(device)
    sampled_features_B = sampled_features_B.to(device)
    # compute the affinity matrix on the subgraph
    A = affinity_from_features(
        sampled_features, features_B=sampled_features_B,
        affinity_focal_gamma=affinity_focal_gamma, distance=distance,
        n_features=n_features,
    )

    not_sampled = torch.tensor(
        list(set(range(features.shape[0])) - set(sampled_indices))
    )
    if len(not_sampled) == 0:
        # all nodes were sampled, no need for the Nyström approximation
        eigen_vector, eigen_value = ncut(A, num_eig, eig_solver=eig_solver)
        return eigen_vector, eigen_value, sampled_indices
    # 1) PCA to reduce the node dimension for the not-sampled nodes
    # 2) compute indirect connections on the PCA nodes
    if len(not_sampled) > 0 and indirect_connection:
        raise NotImplementedError("indirect_connection is not implemented yet")
        indirect_pca_dim = min(indirect_pca_dim, min(*features.shape))
        U, S, V = torch.pca_lowrank(features[not_sampled].T, q=indirect_pca_dim)
        feature_B = (features[not_sampled].T @ V).T  # project to PCA space
        feature_B = feature_B.to(device)
        B = affinity_from_features(
            sampled_features,
            feature_B,
            affinity_focal_gamma=affinity_focal_gamma,
            distance=distance,
            fill_diagonal=False,
        )
        # P is the 1-hop random walk matrix
        B_row = B / B.sum(axis=1, keepdim=True)
        B_col = B / B.sum(axis=0, keepdim=True)
        P = B_row @ B_col.T
        P = (P + P.T) / 2
        # fill the diagonal with 0
        P[torch.arange(P.shape[0]), torch.arange(P.shape[0])] = 0
        A = A + P
    # compute normalized cut on the subgraph
    eigen_vector, eigen_value = ncut(A, num_eig, eig_solver=eig_solver, make_symmetric=make_symmetric)
    eigen_vector = eigen_vector.to(dtype=features.dtype, device=original_device)
    eigen_value = eigen_value.to(dtype=features.dtype, device=original_device)

    if no_propagation:
        return eigen_vector, eigen_value, sampled_indices

    # propagate eigenvectors from the subgraph to the full graph
    eigen_vector = propagate_knn(
        eigen_vector,
        features,
        sampled_features,
        knn,
        chunk_size=matmul_chunk_size,
        device=device,
        use_tqdm=verbose,
    )

    # post-hoc orthogonalization
    if make_orthogonal:
        eigen_vector = gram_schmidt(eigen_vector)
    return eigen_vector, eigen_value, sampled_indices
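
# %%
# A minimal sketch of `nystrom_ncut` on random features; the sizes below are
# arbitrary for illustration, and this assumes ncut_pytorch is installed for
# the sampling/propagation helpers imported above.
big_features = torch.randn(5000, 64)
eigvecs, eigvals, sampled = nystrom_ncut(
    big_features,
    num_eig=20,
    num_sample=1000,
    knn=10,
    device="cuda" if torch.cuda.is_available() else "cpu",
)
print(eigvecs.shape)  # torch.Size([5000, 20]) after propagation to the full graph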