|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse |
|
import os |
|
import os.path as osp |
|
import numpy as np |
|
|
|
import faiss |
|
|
|
|
|
|
|
def get_parser():
    """Build the command-line parser for the PCA computation script.

    Arguments registered, in order:
        data: positional path to the numpy file containing features.
        --output: required; where to save the PCA matrix.
        --dim: required int; target dimension for the PCA reduction.
        --eigen-power: optional float (default 0); per its help text,
            -0.5 gives whitening.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(
        description="compute a pca matrix given an array of numpy features"
    )

    # (flags, keyword options) pairs, registered in declaration order so the
    # generated usage/help text keeps the same layout.
    argument_specs = (
        (("data",), {"help": "numpy file containing features"}),
        (
            ("--output",),
            {"help": "where to save the pca matrix", "required": True},
        ),
        (
            ("--dim",),
            {"type": int, "help": "dim for pca reduction", "required": True},
        ),
        (
            ("--eigen-power",),
            {
                "type": float,
                "default": 0,
                "help": "eigen power, -0.5 for whitening",
            },
        ),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    return cli
|
|
|
|
|
def main():
    """Train a faiss PCA on a feature file and save its parameters.

    Parses the command line with ``get_parser()``, opens the feature array
    memory-mapped read-only, trains a ``faiss.PCAMatrix`` whose input
    dimension is the size of the array's last axis, and writes two arrays
    into the ``--output`` directory (created if missing):

        ``<prefix>_pca_A``: the PCA matrix, transposed so that it is stored
            as (d_in, d_out).
        ``<prefix>_pca_b``: the PCA bias vector.

    ``<prefix>`` is the target dimension, followed by ``_<eigen_power>``
    when the eigen power is non-zero.
    """
    args = get_parser().parse_args()

    print("Reading features")
    # mmap_mode="r" maps the file read-only instead of reading it eagerly.
    features = np.load(args.data, mmap_mode="r")

    print("Computing PCA")
    input_dim = features.shape[-1]
    pca = faiss.PCAMatrix(input_dim, args.dim, args.eigen_power)
    pca.train(features)

    # faiss exposes the trained parameters as flat vectors; A is reshaped to
    # (d_out, d_in) here and transposed only when it is written out.
    bias = faiss.vector_to_array(pca.b)
    projection = faiss.vector_to_array(pca.A).reshape(pca.d_out, pca.d_in)

    os.makedirs(args.output, exist_ok=True)

    # The eigen power is encoded in the file name only when it is non-zero.
    stem = (
        f"{args.dim}"
        if args.eigen_power == 0
        else f"{args.dim}_{args.eigen_power}"
    )

    matrix_path = osp.join(args.output, f"{stem}_pca_A")
    bias_path = osp.join(args.output, f"{stem}_pca_b")
    np.save(matrix_path, projection.T)
    np.save(bias_path, bias)
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()
|
|