Source code for Garfield.preprocessing._graph

"""
Functions for graph construction
"""
import numpy as np
import warnings
import scanpy as sc
import scipy.sparse as sp
import igraph as ig
import leidenalg
import pynndescent

from . import _utils as utils



[docs]
def get_nearest_neighbors(
    query_arr,
    target_arr,
    svd_components=None,
    randomized_svd=False,
    svd_runs=1,
    metric="correlation",
):
    """
    For each row in query_arr, find its approximate nearest neighbor in target_arr.

    The two matrices are stacked vertically and passed together through
    ``utils.svd_embedding`` so that both end up in the same embedding, after
    which an approximate nearest neighbor search (pynndescent) is run from
    the query rows against the target rows.

    Parameters
    ----------
    query_arr: np.array of shape (n_samples1, n_features)
        The query data matrix.
    target_arr: np.array of shape (n_samples2, n_features)
        The target data matrix.
    svd_components: None or int, default=None
        If not None, will first conduct SVD to reduce the dimension
        of the vertically stacked version of query_arr and target_arr.
    randomized_svd: bool, default=False
        Whether to use randomized SVD.
    svd_runs: int, default=1
        Run multiple instances of SVD and select the one with the lowest
        Frobenius reconstruction error.
    metric: string, default='correlation'
        The metric to use in nearest neighbor search.

    Returns
    -------
    neighbors: np.array of shape (n_samples1)
        The i-th element is the index of the row in target_arr that is
        closest to the i-th row of query_arr.
    dists: np.array of shape (n_samples1)
        The i-th element is the distance corresponding to neighbors[i].

    Raises
    ------
    ValueError
        If target_arr has no rows, since no nearest neighbor can exist.
    """
    query_arr = utils.convert_to_numpy(query_arr)
    target_arr = utils.convert_to_numpy(target_arr)

    n_query = query_arr.shape[0]
    n_target = target_arr.shape[0]
    if n_target == 0:
        raise ValueError("target_arr must contain at least one row.")

    # Embed both matrices in one call so query and target rows are comparable.
    stacked = np.vstack([query_arr, target_arr])
    embedded = utils.svd_embedding(
        arr=stacked,
        n_components=svd_components,
        randomized=randomized_svd,
        n_runs=svd_runs,
    )
    # The first n_query rows of the stacked embedding belong to the query.
    query_emb = embedded[:n_query, :]
    target_emb = embedded[n_query:, :]

    # Never ask for more neighbors than there are target rows; the original
    # fixed sizes (100 for the index graph, 50 for the query) are kept as
    # upper bounds so results on large targets are unchanged.
    graph_neighbors = min(100, n_target)
    query_k = min(50, n_target)

    # approximate nearest neighbor search
    index = pynndescent.NNDescent(
        target_emb, n_neighbors=graph_neighbors, metric=metric
    )
    # NOTE(review): several candidates are requested but only the closest one
    # is kept -- presumably to make the approximate search more reliable.
    neighbors, dists = index.query(query_emb, k=query_k)
    return neighbors[:, 0], dists[:, 0]