Source code for Garfield.analysis.calc_neighbor_prop

from sklearn.neighbors import NearestNeighbors
import numpy as np
import pandas as pd



[docs]
def calc_neighbor_prop(
        adata,
        batch_key='replicates',
        celltype_key='Cluster',
        n_neighbors=25,
        spatial_key='spatial',
        output_key=None
):
    """
    Normalize the cell type abundance based on the nearest neighbors for each batch in the AnnData object.

    Parameters
    ----------
    adata : AnnData
        The AnnData object containing the spatial data and cell type information.
    batch_key : str, optional
        The key in `adata.obs` to identify different batches (default is 'replicates').
    celltype_key : str, optional
        The key in `adata.obs` representing the cell types (default is 'Cluster').
    n_neighbors : int, optional
        The number of nearest neighbors to consider for each cell (default is 25).
    spatial_key : str, optional
        The key in `adata.obsm` containing the spatial coordinates (default is 'spatial').
    output_key : str, optional
        The key to store the normalized cell type abundances in `adata.obsm` (default is None).

    Returns
    -------
    adata : AnnData
        The updated AnnData object with normalized cell type abundances added to `obsm`.
    """

    # Create storage for normalized cell type abundances
    cell_counts = {}

    # Loop through unique batches
    for b in adata.obs[batch_key].unique():
        # Get spatial coordinates and cell types for the current batch
        batch_data = adata[adata.obs[batch_key] == b]
        X = batch_data.obsm[spatial_key]
        celltypes = batch_data.obs[celltype_key].astype(str).values
        cellnames = batch_data.obs_names

        # Compute the nearest neighbors
        knn = NearestNeighbors(n_neighbors=n_neighbors)
        knn.fit(X)
        knn_indices = knn.kneighbors(X, return_distance=False)
        knn_celltypes = celltypes[knn_indices]  # Get the cell types of nearest neighbors

        # Process each cell in the batch
        for i in range(len(cellnames)):
            # Count the types of neighboring cells
            unique, counts = np.unique(knn_celltypes[i, :], return_counts=True)
            cell_counts[cellnames[i]] = dict(zip(unique, counts))

            # Get the total number of neighbors
            total_neighbors = sum(counts)

            # Normalize the counts (abundance)
            if total_neighbors > 0:
                normalized_counts = counts / total_neighbors  # Normalize to range [0, 1]
                cell_counts[cellnames[i]] = dict(zip(unique, normalized_counts))

    # Store the results in `obsm`
    if output_key is None:
        output_key = f'k{n_neighbors}_neighbours_celltype_normalized'

    adata.obsm[output_key] = pd.DataFrame(cell_counts).T.fillna(0)

    return adata