Tumor microenvironment (TME) spatial niche analysis
Here we use the 10x Genomics Xenium human breast cancer dataset as an example to illustrate how Garfield performs spatial niche analysis and niche characterization. Data access: https://www.10xgenomics.com/products/xenium-in-situ/preview-dataset-human-breast.
Load packages
[1]:
import os
import pandas as pd
import numpy as np
# Work from the Garfield project checkout.
# NOTE(review): presumably this makes the local `Garfield` package importable;
# the absolute path is machine-specific and must be adapted on other systems.
os.chdir('/pri_exthome/zhouwg/project/Garfield')
# Echo the working directory so the cell output confirms the switch.
os.getcwd()
[1]:
'/pri_exthome/zhouwg/project/Garfield'
[2]:
# load packages
import os
import warnings
import Garfield as gf
import scipy.sparse as sp
import scanpy as sc
# Silence FutureWarning and UserWarning messages for the rest of the session.
for _ignored_category in (FutureWarning, UserWarning):
    warnings.simplefilter(action="ignore", category=_ignored_category)
# Show the installed Garfield version as the cell output.
gf.__version__
[2]:
'1.0.0'
Read data
[3]:
file_fold = '/pri_exthome/zhouwg/project/spatial_data/gold'

# Load both batches the same way: put the raw counts layer into .X and
# make the gene names unique.
loaded_batches = []
for h5ad_name in ('/xenium_human_breast_cancer_batch1.h5ad',
                  '/xenium_human_breast_cancer_batch2.h5ad'):
    batch_adata = sc.read_h5ad(file_fold + h5ad_name)
    batch_adata.X = batch_adata.layers['counts'].copy()
    batch_adata.var_names_make_unique()
    loaded_batches.append(batch_adata)
adata_batch1, adata_batch2 = loaded_batches

# Concatenate the two batches; the batch label is written to obs['batch'].
adata = adata_batch1.concatenate(adata_batch2, batch_key='batch')

# Build a slim AnnData that carries only the fields needed downstream:
# counts, gene/cell names, spatial coordinates and three obs columns.
adata_new = sc.AnnData(sp.csr_matrix(adata.shape, dtype=np.float32))
adata_new.X = adata.layers['counts'].copy()
adata_new.var_names = adata.var_names
adata_new.obs_names = adata.obs_names
adata_new.obsm["spatial"] = adata.obsm["spatial"]
for obs_column in ("cell_id", "batch", "replicates"):
    adata_new.obs[obs_column] = adata.obs[obs_column].values

# Keep a copy of the raw counts in adata_new.layers as well.
adata_new.layers["counts"] = adata.X.copy()
adata_new
[3]:
AnnData object with n_obs × n_vars = 282363 × 313
obs: 'cell_id', 'batch', 'replicates'
obsm: 'spatial'
layers: 'counts'
[4]:
# Number of cells per replicate in the slimmed-down object.
adata_new.obs['replicates'].value_counts()
[4]:
replicates
Rep_1 164000
Rep_2 118363
Name: count, dtype: int64
[5]:
# Sanity check on the object that is actually handed to Garfield
# (`adata_new`, passed as `adata_list` in the configuration): .X should hold
# raw counts, so the maximum is expected to be a whole number well above 1.
adata_new.X.max()
[5]:
167.0
Integrating spatially resolved transcriptomics data using Garfield
[6]:
# Directory that Garfield is told to use as its working directory for this experiment.
workdir = '/pri_exthome/zhouwg/project/Garfield_benchmark/results/sp_unimodal/spRNA_xenium_human_breast'
gf.settings.set_workdir(workdir)
# User-supplied settings. They are passed to gf.settings.set_gf_params at the
# end of this cell, and the result (dict_config) is what the model is built from.
user_config = dict(
## Input options
# The slim AnnData built above; obs['batch'] identifies the sample of each cell.
adata_list=adata_new,
profile='spatial',
data_type='single-modal',
sample_col='batch',
weight=0.5,
## Preprocessing options
graph_const_method='Squidpy', # options: mu_std, Radius, KNN, Squidpy
used_hvg=True,
min_cells=3,
min_features=0,
keep_mt=False,
target_sum=1e4,
rna_n_top_features=3000,
n_components=50,
n_neighbors=5,
metric='euclidean',
svd_solver='arpack',
# datasets
used_pca_feat=False,
adj_key='connectivities',
# data split parameters
# With these ratios the recorded run holds out about 10% of nodes and edges
# for validation and none for testing (see the training log).
edge_val_ratio=0.1,
edge_test_ratio=0.,
node_val_ratio=0.1,
node_test_ratio=0.,
## Model options
augment_type='svd',
svd_q=5,
use_FCencoder=True,
conv_type='GAT', # options: GAT or GATv2Conv or GCN
gnn_layer=2,
hidden_dims=[128, 128],
bottle_neck_neurons=20,
cluster_num=20,
drop_feature_rate=0.2,
drop_edge_rate=0.2,
num_heads=3,
dropout=0.2,
concat=True,
used_edge_weight=True,
used_DSBN=False,
used_mmd=True,
# data loader parameters
num_neighbors=5,
loaders_n_hops=2,
edge_batch_size=4096,
# NOTE(review): the recorded training log reports "Node batch size: 256",
# which does not match this value -- confirm which one is actually used.
node_batch_size=128, # alternative noted by the author: None
# loss parameters
include_edge_recon_loss=True,
include_gene_expr_recon_loss=True,
lambda_latent_contrastive_instanceloss=1.0,
lambda_latent_contrastive_clusterloss=0.5,
lambda_gene_expr_recon=1., # presumably the weight of the gene-expression reconstruction loss -- confirm
lambda_edge_recon=1., # presumably the weight of the edge reconstruction loss -- confirm
lambda_latent_adj_recon_loss=2.,
lambda_omics_recon_mmd_loss=0.2,
# train parameters
n_epochs_no_edge_recon=0,
learning_rate=0.001,
weight_decay=1e-05,
gradient_clipping=5,
# other parameters
# The same key name is later read from model.adata (use_rep='garfield_latent').
latent_key='garfield_latent',
reload_best_model=True,
use_early_stopping=True,
early_stopping_kwargs=None,
monitor=True,
device_id=0,
seed=2024,
verbose=True
)
# Build the configuration object that the model is constructed from.
dict_config = gf.settings.set_gf_params(user_config)
Saving results in: /pri_exthome/zhouwg/project/Garfield_benchmark/results/sp_unimodal/spRNA_xenium_human_breast
[24]:
from Garfield.model import Garfield
# Build the Garfield model from the configuration prepared in the previous cell.
model = Garfield(dict_config)
--- DATA LOADING AND PREPROCESSING ---
COSINE SIM GRAPH DECODER -> dropout_rate: 0.2
[25]:
# Train the model. This is long-running: the recorded run took about 49 minutes
# and stopped early.
model.train()
--- INITIALIZING TRAINER ---
Using GPU: device-0
Number of training nodes: 254127
Number of validation nodes: 28236
Number of training edges: 1615698
Number of validation edges: 179521
Edge batch size: 4096
Node batch size: 256
--- MODEL TRAINING ---
Epoch 1/100 |--------------------| 1.0% val_auroc_score: 0.8776; val_auprc_score: 0.8936; val_best_acc_score: 0.8172; val_best_f1_score: 0.8096; train_kl_reg_loss: 50.7585; train_edge_recon_loss: 1058.6235; train_gene_expr_recon_loss: 46871.5648; train_lambda_latent_adj_recon_loss: 1209.4891; train_lambda_latent_contrastive_instanceloss: 7.3723; train_lambda_latent_contrastive_clusterloss: 3.0528; train_gene_expr_mmd_loss: 17.7750; train_global_loss: 49218.6358; train_optim_loss: 49218.6358; val_kl_reg_loss: 46.5657; val_edge_recon_loss: 964.0532; val_gene_expr_recon_loss: 41828.2551; val_lambda_latent_adj_recon_loss: 353.4837; val_lambda_latent_contrastive_instanceloss: 7.0974; val_lambda_latent_contrastive_clusterloss: 2.4568; val_gene_expr_mmd_loss: 18.6913; val_global_loss: 43220.6031; val_optim_loss: 43220.6031
Epoch 2/100 |--------------------| 2.0% val_auroc_score: 0.8853; val_auprc_score: 0.9056; val_best_acc_score: 0.8253; val_best_f1_score: 0.8162; train_kl_reg_loss: 75.1174; train_edge_recon_loss: 1011.0217; train_gene_expr_recon_loss: 42361.9961; train_lambda_latent_adj_recon_loss: 430.6415; train_lambda_latent_contrastive_instanceloss: 7.1359; train_lambda_latent_contrastive_clusterloss: 2.3969; train_gene_expr_mmd_loss: 17.4050; train_global_loss: 43905.7145; train_optim_loss: 43905.7145; val_kl_reg_loss: 71.9691; val_edge_recon_loss: 963.7434; val_gene_expr_recon_loss: 40572.2922; val_lambda_latent_adj_recon_loss: 306.2032; val_lambda_latent_contrastive_instanceloss: 7.0677; val_lambda_latent_contrastive_clusterloss: 2.1837; val_gene_expr_mmd_loss: 16.3816; val_global_loss: 41939.8406; val_optim_loss: 41939.8406
Epoch 3/100 |--------------------| 3.0% val_auroc_score: 0.8818; val_auprc_score: 0.9011; val_best_acc_score: 0.8201; val_best_f1_score: 0.8119; train_kl_reg_loss: 89.6356; train_edge_recon_loss: 1004.0086; train_gene_expr_recon_loss: 41578.0043; train_lambda_latent_adj_recon_loss: 365.4889; train_lambda_latent_contrastive_instanceloss: 7.1084; train_lambda_latent_contrastive_clusterloss: 2.2133; train_gene_expr_mmd_loss: 16.7617; train_global_loss: 43063.2208; train_optim_loss: 43063.2208; val_kl_reg_loss: 81.1925; val_edge_recon_loss: 964.0505; val_gene_expr_recon_loss: 40342.4580; val_lambda_latent_adj_recon_loss: 279.0829; val_lambda_latent_contrastive_instanceloss: 7.0617; val_lambda_latent_contrastive_clusterloss: 2.0844; val_gene_expr_mmd_loss: 16.2883; val_global_loss: 41692.2186; val_optim_loss: 41692.2186
Epoch 4/100 |--------------------| 4.0% val_auroc_score: 0.8817; val_auprc_score: 0.9024; val_best_acc_score: 0.8216; val_best_f1_score: 0.8131; train_kl_reg_loss: 97.8892; train_edge_recon_loss: 1002.4554; train_gene_expr_recon_loss: 41202.6463; train_lambda_latent_adj_recon_loss: 325.5363; train_lambda_latent_contrastive_instanceloss: 7.0974; train_lambda_latent_contrastive_clusterloss: 2.1432; train_gene_expr_mmd_loss: 15.9769; train_global_loss: 42653.7447; train_optim_loss: 42653.7447; val_kl_reg_loss: 91.2145; val_edge_recon_loss: 965.9334; val_gene_expr_recon_loss: 40086.4171; val_lambda_latent_adj_recon_loss: 250.0118; val_lambda_latent_contrastive_instanceloss: 7.0581; val_lambda_latent_contrastive_clusterloss: 2.0445; val_gene_expr_mmd_loss: 14.3999; val_global_loss: 41417.0796; val_optim_loss: 41417.0796
Epoch 5/100 |█-------------------| 5.0% val_auroc_score: 0.8833; val_auprc_score: 0.9043; val_best_acc_score: 0.8233; val_best_f1_score: 0.8146; train_kl_reg_loss: 106.0794; train_edge_recon_loss: 1001.3394; train_gene_expr_recon_loss: 40935.3621; train_lambda_latent_adj_recon_loss: 304.4501; train_lambda_latent_contrastive_instanceloss: 7.0904; train_lambda_latent_contrastive_clusterloss: 2.1090; train_gene_expr_mmd_loss: 15.6188; train_global_loss: 42372.0492; train_optim_loss: 42372.0492; val_kl_reg_loss: 103.6359; val_edge_recon_loss: 965.2103; val_gene_expr_recon_loss: 40013.2107; val_lambda_latent_adj_recon_loss: 248.8201; val_lambda_latent_contrastive_instanceloss: 7.0558; val_lambda_latent_contrastive_clusterloss: 2.0291; val_gene_expr_mmd_loss: 15.3148; val_global_loss: 41355.2773; val_optim_loss: 41355.2773
Epoch 6/100 |█-------------------| 6.0% val_auroc_score: 0.8838; val_auprc_score: 0.9046; val_best_acc_score: 0.8239; val_best_f1_score: 0.8149; train_kl_reg_loss: 110.5403; train_edge_recon_loss: 999.8126; train_gene_expr_recon_loss: 40741.4342; train_lambda_latent_adj_recon_loss: 289.8306; train_lambda_latent_contrastive_instanceloss: 7.0858; train_lambda_latent_contrastive_clusterloss: 2.0896; train_gene_expr_mmd_loss: 15.1141; train_global_loss: 42165.9074; train_optim_loss: 42165.9074; val_kl_reg_loss: 104.3687; val_edge_recon_loss: 963.7631; val_gene_expr_recon_loss: 40017.0510; val_lambda_latent_adj_recon_loss: 235.9723; val_lambda_latent_contrastive_instanceloss: 7.0537; val_lambda_latent_contrastive_clusterloss: 2.0170; val_gene_expr_mmd_loss: 14.6915; val_global_loss: 41344.9174; val_optim_loss: 41344.9174
Epoch 7/100 |█-------------------| 7.0% val_auroc_score: 0.8842; val_auprc_score: 0.9045; val_best_acc_score: 0.8233; val_best_f1_score: 0.8144; train_kl_reg_loss: 114.9199; train_edge_recon_loss: 998.9446; train_gene_expr_recon_loss: 40622.8384; train_lambda_latent_adj_recon_loss: 279.5904; train_lambda_latent_contrastive_instanceloss: 7.0811; train_lambda_latent_contrastive_clusterloss: 2.0731; train_gene_expr_mmd_loss: 15.2337; train_global_loss: 42040.6813; train_optim_loss: 42040.6813; val_kl_reg_loss: 108.8204; val_edge_recon_loss: 961.4641; val_gene_expr_recon_loss: 39941.7819; val_lambda_latent_adj_recon_loss: 231.4726; val_lambda_latent_contrastive_instanceloss: 7.0526; val_lambda_latent_contrastive_clusterloss: 2.0061; val_gene_expr_mmd_loss: 14.9583; val_global_loss: 41267.5558; val_optim_loss: 41267.5558
Epoch 8/100 |█-------------------| 8.0% val_auroc_score: 0.8855; val_auprc_score: 0.9063; val_best_acc_score: 0.8254; val_best_f1_score: 0.8166; train_kl_reg_loss: 117.9003; train_edge_recon_loss: 998.4742; train_gene_expr_recon_loss: 40489.1219; train_lambda_latent_adj_recon_loss: 272.2878; train_lambda_latent_contrastive_instanceloss: 7.0789; train_lambda_latent_contrastive_clusterloss: 2.0651; train_gene_expr_mmd_loss: 14.7490; train_global_loss: 41901.6772; train_optim_loss: 41901.6772; val_kl_reg_loss: 112.5880; val_edge_recon_loss: 960.9997; val_gene_expr_recon_loss: 40081.4434; val_lambda_latent_adj_recon_loss: 229.5542; val_lambda_latent_contrastive_instanceloss: 7.0509; val_lambda_latent_contrastive_clusterloss: 2.0038; val_gene_expr_mmd_loss: 13.8094; val_global_loss: 41407.4490; val_optim_loss: 41407.4490
Epoch 9/100 |█-------------------| 9.0% val_auroc_score: 0.8825; val_auprc_score: 0.9037; val_best_acc_score: 0.8235; val_best_f1_score: 0.8142; train_kl_reg_loss: 121.0431; train_edge_recon_loss: 998.2447; train_gene_expr_recon_loss: 40369.9072; train_lambda_latent_adj_recon_loss: 263.9861; train_lambda_latent_contrastive_instanceloss: 7.0784; train_lambda_latent_contrastive_clusterloss: 2.0578; train_gene_expr_mmd_loss: 14.6067; train_global_loss: 41776.9243; train_optim_loss: 41776.9243; val_kl_reg_loss: 111.9546; val_edge_recon_loss: 961.4434; val_gene_expr_recon_loss: 40053.7770; val_lambda_latent_adj_recon_loss: 217.7099; val_lambda_latent_contrastive_instanceloss: 7.0519; val_lambda_latent_contrastive_clusterloss: 1.9962; val_gene_expr_mmd_loss: 13.6071; val_global_loss: 41367.5394; val_optim_loss: 41367.5394
Epoch 10/100 |██------------------| 10.0% val_auroc_score: 0.8851; val_auprc_score: 0.9057; val_best_acc_score: 0.8243; val_best_f1_score: 0.8152; train_kl_reg_loss: 122.6403; train_edge_recon_loss: 997.9140; train_gene_expr_recon_loss: 40338.8970; train_lambda_latent_adj_recon_loss: 258.0181; train_lambda_latent_contrastive_instanceloss: 7.0766; train_lambda_latent_contrastive_clusterloss: 2.0515; train_gene_expr_mmd_loss: 14.6231; train_global_loss: 41741.2202; train_optim_loss: 41741.2202; val_kl_reg_loss: 118.2444; val_edge_recon_loss: 960.4868; val_gene_expr_recon_loss: 40173.2214; val_lambda_latent_adj_recon_loss: 220.3683; val_lambda_latent_contrastive_instanceloss: 7.0556; val_lambda_latent_contrastive_clusterloss: 2.0070; val_gene_expr_mmd_loss: 13.9247; val_global_loss: 41495.3084; val_optim_loss: 41495.3084
Epoch 11/100 |██------------------| 11.0% val_auroc_score: 0.8825; val_auprc_score: 0.9030; val_best_acc_score: 0.8219; val_best_f1_score: 0.8130; train_kl_reg_loss: 122.8299; train_edge_recon_loss: 996.3487; train_gene_expr_recon_loss: 40250.3881; train_lambda_latent_adj_recon_loss: 256.4358; train_lambda_latent_contrastive_instanceloss: 7.0745; train_lambda_latent_contrastive_clusterloss: 2.0452; train_gene_expr_mmd_loss: 14.5680; train_global_loss: 41649.6901; train_optim_loss: 41649.6901; val_kl_reg_loss: 120.2157; val_edge_recon_loss: 958.8080; val_gene_expr_recon_loss: 39912.1292; val_lambda_latent_adj_recon_loss: 223.5689; val_lambda_latent_contrastive_instanceloss: 7.0534; val_lambda_latent_contrastive_clusterloss: 1.9954; val_gene_expr_mmd_loss: 14.0169; val_global_loss: 41237.7881; val_optim_loss: 41237.7881
Epoch 12/100 |██------------------| 12.0% val_auroc_score: 0.8828; val_auprc_score: 0.9032; val_best_acc_score: 0.8235; val_best_f1_score: 0.8141; train_kl_reg_loss: 124.1679; train_edge_recon_loss: 995.9008; train_gene_expr_recon_loss: 40249.7243; train_lambda_latent_adj_recon_loss: 253.2288; train_lambda_latent_contrastive_instanceloss: 7.0748; train_lambda_latent_contrastive_clusterloss: 2.0415; train_gene_expr_mmd_loss: 14.5612; train_global_loss: 41646.6992; train_optim_loss: 41646.6992; val_kl_reg_loss: 120.0959; val_edge_recon_loss: 959.2712; val_gene_expr_recon_loss: 39617.0432; val_lambda_latent_adj_recon_loss: 220.9357; val_lambda_latent_contrastive_instanceloss: 7.0555; val_lambda_latent_contrastive_clusterloss: 1.9976; val_gene_expr_mmd_loss: 14.7909; val_global_loss: 40941.1897; val_optim_loss: 40941.1897
Epoch 13/100 |██------------------| 13.0% val_auroc_score: 0.8850; val_auprc_score: 0.9056; val_best_acc_score: 0.8243; val_best_f1_score: 0.8148; train_kl_reg_loss: 124.8412; train_edge_recon_loss: 996.0497; train_gene_expr_recon_loss: 40157.4104; train_lambda_latent_adj_recon_loss: 249.0228; train_lambda_latent_contrastive_instanceloss: 7.0724; train_lambda_latent_contrastive_clusterloss: 2.0370; train_gene_expr_mmd_loss: 14.3746; train_global_loss: 41550.8081; train_optim_loss: 41550.8081; val_kl_reg_loss: 123.5154; val_edge_recon_loss: 957.3145; val_gene_expr_recon_loss: 39554.3270; val_lambda_latent_adj_recon_loss: 220.8270; val_lambda_latent_contrastive_instanceloss: 7.0487; val_lambda_latent_contrastive_clusterloss: 1.9850; val_gene_expr_mmd_loss: 13.9886; val_global_loss: 40879.0068; val_optim_loss: 40879.0068
Epoch 14/100 |██------------------| 14.0% val_auroc_score: 0.8834; val_auprc_score: 0.9037; val_best_acc_score: 0.8228; val_best_f1_score: 0.8139; train_kl_reg_loss: 125.9847; train_edge_recon_loss: 996.2861; train_gene_expr_recon_loss: 40070.2250; train_lambda_latent_adj_recon_loss: 245.8402; train_lambda_latent_contrastive_instanceloss: 7.0713; train_lambda_latent_contrastive_clusterloss: 2.0327; train_gene_expr_mmd_loss: 14.2214; train_global_loss: 41461.6612; train_optim_loss: 41461.6612; val_kl_reg_loss: 126.0574; val_edge_recon_loss: 960.3616; val_gene_expr_recon_loss: 39918.9891; val_lambda_latent_adj_recon_loss: 218.6893; val_lambda_latent_contrastive_instanceloss: 7.0480; val_lambda_latent_contrastive_clusterloss: 1.9905; val_gene_expr_mmd_loss: 13.7169; val_global_loss: 41246.8535; val_optim_loss: 41246.8535
Epoch 15/100 |███-----------------| 15.0% val_auroc_score: 0.8831; val_auprc_score: 0.9035; val_best_acc_score: 0.8226; val_best_f1_score: 0.8137; train_kl_reg_loss: 126.5583; train_edge_recon_loss: 996.0118; train_gene_expr_recon_loss: 40081.6660; train_lambda_latent_adj_recon_loss: 241.8486; train_lambda_latent_contrastive_instanceloss: 7.0696; train_lambda_latent_contrastive_clusterloss: 2.0313; train_gene_expr_mmd_loss: 14.1293; train_global_loss: 41469.3147; train_optim_loss: 41469.3147; val_kl_reg_loss: 124.2741; val_edge_recon_loss: 959.5414; val_gene_expr_recon_loss: 40137.0021; val_lambda_latent_adj_recon_loss: 214.3165; val_lambda_latent_contrastive_instanceloss: 7.0494; val_lambda_latent_contrastive_clusterloss: 1.9897; val_gene_expr_mmd_loss: 13.3212; val_global_loss: 41457.4948; val_optim_loss: 41457.4948
Epoch 16/100 |███-----------------| 16.0% val_auroc_score: 0.8842; val_auprc_score: 0.9051; val_best_acc_score: 0.8245; val_best_f1_score: 0.8156; train_kl_reg_loss: 127.3703; train_edge_recon_loss: 995.6445; train_gene_expr_recon_loss: 40074.8821; train_lambda_latent_adj_recon_loss: 242.1269; train_lambda_latent_contrastive_instanceloss: 7.0701; train_lambda_latent_contrastive_clusterloss: 2.0292; train_gene_expr_mmd_loss: 14.1570; train_global_loss: 41463.2801; train_optim_loss: 41463.2801; val_kl_reg_loss: 124.3662; val_edge_recon_loss: 958.4993; val_gene_expr_recon_loss: 40291.8340; val_lambda_latent_adj_recon_loss: 215.9983; val_lambda_latent_contrastive_instanceloss: 7.0508; val_lambda_latent_contrastive_clusterloss: 1.9820; val_gene_expr_mmd_loss: 14.0337; val_global_loss: 41613.7638; val_optim_loss: 41613.7638
Epoch 17/100 |███-----------------| 17.0% val_auroc_score: 0.8844; val_auprc_score: 0.9049; val_best_acc_score: 0.8238; val_best_f1_score: 0.8149; train_kl_reg_loss: 127.2085; train_edge_recon_loss: 995.0348; train_gene_expr_recon_loss: 40000.2266; train_lambda_latent_adj_recon_loss: 241.0106; train_lambda_latent_contrastive_instanceloss: 7.0690; train_lambda_latent_contrastive_clusterloss: 2.0273; train_gene_expr_mmd_loss: 14.0625; train_global_loss: 41386.6393; train_optim_loss: 41386.6393; val_kl_reg_loss: 124.8090; val_edge_recon_loss: 959.1723; val_gene_expr_recon_loss: 40181.2939; val_lambda_latent_adj_recon_loss: 212.2636; val_lambda_latent_contrastive_instanceloss: 7.0464; val_lambda_latent_contrastive_clusterloss: 1.9769; val_gene_expr_mmd_loss: 13.5305; val_global_loss: 41500.0929; val_optim_loss: 41500.0929
Reducing learning rate: metric has not improved more than 0.0 in the last 4 epochs.
New learning rate is 0.0001.
Epoch 18/100 |███-----------------| 18.0% val_auroc_score: 0.8857; val_auprc_score: 0.9064; val_best_acc_score: 0.8248; val_best_f1_score: 0.8163; train_kl_reg_loss: 127.9286; train_edge_recon_loss: 995.1901; train_gene_expr_recon_loss: 39855.5609; train_lambda_latent_adj_recon_loss: 237.4666; train_lambda_latent_contrastive_instanceloss: 7.0648; train_lambda_latent_contrastive_clusterloss: 2.0175; train_gene_expr_mmd_loss: 14.0095; train_global_loss: 41239.2381; train_optim_loss: 41239.2381; val_kl_reg_loss: 125.8056; val_edge_recon_loss: 958.7944; val_gene_expr_recon_loss: 40000.1664; val_lambda_latent_adj_recon_loss: 212.8337; val_lambda_latent_contrastive_instanceloss: 7.0473; val_lambda_latent_contrastive_clusterloss: 1.9812; val_gene_expr_mmd_loss: 13.3857; val_global_loss: 41320.0141; val_optim_loss: 41320.0141
Epoch 19/100 |███-----------------| 19.0% val_auroc_score: 0.8861; val_auprc_score: 0.9061; val_best_acc_score: 0.8256; val_best_f1_score: 0.8170; train_kl_reg_loss: 128.0611; train_edge_recon_loss: 994.7671; train_gene_expr_recon_loss: 39850.2294; train_lambda_latent_adj_recon_loss: 237.9947; train_lambda_latent_contrastive_instanceloss: 7.0638; train_lambda_latent_contrastive_clusterloss: 2.0140; train_gene_expr_mmd_loss: 13.9990; train_global_loss: 41234.1290; train_optim_loss: 41234.1290; val_kl_reg_loss: 126.9382; val_edge_recon_loss: 958.9486; val_gene_expr_recon_loss: 40142.3609; val_lambda_latent_adj_recon_loss: 211.6432; val_lambda_latent_contrastive_instanceloss: 7.0445; val_lambda_latent_contrastive_clusterloss: 1.9708; val_gene_expr_mmd_loss: 13.4086; val_global_loss: 41462.3150; val_optim_loss: 41462.3150
Epoch 20/100 |████----------------| 20.0% val_auroc_score: 0.8856; val_auprc_score: 0.9063; val_best_acc_score: 0.8249; val_best_f1_score: 0.8163; train_kl_reg_loss: 127.9389; train_edge_recon_loss: 994.9502; train_gene_expr_recon_loss: 39805.0707; train_lambda_latent_adj_recon_loss: 237.5645; train_lambda_latent_contrastive_instanceloss: 7.0639; train_lambda_latent_contrastive_clusterloss: 2.0136; train_gene_expr_mmd_loss: 13.8008; train_global_loss: 41188.4026; train_optim_loss: 41188.4026; val_kl_reg_loss: 126.2667; val_edge_recon_loss: 959.1629; val_gene_expr_recon_loss: 39885.2235; val_lambda_latent_adj_recon_loss: 212.3516; val_lambda_latent_contrastive_instanceloss: 7.0430; val_lambda_latent_contrastive_clusterloss: 1.9709; val_gene_expr_mmd_loss: 13.7071; val_global_loss: 41205.7261; val_optim_loss: 41205.7261
Epoch 21/100 |████----------------| 21.0% val_auroc_score: 0.8862; val_auprc_score: 0.9068; val_best_acc_score: 0.8263; val_best_f1_score: 0.8176; train_kl_reg_loss: 127.8277; train_edge_recon_loss: 995.0876; train_gene_expr_recon_loss: 39805.9191; train_lambda_latent_adj_recon_loss: 236.8815; train_lambda_latent_contrastive_instanceloss: 7.0626; train_lambda_latent_contrastive_clusterloss: 2.0119; train_gene_expr_mmd_loss: 13.8887; train_global_loss: 41188.6788; train_optim_loss: 41188.6788; val_kl_reg_loss: 124.9811; val_edge_recon_loss: 957.8342; val_gene_expr_recon_loss: 39828.1732; val_lambda_latent_adj_recon_loss: 211.5395; val_lambda_latent_contrastive_instanceloss: 7.0465; val_lambda_latent_contrastive_clusterloss: 1.9783; val_gene_expr_mmd_loss: 13.6349; val_global_loss: 41145.1882; val_optim_loss: 41145.1882
Stopping early: metric has not improved more than 0.0 in the last 8 epochs.
If the early stopping criterion is too strong, please instantiate it with different parameters in the train method.
Model training finished after 48 min 50 sec.
Using best model state, which was in epoch 13.
--- MODEL EVALUATION ---
val AUROC score: 0.8853
val AUPRC score: 0.9060
val best accuracy score: 0.8247
val best F1 score: 0.8155
val MSE score: 1.9793
[26]:
# Build the neighbour graph on the Garfield embedding and store it under the
# same key, then compute the UMAP layout from that graph.
latent_key = 'garfield_latent'
sc.pp.neighbors(model.adata, use_rep=latent_key, key_added=latent_key)
sc.tl.umap(model.adata, neighbors_key=latent_key)
[27]:
# Leiden clustering on the latent neighbour graph; the result column is named
# after the resolution so that several resolutions can coexist in obs.
latent_key = "garfield_latent"
latent_leiden_resolution = 0.8
latent_cluster_key = f"latent_leiden_{latent_leiden_resolution}"
sc.tl.leiden(
    adata=model.adata,
    neighbors_key=latent_key,
    resolution=latent_leiden_resolution,
    key_added=latent_cluster_key,
)
# Number of distinct clusters found.
len(model.adata.obs[latent_cluster_key].unique())
[27]:
12
Visualize Garfield Latent Space
[29]:
sc.settings.set_figure_params(dpi=100, facecolor='white')
# UMAP of the latent space in two panels: coloured by replicate and by Leiden cluster.
sc.pl.umap(
    model.adata,
    color=['replicates', latent_cluster_key],
    ncols=2,
    wspace=0.5,
    s=10,
    show=False,
)
[29]:
[<Axes: title={'center': 'batch'}, xlabel='UMAP1', ylabel='UMAP2'>,
<Axes: title={'center': 'latent_leiden_0.5'}, xlabel='UMAP1', ylabel='UMAP2'>]
[32]:
sc.settings.set_figure_params(dpi=100, facecolor='white')
import matplotlib.pyplot as plt

# One spatial panel per replicate, coloured by the latent Leiden cluster.
fig, ax = plt.subplots(1, 2, figsize=(8, 4), gridspec_kw={'wspace': 0.3, 'hspace': 0.2})
for panel, (replicate, panel_title) in zip(ax, [('Rep_1', 'Replicate-1'),
                                                ('Rep_2', 'Replicate-2')]):
    sc.pl.embedding(model.adata[model.adata.obs['replicates'] == replicate],
                    basis='spatial', color=latent_cluster_key,
                    title=panel_title, s=20, show=False,
                    ax=panel, frameon=False)
plt.tight_layout(w_pad=0.3)
Save model results
[21]:
# Persist the trained model, together with its AnnData, under <workdir>/model.
model_folder_path = f"{workdir}/model"
os.makedirs(model_folder_path, exist_ok=True)
model.save(
    dir_path=model_folder_path,
    adata_file_name="adata_concat.h5ad",
    save_adata=True,
    overwrite=True,
)
Model saved successfully using pickle at /pri_exthome/zhouwg/project/Garfield_benchmark/results/sp_unimodal/spRNA_xenium_human_breast/model/attr.pkl
[3]:
from Garfield.model import Garfield

# Point Garfield at the same results directory and reload the saved model and
# its AnnData. The recorded log shows the model is restored with pickle, so
# only load model folders from a trusted source.
workdir = '/pri_exthome/zhouwg/project/Garfield_benchmark/results/sp_unimodal/spRNA_xenium_human_breast'
gf.settings.set_workdir(workdir)
model_folder_path = workdir + "/model"
model = Garfield.load(
    dir_path=model_folder_path,
    adata_file_name="adata_concat.h5ad",
)
Saving results in: /pri_exthome/zhouwg/project/Garfield_benchmark/results/sp_unimodal/spRNA_xenium_human_breast
Model loaded successfully using pickle from /pri_exthome/zhouwg/project/Garfield_benchmark/results/sp_unimodal/spRNA_xenium_human_breast/model/attr.pkl
AnnData object with n_obs × n_vars = 282363 × 313
obs: 'cell_id', 'batch', 'replicates', 'n_genes', 'latent_leiden_0.5', 'latent_leiden_0.4', 'latent_leiden_0.6', 'latent_leiden_0.7', 'latent_leiden_0.8', 'Cluster'
var: 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'mean', 'std'
uns: 'batch_colors', 'garfield_latent', 'hvg', 'latent_leiden_0.4', 'latent_leiden_0.5', 'latent_leiden_0.5_colors', 'latent_leiden_0.6', 'latent_leiden_0.7', 'latent_leiden_0.8', 'log1p', 'neighbors', 'pca', 'umap'
obsm: 'X_pca', 'X_umap', 'feat', 'garfield_latent', 'spatial'
varm: 'PCs'
layers: 'counts', 'norm_data'
obsp: 'connectivities', 'distances', 'garfield_latent_connectivities', 'garfield_latent_distances', 'spatial_connectivities'
--- DATA LOADING AND PREPROCESSING ---
COSINE SIM GRAPH DECODER -> dropout_rate: 0.2
Load original cell type labels
[4]:
import pandas as pd

# Read the supervised cell-type annotation sheets from the xlsx workbook.
# Both sheets are requested in a single read_excel call (returns a dict keyed
# by sheet name), so the workbook is parsed once instead of once per sheet.
root_dir = '/pri_exthome/zhouwg/project/spatial_data/bronze/xenium_human_breast_cancer'
sheet_rep1 = 'Xenium R1 Fig1-5 (supervised)'
sheet_rep2 = 'Xenium R2 Fig1-5 (supervised)'
celltype_sheets = pd.read_excel(os.path.join(root_dir, 'Cell_Barcode_Type_Matrices.xlsx'),
                                sheet_name=[sheet_rep1, sheet_rep2], index_col=0)

# Generate new row names by appending the replicate/batch suffix.
# NOTE(review): presumably this makes them match model.adata.obs_names -- confirm.
data1 = celltype_sheets[sheet_rep1]
data1.index = [f"{name}-Rep_1-0" for name in data1.index]
data2 = celltype_sheets[sheet_rep2]
data2.index = [f"{name}-Rep_2-1" for name in data2.index]

# Stack the annotations of both replicates.
# NOTE(review): `data` is not attached to model.adata anywhere in the visible
# code; the obs['Cluster'] column used afterwards already exists in the
# reloaded AnnData. Confirm whether an assignment step is missing.
data = pd.concat([data1, data2])
[5]:
# Number of cells per annotated cell type.
model.adata.obs['Cluster'].value_counts()
[5]:
Cluster
Stromal 80059
Invasive_Tumor 50133
DCIS_2 21707
DCIS_1 20094
Macrophages_1 19226
Endothelial 15630
CD4+_T_Cells 14393
Myoepi_ACTA2+ 13140
CD8+_T_Cells 11888
B_Cells 8779
Unlabeled 7869
Prolif_Invasive_Tumor 5626
Myoepi_KRT15+ 5507
Macrophages_2 3202
Perivascular-Like 1396
Stromal_&_T_Cell_Hybrid 1151
IRF7+_DCs 894
T_Cell_&_Tumor_Hybrid 837
LAMP3+_DCs 504
Mast_Cells 328
Name: count, dtype: int64
[6]:
sc.settings.set_figure_params(dpi=100, facecolor='white')
# UMAP restricted to replicate 2, with the cell-type labels drawn on the embedding.
rep2_mask = model.adata.obs['replicates'].isin(['Rep_2'])
sc.pl.umap(
    model.adata[rep2_mask, :],
    color=['Cluster'],
    legend_loc='on data',
    legend_fontsize=8,
    ncols=2,
    wspace=0.3,
    s=10,
    show=False,
)
[6]:
<Axes: title={'center': 'Cluster'}, xlabel='UMAP1', ylabel='UMAP2'>
[7]:
### dendrogram analysis
# Hierarchical clustering (single linkage) of the Leiden groups, computed on
# the Garfield latent representation.
sc.tl.dendrogram(
    adata=model.adata,
    groupby="latent_leiden_0.8",
    use_rep="garfield_latent",
    linkage_method="single",
)
[8]:
from matplotlib import pyplot as plt
import matplotlib

# Draw the dendrogram of the Leiden groups on a single tall, narrow axis.
fig, ax = plt.subplots(figsize=(3, 8))
sc.pl.dendrogram(
    adata=model.adata,
    groupby="latent_leiden_0.8",
    ax=ax,
    orientation="left",
)
[8]:
<Axes: >
Niche characterization
[10]:
# Visualize the distribution of multiple cell types in space
from Garfield.analysis import calc_neighbor_prop

## Rep_1
# These settings are passed to calc_neighbor_prop below rather than repeated as
# literals. `n` is also used afterwards to build the obsm key
# f'k{n}_neighbours_celltype_normalized', so it must equal n_neighbors.
n = 25  # number of neighbours computed for each cell
batch_key = 'replicates'  # obs column used to distinguish batches
celltype_key = 'Cluster'  # obs column holding the cell type
celltye_key = celltype_key  # original (misspelled) name kept as an alias

# Copy only the Rep_1 cells instead of copying the whole AnnData and then
# taking a view of it.
adata = model.adata[model.adata.obs[batch_key] == 'Rep_1'].copy()
adata = calc_neighbor_prop(
    adata,
    batch_key=batch_key,
    celltype_key=celltype_key,
    n_neighbors=n,
    spatial_key='spatial',
    output_key=None
)
[11]:
from matplotlib import rcParams
from Garfield.plot import plot_multi_patterns_spatial

# NOTE(review): set_figure_params is called after the grid is switched off and
# may reset rcParams['axes.grid'] -- confirm the intended order. The original
# statement order is kept here.
rcParams['axes.grid'] = False
sc.settings.set_figure_params(dpi=100, facecolor='white')

# Cell types whose neighbourhood proportions are shown together in one spatial plot.
select_celltype = ['CD8+_T_Cells', 'CD4+_T_Cells', 'Invasive_Tumor', 'IRF7+_DCs']

# Per-cell neighbourhood cell-type proportions, restricted to the selected columns.
prop = adata.obsm[f'k{n}_neighbours_celltype_normalized']
selected_columns = prop.columns.isin(select_celltype)
prop_sub = prop.loc[:, selected_columns]

fig = plot_multi_patterns_spatial(
    adata,
    prop_sub,
    basis="spatial",
    spot_size=2,
    display_zeros=True,
)
/home/zhouwg/project/Garfield/Garfield/plot/spatial_plot.py:372: RuntimeWarning: invalid value encountered in divide
weighted_colors_ryb = (colors_ryb * kernel_weights).sum(
Niche functional enrichment analysis
[12]:
from Garfield.analysis import (
    calc_marker_stats,
    filter_marker_stats,
    aggregate_top_markers,
)

# Per-niche marker statistics, grouped by the Leiden clusters at resolution 0.8.
niche_key = 'latent_leiden_0.8'
frac_df, mean_df, stats_df = calc_marker_stats(
    model.adata, groupby=niche_key, use_raw=False
)
[13]:
# Filter the marker statistics, then display the resulting table.
filtered_stats_df = filter_marker_stats(stats_df, use_raw=False)
filtered_stats_df
[13]:
| top_frac_group | top_frac | frac_diff | max_frac_diff | top_mean_group | top_mean | mean_diff | max_mean_diff | |
|---|---|---|---|---|---|---|---|---|
| LUM | 0 | 0.996524 | 0.089779 | 0.157525 | 0 | 23.035275 | 14.295960 | 15.823607 |
| POSTN | 0 | 0.981910 | 0.173626 | 0.211133 | 0 | 19.802172 | 13.911303 | 13.971071 |
| CXCL12 | 0 | 0.921449 | 0.116930 | 0.134596 | 0 | 10.290223 | 6.076733 | 6.093116 |
| CCDC80 | 0 | 0.942203 | 0.189961 | 0.316198 | 0 | 9.114605 | 5.254186 | 6.262940 |
| MMP2 | 0 | 0.900898 | 0.343140 | 0.374988 | 0 | 5.896849 | 4.210658 | 4.382273 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| CDC42EP1 | 13 | 0.340711 | 0.144270 | 0.214085 | 13 | 0.482519 | 0.256816 | 0.341248 |
| C5orf46 | 13 | 0.258892 | 0.074330 | 0.228083 | 13 | 0.470308 | 0.179277 | 0.432408 |
| LIF | 13 | 0.137994 | 0.092833 | 0.113404 | 13 | 0.231109 | 0.167806 | 0.200844 |
| TUBB2B | 13 | 0.330179 | 0.064643 | 0.079973 | 13 | 0.487557 | 0.148570 | 0.178164 |
| CXCL16 | 13 | 0.562815 | 0.088826 | 0.231686 | 13 | 0.988393 | 0.142197 | 0.533212 |
215 rows × 8 columns
[14]:
# Combine the filtered statistics into the final marker table (n_genes=100).
# NOTE(review): the recorded run emits scanpy's "rank_genes_groups on the raw
# count data" warning for this call -- confirm that the intended
# (log-normalized) matrix is being used.
final_markers = aggregate_top_markers(
    model.adata,
    filtered_stats_df,
    groupby=niche_key,
    n_genes=100,
    use_raw=False,
)
final_markers
WARNING: It seems you use rank_genes_groups on the raw count data. Please logarithmize your data before calling rank_genes_groups.
[14]:
| genes | cluster | top_frac | frac_diff | max_frac_diff | top_mean_group | top_mean | mean_diff | max_mean_diff | logfoldchanges | pvals | pvals_adj | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | LUM | 0 | 0.996524 | 0.089779 | 0.157525 | 0 | 23.035275 | 14.295960 | 15.823607 | 28.016331 | 0.000000e+00 | 0.000000e+00 |
| 1 | POSTN | 0 | 0.981910 | 0.173626 | 0.211133 | 0 | 19.802172 | 13.911303 | 13.971071 | 24.424164 | 0.000000e+00 | 0.000000e+00 |
| 2 | CXCL12 | 0 | 0.921449 | 0.116930 | 0.134596 | 0 | 10.290223 | 6.076733 | 6.093116 | 12.798749 | 0.000000e+00 | 0.000000e+00 |
| 3 | CCDC80 | 0 | 0.942203 | 0.189961 | 0.316198 | 0 | 9.114605 | 5.254186 | 6.262940 | 11.820065 | 0.000000e+00 | 0.000000e+00 |
| 4 | MMP2 | 0 | 0.900898 | 0.343140 | 0.374988 | 0 | 5.896849 | 4.210658 | 4.382273 | 8.532184 | 0.000000e+00 | 0.000000e+00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 210 | CDC42EP1 | 13 | 0.340711 | 0.144270 | 0.214085 | 13 | 0.482519 | 0.256816 | 0.341248 | 2.660414 | 1.326514e-307 | 3.204500e-307 |
| 211 | C5orf46 | 13 | 0.258892 | 0.074330 | 0.228083 | 13 | 0.470308 | 0.179277 | 0.432408 | 4.358301 | 5.843159e-235 | 1.243841e-234 |
| 212 | LIF | 13 | 0.137994 | 0.092833 | 0.113404 | 13 | 0.231109 | 0.167806 | 0.200844 | 3.344476 | 3.511281e-101 | 5.066613e-101 |
| 213 | TUBB2B | 13 | 0.330179 | 0.064643 | 0.079973 | 13 | 0.487557 | 0.148570 | 0.178164 | 2.013302 | 4.539250e-213 | 9.036470e-213 |
| 214 | CXCL16 | 13 | 0.562815 | 0.088826 | 0.231686 | 13 | 0.988393 | 0.142197 | 0.533212 | 2.198404 | 0.000000e+00 | 0.000000e+00 |
215 rows × 12 columns
[15]:
# enrichment
from Garfield.analysis import get_enrichr_geneset,get_niche_enrichr,get_fast_niche_enrichr
# List the Enrichr gene-set library names returned for organism "Human".
get_enrichr_geneset(organism="Human")
[15]:
['ARCHS4_Cell-lines',
'ARCHS4_IDG_Coexp',
'ARCHS4_Kinases_Coexp',
'ARCHS4_TFs_Coexp',
'ARCHS4_Tissues',
'Achilles_fitness_decrease',
'Achilles_fitness_increase',
'Aging_Perturbations_from_GEO_down',
'Aging_Perturbations_from_GEO_up',
'Allen_Brain_Atlas_10x_scRNA_2021',
'Allen_Brain_Atlas_down',
'Allen_Brain_Atlas_up',
'Azimuth_2023',
'Azimuth_Cell_Types_2021',
'BioCarta_2013',
'BioCarta_2015',
'BioCarta_2016',
'BioPlanet_2019',
'BioPlex_2017',
'CCLE_Proteomics_2020',
'CORUM',
'COVID-19_Related_Gene_Sets',
'COVID-19_Related_Gene_Sets_2021',
'Cancer_Cell_Line_Encyclopedia',
'CellMarker_2024',
'CellMarker_Augmented_2021',
'ChEA_2013',
'ChEA_2015',
'ChEA_2016',
'ChEA_2022',
'Chromosome_Location',
'Chromosome_Location_hg19',
'ClinVar_2019',
'DGIdb_Drug_Targets_2024',
'DSigDB',
'Data_Acquisition_Method_Most_Popular_Genes',
'DepMap_CRISPR_GeneDependency_CellLines_2023',
'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019',
'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019',
'Descartes_Cell_Types_and_Tissue_2021',
'Diabetes_Perturbations_GEO_2022',
'DisGeNET',
'Disease_Perturbations_from_GEO_down',
'Disease_Perturbations_from_GEO_up',
'Disease_Signatures_from_GEO_down_2014',
'Disease_Signatures_from_GEO_up_2014',
'DrugMatrix',
'Drug_Perturbations_from_GEO_2014',
'Drug_Perturbations_from_GEO_down',
'Drug_Perturbations_from_GEO_up',
'ENCODE_Histone_Modifications_2013',
'ENCODE_Histone_Modifications_2015',
'ENCODE_TF_ChIP-seq_2014',
'ENCODE_TF_ChIP-seq_2015',
'ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X',
'ESCAPE',
'Elsevier_Pathway_Collection',
'Enrichr_Libraries_Most_Popular_Genes',
'Enrichr_Submissions_TF-Gene_Coocurrence',
'Enrichr_Users_Contributed_Lists_2020',
'Epigenomics_Roadmap_HM_ChIP-seq',
'FANTOM6_lncRNA_KD_DEGs',
'GO_Biological_Process_2013',
'GO_Biological_Process_2015',
'GO_Biological_Process_2017',
'GO_Biological_Process_2017b',
'GO_Biological_Process_2018',
'GO_Biological_Process_2021',
'GO_Biological_Process_2023',
'GO_Cellular_Component_2013',
'GO_Cellular_Component_2015',
'GO_Cellular_Component_2017',
'GO_Cellular_Component_2017b',
'GO_Cellular_Component_2018',
'GO_Cellular_Component_2021',
'GO_Cellular_Component_2023',
'GO_Molecular_Function_2013',
'GO_Molecular_Function_2015',
'GO_Molecular_Function_2017',
'GO_Molecular_Function_2017b',
'GO_Molecular_Function_2018',
'GO_Molecular_Function_2021',
'GO_Molecular_Function_2023',
'GTEx_Aging_Signatures_2021',
'GTEx_Tissue_Expression_Down',
'GTEx_Tissue_Expression_Up',
'GTEx_Tissues_V8_2023',
'GWAS_Catalog_2019',
'GWAS_Catalog_2023',
'GeDiPNet_2023',
'GeneSigDB',
'Gene_Perturbations_from_GEO_down',
'Gene_Perturbations_from_GEO_up',
'Genes_Associated_with_NIH_Grants',
'Genome_Browser_PWMs',
'GlyGen_Glycosylated_Proteins_2022',
'HDSigDB_Human_2021',
'HDSigDB_Mouse_2021',
'HMDB_Metabolites',
'HMS_LINCS_KinomeScan',
'HomoloGene',
'HuBMAP_ASCT_plus_B_augmented_w_RNAseq_Coexpression',
'HuBMAP_ASCTplusB_augmented_2022',
'HumanCyc_2015',
'HumanCyc_2016',
'Human_Gene_Atlas',
'Human_Phenotype_Ontology',
'IDG_Drug_Targets_2022',
'InterPro_Domains_2019',
'Jensen_COMPARTMENTS',
'Jensen_DISEASES',
'Jensen_TISSUES',
'KEA_2013',
'KEA_2015',
'KEGG_2013',
'KEGG_2015',
'KEGG_2016',
'KEGG_2019_Human',
'KEGG_2019_Mouse',
'KEGG_2021_Human',
'KOMP2_Mouse_Phenotypes_2022',
'Kinase_Perturbations_from_GEO_down',
'Kinase_Perturbations_from_GEO_up',
'L1000_Kinase_and_GPCR_Perturbations_down',
'L1000_Kinase_and_GPCR_Perturbations_up',
'LINCS_L1000_CRISPR_KO_Consensus_Sigs',
'LINCS_L1000_Chem_Pert_Consensus_Sigs',
'LINCS_L1000_Chem_Pert_down',
'LINCS_L1000_Chem_Pert_up',
'LINCS_L1000_Ligand_Perturbations_down',
'LINCS_L1000_Ligand_Perturbations_up',
'Ligand_Perturbations_from_GEO_down',
'Ligand_Perturbations_from_GEO_up',
'MAGMA_Drugs_and_Diseases',
'MAGNET_2023',
'MCF7_Perturbations_from_GEO_down',
'MCF7_Perturbations_from_GEO_up',
'MGI_Mammalian_Phenotype_2013',
'MGI_Mammalian_Phenotype_2017',
'MGI_Mammalian_Phenotype_Level_3',
'MGI_Mammalian_Phenotype_Level_4',
'MGI_Mammalian_Phenotype_Level_4_2019',
'MGI_Mammalian_Phenotype_Level_4_2021',
'MGI_Mammalian_Phenotype_Level_4_2024',
'MSigDB_Computational',
'MSigDB_Hallmark_2020',
'MSigDB_Oncogenic_Signatures',
'Metabolomics_Workbench_Metabolites_2022',
'Microbe_Perturbations_from_GEO_down',
'Microbe_Perturbations_from_GEO_up',
'MoTrPAC_2023',
'Mouse_Gene_Atlas',
'NCI-60_Cancer_Cell_Lines',
'NCI-Nature_2016',
'NIH_Funded_PIs_2017_AutoRIF_ARCHS4_Predictions',
'NIH_Funded_PIs_2017_GeneRIF_ARCHS4_Predictions',
'NIH_Funded_PIs_2017_Human_AutoRIF',
'NIH_Funded_PIs_2017_Human_GeneRIF',
'NURSA_Human_Endogenous_Complexome',
'OMIM_Disease',
'OMIM_Expanded',
'Old_CMAP_down',
'Old_CMAP_up',
'Orphanet_Augmented_2021',
'PFOCR_Pathways',
'PFOCR_Pathways_2023',
'PPI_Hub_Proteins',
'PanglaoDB_Augmented_2021',
'Panther_2015',
'Panther_2016',
'PerturbAtlas',
'Pfam_Domains_2019',
'Pfam_InterPro_Domains',
'PheWeb_2019',
'PhenGenI_Association_2021',
'Phosphatase_Substrates_from_DEPOD',
'ProteomicsDB_2020',
'Proteomics_Drug_Atlas_2023',
'RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO',
'RNAseq_Automatic_GEO_Signatures_Human_Down',
'RNAseq_Automatic_GEO_Signatures_Human_Up',
'RNAseq_Automatic_GEO_Signatures_Mouse_Down',
'RNAseq_Automatic_GEO_Signatures_Mouse_Up',
'Rare_Diseases_AutoRIF_ARCHS4_Predictions',
'Rare_Diseases_AutoRIF_Gene_Lists',
'Rare_Diseases_GeneRIF_ARCHS4_Predictions',
'Rare_Diseases_GeneRIF_Gene_Lists',
'Reactome_2013',
'Reactome_2015',
'Reactome_2016',
'Reactome_2022',
'Reactome_Pathways_2024',
'Rummagene_kinases',
'Rummagene_signatures',
'Rummagene_transcription_factors',
'SILAC_Phosphoproteomics',
'SubCell_BarCode',
'SynGO_2022',
'SynGO_2024',
'SysMyo_Muscle_Gene_Sets',
'TF-LOF_Expression_from_GEO',
'TF_Perturbations_Followed_by_Expression',
'TG_GATES_2020',
'TRANSFAC_and_JASPAR_PWMs',
'TRRUST_Transcription_Factors_2019',
'Table_Mining_of_CRISPR_Studies',
'Tabula_Muris',
'Tabula_Sapiens',
'TargetScan_microRNA',
'TargetScan_microRNA_2017',
'The_Kinase_Library_2023',
'The_Kinase_Library_2024',
'Tissue_Protein_Expression_from_Human_Proteome_Map',
'Tissue_Protein_Expression_from_ProteomicsDB',
'Transcription_Factor_PPIs',
'UK_Biobank_GWAS_v1',
'Virus-Host_PPI_P-HIPSTer_2020',
'VirusMINT',
'Virus_Perturbations_from_GEO_down',
'Virus_Perturbations_from_GEO_up',
'WikiPathway_2021_Human',
'WikiPathway_2023_Human',
'WikiPathways_2013',
'WikiPathways_2015',
'WikiPathways_2016',
'WikiPathways_2019_Human',
'WikiPathways_2019_Mouse',
'WikiPathways_2024_Human',
'WikiPathways_2024_Mouse',
'dbGaP',
'huMAP',
'lncHUB_lncRNA_Co-Expression',
'miRTarBase_2017']
[16]:
## KEGG analysis
# Query the KEGG_2019_Human library for every niche in `final_markers`.
# Per the recorded output, the result is a dict keyed by niche label (as a
# string) whose values are Enrichr result DataFrames; a niche with no hits
# yields an empty DataFrame.
# NOTE(review): topn_genes presumably caps how many markers per niche are
# submitted -- confirm against the Garfield documentation.
enrich_dict = get_niche_enrichr(
    final_markers,
    geneset='KEGG_2019_Human',
    niche_column='cluster',
    niches="all",
    organism="human",
    topn_genes=200,
)
enrich_dict
[16]:
{'0': Gene_set Term Overlap \
0 KEGG_2019_Human Melanoma 3/72
1 KEGG_2019_Human Glioma 3/75
2 KEGG_2019_Human Prostate cancer 3/97
3 KEGG_2019_Human Pathways in cancer 5/530
4 KEGG_2019_Human Ovarian steroidogenesis 2/49
.. ... ... ...
68 KEGG_2019_Human Chemokine signaling pathway 1/190
69 KEGG_2019_Human Endocytosis 1/244
70 KEGG_2019_Human Cytokine-cytokine receptor interaction 1/294
71 KEGG_2019_Human Human papillomavirus infection 1/330
72 KEGG_2019_Human Neuroactive ligand-receptor interaction 1/338
P-value Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.000123 0.005068 0 0 36.057971
1 0.000139 0.005068 0 0 34.550347
2 0.000297 0.007233 0 0 26.434840
3 0.000639 0.011655 0 0 8.419048
4 0.001985 0.021609 0 0 33.916596
.. ... ... ... ... ...
68 0.227323 0.240501 0 0 4.026048
69 0.282256 0.294353 0 0 3.122824
70 0.329752 0.339041 0 0 2.583355
71 0.362059 0.367088 0 0 2.296469
72 0.369032 0.369032 0 0 2.241041
Combined Score Genes
0 324.649934 PDGFRB;PDGFRA;IGF1
1 306.878965 PDGFRB;PDGFRA;IGF1
2 214.675455 PDGFRB;PDGFRA;IGF1
3 61.931908 PDGFRB;PDGFRA;CXCL12;MMP2;IGF1
4 211.037980 AKR1C3;IGF1
.. ... ...
68 5.964122 CXCL12
69 3.950189 PDGFRA
70 2.866012 CXCL12
71 2.333091 PDGFRB
72 2.234028 TAC1
[73 rows x 10 columns],
'1': Gene_set Term Overlap P-value Adjusted P-value \
0 KEGG_2019_Human ABC transporters 1/45 0.006735 0.006735
Old P-value Old Adjusted P-value Odds Ratio Combined Score Genes
0 0 0 226.738636 1133.789719 ABCC11 ,
'2': Gene_set Term Overlap \
0 KEGG_2019_Human Tuberculosis 6/179
1 KEGG_2019_Human Staphylococcus aureus infection 4/68
2 KEGG_2019_Human Pertussis 4/76
3 KEGG_2019_Human Complement and coagulation cascades 4/79
4 KEGG_2019_Human Hematopoietic cell lineage 4/97
5 KEGG_2019_Human Systemic lupus erythematosus 4/133
6 KEGG_2019_Human Phagosome 4/152
7 KEGG_2019_Human Toll-like receptor signaling pathway 3/104
8 KEGG_2019_Human Natural killer cell mediated cytotoxicity 3/131
9 KEGG_2019_Human Asthma 2/31
10 KEGG_2019_Human Prion diseases 2/35
11 KEGG_2019_Human Cell adhesion molecules (CAMs) 3/145
12 KEGG_2019_Human Transcriptional misregulation in cancer 3/186
13 KEGG_2019_Human Legionellosis 2/55
14 KEGG_2019_Human Acute myeloid leukemia 2/66
15 KEGG_2019_Human Fc epsilon RI signaling pathway 2/68
16 KEGG_2019_Human Leishmaniasis 2/74
17 KEGG_2019_Human Amoebiasis 2/96
18 KEGG_2019_Human T cell receptor signaling pathway 2/101
19 KEGG_2019_Human Chagas disease (American trypanosomiasis) 2/103
20 KEGG_2019_Human Sphingolipid signaling pathway 2/119
21 KEGG_2019_Human Osteoclast differentiation 2/127
22 KEGG_2019_Human Phospholipase D signaling pathway 2/148
23 KEGG_2019_Human Regulation of actin cytoskeleton 2/214
24 KEGG_2019_Human Primary immunodeficiency 1/37
25 KEGG_2019_Human Allograft rejection 1/38
26 KEGG_2019_Human Graft-versus-host disease 1/41
27 KEGG_2019_Human Type I diabetes mellitus 1/43
28 KEGG_2019_Human Intestinal immune network for IgA production 1/48
29 KEGG_2019_Human Cytokine-cytokine receptor interaction 2/294
30 KEGG_2019_Human MAPK signaling pathway 2/295
31 KEGG_2019_Human Cholesterol metabolism 1/50
32 KEGG_2019_Human Autoimmune thyroid disease 1/53
33 KEGG_2019_Human Pathogenic Escherichia coli infection 1/55
34 KEGG_2019_Human Viral myocarditis 1/59
35 KEGG_2019_Human Antigen processing and presentation 1/77
36 KEGG_2019_Human Salmonella infection 1/86
37 KEGG_2019_Human Salivary secretion 1/90
38 KEGG_2019_Human Fc gamma R-mediated phagocytosis 1/91
39 KEGG_2019_Human Rheumatoid arthritis 1/91
40 KEGG_2019_Human Th1 and Th2 cell differentiation 1/92
41 KEGG_2019_Human NF-kappa B signaling pathway 1/95
42 KEGG_2019_Human C-type lectin receptor signaling pathway 1/104
43 KEGG_2019_Human Th17 cell differentiation 1/107
44 KEGG_2019_Human TNF signaling pathway 1/110
45 KEGG_2019_Human Leukocyte transendothelial migration 1/112
46 KEGG_2019_Human Lysosome 1/123
47 KEGG_2019_Human Platelet activation 1/124
48 KEGG_2019_Human Tight junction 1/170
49 KEGG_2019_Human Kaposi sarcoma-associated herpesvirus infection 1/186
50 KEGG_2019_Human Chemokine signaling pathway 1/190
51 KEGG_2019_Human Rap1 signaling pathway 1/206
52 KEGG_2019_Human Human immunodeficiency virus 1 infection 1/212
53 KEGG_2019_Human Human T-cell leukemia virus 1 infection 1/219
P-value Adjusted P-value Old P-value Old Adjusted P-value \
0 1.891859e-07 0.000010 0 0
1 2.721979e-06 0.000067 0 0
2 4.254012e-06 0.000067 0 0
3 4.966980e-06 0.000067 0 0
4 1.124974e-05 0.000121 0 0
5 3.901661e-05 0.000351 0 0
6 6.568305e-05 0.000507 0 0
7 4.523374e-04 0.003053 0 0
8 8.859160e-04 0.004966 0 0
9 9.196678e-04 0.004966 0 0
10 1.172560e-03 0.005345 0 0
11 1.187804e-03 0.005345 0 0
12 2.420398e-03 0.010054 0 0
13 2.874421e-03 0.011087 0 0
14 4.111205e-03 0.014709 0 0
15 4.358303e-03 0.014709 0 0
16 5.139897e-03 0.016327 0 0
17 8.508370e-03 0.025525 0 0
18 9.380632e-03 0.026299 0 0
19 9.740293e-03 0.026299 0 0
20 1.283392e-02 0.033001 0 0
21 1.452129e-02 0.035643 0 0
22 1.937688e-02 0.045494 0 0
23 3.828686e-02 0.086145 0 0
24 5.231877e-02 0.111521 0 0
25 5.369545e-02 0.111521 0 0
26 5.781392e-02 0.115628 0 0
27 6.054995e-02 0.116775 0 0
28 6.735649e-02 0.118146 0 0
29 6.742397e-02 0.118146 0 0
30 6.782454e-02 0.118146 0 0
31 7.006576e-02 0.118236 0 0
32 7.411541e-02 0.121280 0 0
33 7.680572e-02 0.121986 0 0
34 8.216370e-02 0.126767 0 0
35 1.059054e-01 0.158858 0 0
36 1.175527e-01 0.164945 0 0
37 1.226822e-01 0.164945 0 0
38 1.239600e-01 0.164945 0 0
39 1.239600e-01 0.164945 0 0
40 1.252361e-01 0.164945 0 0
41 1.290535e-01 0.165926 0 0
42 1.404096e-01 0.176328 0 0
43 1.441630e-01 0.176537 0 0
44 1.479007e-01 0.176537 0 0
45 1.503837e-01 0.176537 0 0
46 1.639158e-01 0.185778 0 0
47 1.651356e-01 0.185778 0 0
48 2.194272e-01 0.241818 0 0
49 2.374995e-01 0.256187 0 0
50 2.419540e-01 0.256187 0 0
51 2.595223e-01 0.269504 0 0
52 2.660085e-01 0.271028 0 0
53 2.735064e-01 0.273506 0 0
Odds Ratio Combined Score Genes
0 29.853732 462.151763 FCGR3A;ITGAM;FCER1G;MRC1;ITGAX;CD14
1 49.767500 637.728286 C1QA;FCGR3A;ITGAM;C1QC
2 44.220000 546.897390 C1QA;ITGAM;CD14;C1QC
3 42.444800 518.365551 C1QA;ITGAM;ITGAX;C1QC
4 34.198710 389.699967 CD4;ITGAM;CD14;CD1C
5 24.610233 249.831344 CD86;C1QA;FCGR3A;C1QC
6 21.430270 206.387855 FCGR3A;ITGAM;MRC1;CD14
7 22.699924 174.813977 CD86;MAP3K8;CD14
8 17.887320 125.727974 FCGR3A;TYROBP;FCER1G
9 50.937420 356.128872 FCER1G;FCER1A
10 44.754209 302.026738 C1QA;C1QC
11 16.112405 108.527501 CD86;CD4;ITGAM
12 12.476671 75.157259 CD86;ITGAM;CD14
13 27.837876 162.904577 ITGAM;CD14
14 23.040509 126.585458 ITGAM;CD14
15 22.340067 121.433291 FCER1G;FCER1A
16 20.472222 107.903397 FCGR3A;ITGAM
17 15.663515 74.663352 ITGAM;CD14
18 14.868687 69.423507 CD4;MAP3K8
19 14.572791 67.493647 C1QA;C1QC
20 12.569801 54.749826 FCER1G;FCER1A
21 11.760593 49.772466 FCGR3A;TYROBP
22 10.058346 39.666842 FCER1G;FCER1A
23 6.903913 22.525043 ITGAM;ITGAX
24 19.776786 58.349429 CD4
25 19.241313 56.269813 CD86
26 17.795536 50.726631 CD86
27 16.946429 47.522644 CD86
28 15.139818 40.843533 CD86
29 4.992136 13.462566 CX3CR1;CD4
30 4.974845 13.386469 MAP3K8;CD14
31 14.520408 38.599907 APOC1
32 13.680632 35.598807 CD86
33 13.172619 33.807213 CD14
34 12.261700 30.642498 CD86
35 9.349154 20.990809 CD4
36 8.355462 17.887944 CD14
37 7.978331 16.739800 LYZ
38 7.889286 16.471219 FCGR3A
39 7.889286 16.471219 CD86
40 7.802198 16.209491 CD4
41 7.552052 15.463037 CD14
42 6.889043 13.524512 FCER1G
43 6.693059 12.963188 CD4
44 6.507864 12.437923 MAP3K8
45 6.389961 12.106200 ITGAM
46 5.810597 10.507898 CD68
47 5.763066 10.379214 FCER1G
48 4.184700 6.347080 CD1C
49 3.819691 5.491149 CD86
50 3.738095 5.304385 CX3CR1
51 3.443554 4.645053 ITGAM
52 3.344617 4.429033 CD4
53 3.236075 4.195345 CD4 ,
'3': Gene_set Term \
0 KEGG_2019_Human Cytokine-cytokine receptor interaction
1 KEGG_2019_Human Pathways in cancer
2 KEGG_2019_Human Thyroid cancer
3 KEGG_2019_Human Primary immunodeficiency
4 KEGG_2019_Human Malaria
5 KEGG_2019_Human Autoimmune thyroid disease
6 KEGG_2019_Human Endometrial cancer
7 KEGG_2019_Human Basal cell carcinoma
8 KEGG_2019_Human Acute myeloid leukemia
9 KEGG_2019_Human Adherens junction
10 KEGG_2019_Human Arrhythmogenic right ventricular cardiomyopath...
11 KEGG_2019_Human Colorectal cancer
12 KEGG_2019_Human Rheumatoid arthritis
13 KEGG_2019_Human Prostate cancer
14 KEGG_2019_Human Hematopoietic cell lineage
15 KEGG_2019_Human T cell receptor signaling pathway
16 KEGG_2019_Human Melanogenesis
17 KEGG_2019_Human FoxO signaling pathway
18 KEGG_2019_Human Cell adhesion molecules (CAMs)
19 KEGG_2019_Human Breast cancer
20 KEGG_2019_Human Gastric cancer
21 KEGG_2019_Human Cushing syndrome
22 KEGG_2019_Human Wnt signaling pathway
23 KEGG_2019_Human Hippo signaling pathway
24 KEGG_2019_Human JAK-STAT signaling pathway
25 KEGG_2019_Human Hepatocellular carcinoma
26 KEGG_2019_Human Chemokine signaling pathway
27 KEGG_2019_Human Human papillomavirus infection
28 KEGG_2019_Human PI3K-Akt signaling pathway
Overlap P-value Adjusted P-value Old P-value Old Adjusted P-value \
0 2/294 0.002091 0.042645 0 0
1 2/530 0.006647 0.042645 0 0
2 1/37 0.009217 0.042645 0 0
3 1/37 0.009217 0.042645 0 0
4 1/49 0.012191 0.042645 0 0
5 1/53 0.013181 0.042645 0 0
6 1/58 0.014417 0.042645 0 0
7 1/63 0.015653 0.042645 0 0
8 1/66 0.016393 0.042645 0 0
9 1/72 0.017872 0.042645 0 0
10 1/72 0.017872 0.042645 0 0
11 1/86 0.021318 0.042645 0 0
12 1/91 0.022546 0.042645 0 0
13 1/97 0.024018 0.042645 0 0
14 1/97 0.024018 0.042645 0 0
15 1/101 0.024999 0.042645 0 0
16 1/101 0.024999 0.042645 0 0
17 1/132 0.032570 0.046070 0 0
18 1/145 0.035731 0.046070 0 0
19 1/147 0.036217 0.046070 0 0
20 1/149 0.036703 0.046070 0 0
21 1/155 0.038158 0.046070 0 0
22 1/158 0.038884 0.046070 0 0
23 1/160 0.039369 0.046070 0 0
24 1/162 0.039853 0.046070 0 0
25 1/168 0.041304 0.046070 0 0
26 1/190 0.046610 0.050063 0 0
27 1/330 0.079829 0.082681 0 0
28 1/354 0.085430 0.085430 0 0
Odds Ratio Combined Score Genes
0 44.984018 277.547668 CCR7;IL7R
1 24.579545 123.233099 TCF7;IL7R
2 138.604167 649.602069 TCF7
3 138.604167 649.602069 IL7R
4 103.890625 457.850064 KLRB1
5 95.879808 415.060560 CTLA4
6 87.447368 370.717026 TCF7
7 80.375000 334.128841 TCF7
8 76.653846 315.116590 TCF7
9 70.154930 282.337966 TCF7
10 70.154930 282.337966 TCF7
11 58.558824 225.346680 TCF7
12 55.291667 209.676956 CTLA4
13 51.820312 193.235173 TCF7
14 51.820312 193.235173 IL7R
15 49.737500 183.478498 CTLA4
16 49.737500 183.478498 TCF7
17 37.908397 129.811804 IL7R
18 34.463542 114.822970 CTLA4
19 33.988014 112.779843 TCF7
20 33.525338 110.798233 TCF7
21 32.209416 105.196962 TCF7
22 31.589172 102.575141 TCF7
23 31.188679 100.888598 TCF7
24 30.798137 99.248846 IL7R
25 29.682635 94.592394 TCF7
26 26.198413 80.322554 CCR7
27 14.943769 37.775791 TCF7
28 13.910765 34.221286 IL7R ,
'4': Gene_set Term \
0 KEGG_2019_Human T cell receptor signaling pathway
1 KEGG_2019_Human Hematopoietic cell lineage
2 KEGG_2019_Human Primary immunodeficiency
3 KEGG_2019_Human Chagas disease (American trypanosomiasis)
4 KEGG_2019_Human Antigen processing and presentation
5 KEGG_2019_Human Th1 and Th2 cell differentiation
6 KEGG_2019_Human Th17 cell differentiation
7 KEGG_2019_Human Natural killer cell mediated cytotoxicity
8 KEGG_2019_Human Graft-versus-host disease
9 KEGG_2019_Human Epstein-Barr virus infection
10 KEGG_2019_Human Human immunodeficiency virus 1 infection
11 KEGG_2019_Human Measles
12 KEGG_2019_Human Cell adhesion molecules (CAMs)
13 KEGG_2019_Human Human T-cell leukemia virus 1 infection
14 KEGG_2019_Human Prion diseases
15 KEGG_2019_Human Allograft rejection
16 KEGG_2019_Human Type I diabetes mellitus
17 KEGG_2019_Human Autoimmune thyroid disease
18 KEGG_2019_Human Viral myocarditis
19 KEGG_2019_Human Cytosolic DNA-sensing pathway
20 KEGG_2019_Human Epithelial cell signaling in Helicobacter pylo...
21 KEGG_2019_Human Rheumatoid arthritis
22 KEGG_2019_Human Amoebiasis
23 KEGG_2019_Human Toll-like receptor signaling pathway
24 KEGG_2019_Human TNF signaling pathway
25 KEGG_2019_Human Apoptosis
26 KEGG_2019_Human Influenza A
27 KEGG_2019_Human NOD-like receptor signaling pathway
28 KEGG_2019_Human Chemokine signaling pathway
29 KEGG_2019_Human Human cytomegalovirus infection
30 KEGG_2019_Human Cytokine-cytokine receptor interaction
31 KEGG_2019_Human MAPK signaling pathway
32 KEGG_2019_Human Neuroactive ligand-receptor interaction
33 KEGG_2019_Human Herpes simplex virus 1 infection
Overlap P-value Adjusted P-value Old P-value Old Adjusted P-value \
0 6/101 5.222206e-10 1.775550e-08 0 0
1 5/97 3.538446e-08 4.070210e-07 0 0
2 4/37 4.700205e-08 4.070210e-07 0 0
3 5/103 4.788482e-08 4.070210e-07 0 0
4 4/77 9.387489e-07 6.383493e-06 0 0
5 4/92 1.919736e-06 1.087850e-05 0 0
6 4/107 3.511705e-06 1.705685e-05 0 0
7 4/131 7.851247e-06 3.336780e-05 0 0
8 3/41 8.896824e-06 3.361022e-05 0 0
9 4/201 4.228283e-05 1.437616e-04 0 0
10 4/212 5.203991e-05 1.608506e-04 0 0
11 3/138 3.362160e-04 9.526121e-04 0 0
12 3/145 3.886948e-04 1.016587e-03 0 0
13 3/219 1.286535e-03 3.124443e-03 0 0
14 1/35 3.444008e-02 7.806418e-02 0 0
15 1/38 3.733907e-02 7.934552e-02 0 0
16 1/43 4.215235e-02 8.430470e-02 0 0
17 1/53 5.171041e-02 9.767522e-02 0 0
18 1/59 5.740169e-02 1.027188e-01 0 0
19 1/63 6.117783e-02 1.040023e-01 0 0
20 1/68 6.587779e-02 1.066593e-01 0 0
21 1/91 8.721122e-02 1.347810e-01 0 0
22 1/96 9.178727e-02 1.356855e-01 0 0
23 1/104 9.906366e-02 1.403402e-01 0 0
24 1/110 1.044846e-01 1.420990e-01 0 0
25 1/143 1.337500e-01 1.749038e-01 0 0
26 1/171 1.578665e-01 1.987949e-01 0 0
27 1/178 1.637952e-01 1.988942e-01 0 0
28 1/190 1.738665e-01 2.038435e-01 0 0
29 1/225 2.025872e-01 2.295989e-01 0 0
30 1/294 2.564532e-01 2.732833e-01 0 0
31 1/295 2.572078e-01 2.732833e-01 0 0
32 1/338 2.889776e-01 2.977345e-01 0 0
33 1/492 3.924930e-01 3.924930e-01 0 0
Odds Ratio Combined Score Genes
0 89.706767 1917.296538 CD8B;CD8A;CD3G;CD247;CD3E;CD3D
1 72.057971 1236.298117 CD8B;CD8A;CD3G;CD3E;CD3D
2 151.113636 2549.751663 CD8B;CD8A;CD3E;CD3D
3 67.625850 1139.797678 CCL5;CD3G;CD247;CD3E;CD3D
4 68.174658 946.176833 CD8B;CD8A;KLRD1;KLRC1
5 56.511364 743.877325 CD3G;CD247;CD3E;CD3D
6 48.245146 605.930512 CD3G;CD247;CD3E;CD3D
7 39.080709 459.387408 PRF1;KLRD1;KLRC1;CD247
8 92.609907 1077.036200 PRF1;KLRD1;KLRC1
9 25.105330 252.839031 CD3G;CD247;CD3E;CD3D
10 23.764423 234.400380 CD3G;CD247;CD3E;CD3D
11 25.941176 207.471217 CD3G;CD3E;CD3D
12 24.653687 193.598400 CD8B;CD8A;TIGIT
13 16.147059 107.471636 CD3G;CD3E;CD3D
14 30.876161 104.007406 CCL5
15 28.368421 93.267286 PRF1
16 24.984962 79.114005 PRF1
17 20.170040 59.745598 PRF1
18 18.078040 51.661282 PRF1
19 16.908319 47.241345 CCL5
20 15.642577 42.547087 CCL5
21 11.631579 28.374333 CCL5
22 11.016620 26.310792 SERPINB9
23 10.156873 23.482615 CCL5
24 9.594882 21.672112 CCL5
25 7.352854 14.792346 PRF1
26 6.133127 11.321786 CCL5
27 5.888492 10.653098 CCL5
28 5.511278 9.641801 CCL5
29 4.641917 7.411214 CCL5
30 3.536375 4.812332 CCL5
31 3.524168 4.785365 DUSP2
32 3.067781 3.808361 GZMA
33 2.089077 1.953782 CCL5 ,
'5': Gene_set Term \
0 KEGG_2019_Human Fluid shear stress and atherosclerosis
1 KEGG_2019_Human Leukocyte transendothelial migration
2 KEGG_2019_Human Vascular smooth muscle contraction
3 KEGG_2019_Human PI3K-Akt signaling pathway
4 KEGG_2019_Human Focal adhesion
5 KEGG_2019_Human Rap1 signaling pathway
6 KEGG_2019_Human Renin secretion
7 KEGG_2019_Human Pathways in cancer
8 KEGG_2019_Human HIF-1 signaling pathway
9 KEGG_2019_Human Melanogenesis
10 KEGG_2019_Human Neuroactive ligand-receptor interaction
11 KEGG_2019_Human Relaxin signaling pathway
12 KEGG_2019_Human Cell adhesion molecules (CAMs)
13 KEGG_2019_Human Phospholipase D signaling pathway
14 KEGG_2019_Human Transcriptional misregulation in cancer
15 KEGG_2019_Human Proximal tubule bicarbonate reclamation
16 KEGG_2019_Human Calcium signaling pathway
17 KEGG_2019_Human Proteoglycans in cancer
18 KEGG_2019_Human cAMP signaling pathway
19 KEGG_2019_Human Ras signaling pathway
20 KEGG_2019_Human Thyroid cancer
21 KEGG_2019_Human Malaria
22 KEGG_2019_Human MAPK signaling pathway
23 KEGG_2019_Human VEGF signaling pathway
24 KEGG_2019_Human Viral myocarditis
25 KEGG_2019_Human Long-term potentiation
26 KEGG_2019_Human Adherens junction
27 KEGG_2019_Human Bile secretion
28 KEGG_2019_Human Bacterial invasion of epithelial cells
29 KEGG_2019_Human PPAR signaling pathway
30 KEGG_2019_Human Complement and coagulation cascades
31 KEGG_2019_Human ECM-receptor interaction
32 KEGG_2019_Human Hypertrophic cardiomyopathy (HCM)
33 KEGG_2019_Human Prostate cancer
34 KEGG_2019_Human Hematopoietic cell lineage
35 KEGG_2019_Human AGE-RAGE signaling pathway in diabetic complic...
36 KEGG_2019_Human Longevity regulating pathway
37 KEGG_2019_Human TNF signaling pathway
38 KEGG_2019_Human Serotonergic synapse
39 KEGG_2019_Human AMPK signaling pathway
40 KEGG_2019_Human Platelet activation
41 KEGG_2019_Human Osteoclast differentiation
42 KEGG_2019_Human Apoptosis
43 KEGG_2019_Human Adrenergic signaling in cardiomyocytes
44 KEGG_2019_Human Hepatitis C
45 KEGG_2019_Human Wnt signaling pathway
46 KEGG_2019_Human JAK-STAT signaling pathway
47 KEGG_2019_Human cGMP-PKG signaling pathway
48 KEGG_2019_Human Tight junction
49 KEGG_2019_Human Kaposi sarcoma-associated herpesvirus infection
50 KEGG_2019_Human Huntington disease
51 KEGG_2019_Human Thermogenesis
52 KEGG_2019_Human Endocytosis
53 KEGG_2019_Human Cytokine-cytokine receptor interaction
54 KEGG_2019_Human MicroRNAs in cancer
55 KEGG_2019_Human Human papillomavirus infection
Overlap P-value Adjusted P-value Old P-value Old Adjusted P-value \
0 4/139 0.000069 0.003868 0 0
1 3/112 0.000753 0.021092 0 0
2 3/132 0.001212 0.022619 0 0
3 4/354 0.002346 0.032841 0 0
4 3/199 0.003891 0.040006 0 0
5 3/206 0.004286 0.040006 0 0
6 2/69 0.005442 0.043537 0 0
7 4/530 0.009739 0.063550 0 0
8 2/100 0.011135 0.063550 0 0
9 2/101 0.011348 0.063550 0 0
10 3/338 0.016518 0.084092 0 0
11 2/130 0.018309 0.085442 0 0
12 2/145 0.022461 0.093336 0 0
13 2/148 0.023334 0.093336 0 0
14 2/186 0.035549 0.119406 0 0
15 1/23 0.036179 0.119406 0 0
16 2/188 0.036248 0.119406 0 0
17 2/201 0.040923 0.127316 0 0
18 2/212 0.045047 0.132771 0 0
19 2/232 0.052922 0.148181 0 0
20 1/37 0.057577 0.153537 0 0
21 1/49 0.075551 0.192310 0 0
22 2/295 0.080537 0.196091 0 0
23 1/59 0.090275 0.202215 0 0
24 1/59 0.090275 0.202215 0 0
25 1/67 0.101890 0.208947 0 0
26 1/72 0.109077 0.208947 0 0
27 1/72 0.109077 0.208947 0 0
28 1/74 0.111936 0.208947 0 0
29 1/74 0.111936 0.208947 0 0
30 1/79 0.119045 0.215048 0 0
31 1/82 0.123283 0.215746 0 0
32 1/85 0.127502 0.216368 0 0
33 1/97 0.144183 0.228603 0 0
34 1/97 0.144183 0.228603 0 0
35 1/100 0.148304 0.228603 0 0
36 1/102 0.151041 0.228603 0 0
37 1/110 0.161904 0.238277 0 0
38 1/113 0.165943 0.238277 0 0
39 1/120 0.175293 0.245411 0 0
40 1/124 0.180591 0.246057 0 0
41 1/127 0.184543 0.246057 0 0
42 1/143 0.205308 0.264558 0 0
43 1/145 0.207867 0.264558 0 0
44 1/155 0.220544 0.273072 0 0
45 1/158 0.224309 0.273072 0 0
46 1/162 0.229301 0.273210 0 0
47 1/166 0.234262 0.273306 0 0
48 1/170 0.239192 0.273363 0 0
49 1/186 0.258607 0.289640 0 0
50 1/193 0.266950 0.293121 0 0
51 1/231 0.310674 0.334572 0 0
52 1/244 0.325043 0.343442 0 0
53 1/294 0.377655 0.389651 0 0
54 1/299 0.382692 0.389651 0 0
55 1/330 0.413053 0.413053 0 0
Odds Ratio Combined Score Genes
0 20.987302 201.067277 EDN1;CAV1;PECAM1;KDR
1 18.847517 135.533859 CLDN5;PECAM1;RAPGEF3
2 15.909383 106.842987 EDN1;RAMP2;AVPR1A
3 8.007347 48.485534 ANGPT2;VWF;IL3RA;KDR
4 10.435609 57.906911 VWF;CAV1;KDR
5 10.072193 54.916677 ANGPT2;KDR;RAPGEF3
6 19.801990 103.239516 EDN1;AQP1
7 5.280282 24.456249 EDN1;EDNRB;IL3RA;PPARG
8 13.517007 60.795508 EDN1;ANGPT2
9 13.379798 59.924047 EDN1;EDNRB
10 6.062687 24.877036 EDN1;EDNRB;AVPR1A
11 10.333333 41.337049 EDN1;EDNRB
12 9.242424 35.084094 CLDN5;PECAM1
13 9.051142 34.012819 AVPR1A;RAPGEF3
14 7.168116 23.918863 ZEB1;PPARG
15 29.246334 97.076777 AQP1
16 7.090323 23.521154 EDNRB;AVPR1A
17 6.622781 21.166846 CAV1;KDR
18 6.272381 19.444653 EDN1;RAPGEF3
19 5.721159 16.814131 ANGPT2;KDR
20 17.860215 50.984486 PPARG
21 13.387097 34.578254 PECAM1
22 4.476678 11.276914 ANGPT2;KDR
23 11.073415 26.630430 KDR
24 11.073415 26.630430 CAV1
25 9.727273 22.215716 RAPGEF3
26 9.039982 20.029906 SNAI1
27 9.039982 20.029906 AQP1
28 8.791427 19.251718 CAV1
29 8.791427 19.251718 PPARG
30 8.225806 17.506626 VWF
31 7.919952 16.578593 VWF
32 7.635945 15.727145 EDN1
33 6.677419 12.931993 ZEB1
34 6.677419 12.931993 IL3RA
35 6.474096 12.355753 EDN1
36 6.345257 11.993830 PPARG
37 5.877183 10.700891 EDN1
38 5.718894 10.271775 RAPGEF3
39 5.380591 9.369189 PPARG
40 5.204563 8.907716 VWF
41 5.079877 8.584358 PPARG
42 4.503862 7.130722 IL3RA
43 4.440860 6.975958 RAPGEF3
44 4.150398 6.273978 CLDN5
45 4.070475 6.084264 SOX17
46 3.968543 5.844549 IL3RA
47 3.871554 5.618841 EDNRB
48 3.779156 5.406034 CLDN5
49 3.449520 4.665285 ANGPT2
50 3.322581 4.388117 PPARG
51 2.768303 3.236181 PPARG
52 2.618479 2.942638 CAV1
53 2.166135 2.109324 IL3RA
54 2.129249 2.045194 ZEB1
55 1.925581 1.702561 VWF ,
'6': Gene_set Term \
0 KEGG_2019_Human Proteoglycans in cancer
1 KEGG_2019_Human Leukocyte transendothelial migration
2 KEGG_2019_Human Cell adhesion molecules (CAMs)
3 KEGG_2019_Human beta-Alanine metabolism
4 KEGG_2019_Human Cysteine and methionine metabolism
5 KEGG_2019_Human Endocrine and other factor-regulated calcium r...
6 KEGG_2019_Human Arginine and proline metabolism
7 KEGG_2019_Human Glutathione metabolism
8 KEGG_2019_Human Prolactin signaling pathway
9 KEGG_2019_Human ECM-receptor interaction
10 KEGG_2019_Human Salmonella infection
11 KEGG_2019_Human Thyroid hormone signaling pathway
12 KEGG_2019_Human Estrogen signaling pathway
13 KEGG_2019_Human Fluid shear stress and atherosclerosis
14 KEGG_2019_Human Breast cancer
15 KEGG_2019_Human Hepatitis C
16 KEGG_2019_Human Tight junction
17 KEGG_2019_Human Focal adhesion
18 KEGG_2019_Human MAPK signaling pathway
19 KEGG_2019_Human Pathways in cancer
Overlap P-value Adjusted P-value Old P-value Old Adjusted P-value \
0 3/201 0.000508 0.010169 0 0
1 2/112 0.003543 0.035432 0 0
2 2/145 0.005860 0.039069 0 0
3 1/31 0.024523 0.109718 0 0
4 1/47 0.036958 0.109718 0 0
5 1/48 0.037730 0.109718 0 0
6 1/49 0.038502 0.109718 0 0
7 1/56 0.043887 0.109718 0 0
8 1/70 0.054573 0.121180 0 0
9 1/82 0.063644 0.121180 0 0
10 1/86 0.066649 0.121180 0 0
11 1/116 0.088902 0.146360 0 0
12 1/137 0.104182 0.146360 0 0
13 1/139 0.105625 0.146360 0 0
14 1/147 0.111374 0.146360 0 0
15 1/155 0.117088 0.146360 0 0
16 1/170 0.127709 0.150246 0 0
17 1/199 0.147905 0.164339 0 0
18 1/295 0.211677 0.222818 0 0
19 1/530 0.349414 0.349414 0 0
Odds Ratio Combined Score Genes
0 23.060606 174.894114 SDC4;FLNB;ESR1
1 25.810390 145.640570 CLDN4;RHOH
2 19.821179 101.872011 CLDN4;SDC4
3 44.342222 164.427886 SMS
4 28.895652 95.297145 SMS
5 28.279433 92.680050 ESR1
6 27.688889 90.184078 SMS
7 24.156364 75.515955 SMS
8 19.241546 55.958422 ESR1
9 16.381070 45.120903 SDC4
10 15.607059 42.268827 FLNB
11 11.518261 27.876728 ESR1
12 9.729412 22.004181 ESR1
13 9.587440 21.551245 SDC4
14 9.058447 19.882069 ESR1
15 8.584416 18.412124 CLDN4
16 7.816568 16.086497 CLDN4
17 6.661953 12.732227 FLNB
18 4.464853 6.932553 FLNB
19 2.451796 2.578058 ESR1 ,
'7': Empty DataFrame
Columns: [Gene_set, Term, Overlap, P-value, Adjusted P-value, Old P-value, Old Adjusted P-value, Odds Ratio, Combined Score, Genes]
Index: [],
'8': Gene_set Term Overlap \
0 KEGG_2019_Human Prostate cancer 2/97
1 KEGG_2019_Human Cellular senescence 2/160
2 KEGG_2019_Human Pathways in cancer 3/530
3 KEGG_2019_Human Steroid biosynthesis 1/19
4 KEGG_2019_Human Human cytomegalovirus infection 2/225
5 KEGG_2019_Human Histidine metabolism 1/23
6 KEGG_2019_Human Bladder cancer 1/41
7 KEGG_2019_Human Human papillomavirus infection 2/330
8 KEGG_2019_Human PI3K-Akt signaling pathway 2/354
9 KEGG_2019_Human Pathogenic Escherichia coli infection 1/55
10 KEGG_2019_Human Acute myeloid leukemia 1/66
11 KEGG_2019_Human Aminoacyl-tRNA biosynthesis 1/66
12 KEGG_2019_Human Melanoma 1/72
13 KEGG_2019_Human p53 signaling pathway 1/72
14 KEGG_2019_Human Glioma 1/75
15 KEGG_2019_Human Chronic myeloid leukemia 1/76
16 KEGG_2019_Human ErbB signaling pathway 1/85
17 KEGG_2019_Human Gap junction 1/88
18 KEGG_2019_Human IL-17 signaling pathway 1/93
19 KEGG_2019_Human Small cell lung cancer 1/93
20 KEGG_2019_Human Herpes simplex virus 1 infection 2/492
21 KEGG_2019_Human Choline metabolism in cancer 1/99
22 KEGG_2019_Human HIF-1 signaling pathway 1/100
23 KEGG_2019_Human Longevity regulating pathway 1/102
24 KEGG_2019_Human C-type lectin receptor signaling pathway 1/104
25 KEGG_2019_Human Thyroid hormone signaling pathway 1/116
26 KEGG_2019_Human AMPK signaling pathway 1/120
27 KEGG_2019_Human Cell cycle 1/124
28 KEGG_2019_Human Oocyte meiosis 1/125
29 KEGG_2019_Human FoxO signaling pathway 1/132
30 KEGG_2019_Human Insulin signaling pathway 1/137
31 KEGG_2019_Human Ubiquitin mediated proteolysis 1/137
32 KEGG_2019_Human Apoptosis 1/143
33 KEGG_2019_Human Phagosome 1/152
34 KEGG_2019_Human mTOR signaling pathway 1/152
35 KEGG_2019_Human RNA transport 1/165
36 KEGG_2019_Human Tight junction 1/170
37 KEGG_2019_Human Axon guidance 1/181
38 KEGG_2019_Human Transcriptional misregulation in cancer 1/186
39 KEGG_2019_Human Epstein-Barr virus infection 1/201
40 KEGG_2019_Human Proteoglycans in cancer 1/201
41 KEGG_2019_Human Viral carcinogenesis 1/201
42 KEGG_2019_Human Regulation of actin cytoskeleton 1/214
43 KEGG_2019_Human Endocytosis 1/244
44 KEGG_2019_Human MicroRNAs in cancer 1/299
P-value Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.004604 0.179144 0 0 22.032133
1 0.012088 0.179144 0 0 13.205197
2 0.017255 0.179144 0 0 6.151803
3 0.019771 0.179144 0 0 55.447222
4 0.022990 0.179144 0 0 9.325466
5 0.023886 0.179144 0 0 45.356818
6 0.042199 0.186756 0 0 24.923750
7 0.046375 0.186756 0 0 6.306483
8 0.052584 0.186756 0 0 5.869318
9 0.056216 0.186756 0 0 18.449074
10 0.067092 0.186756 0 0 15.318462
11 0.067092 0.186756 0 0 15.318462
12 0.072974 0.186756 0 0 14.019718
13 0.072974 0.186756 0 0 14.019718
14 0.075902 0.186756 0 0 13.449324
15 0.076876 0.186756 0 0 13.269333
16 0.085598 0.186756 0 0 11.842262
17 0.088488 0.186756 0 0 11.432184
18 0.093284 0.186756 0 0 10.808152
19 0.093284 0.186756 0 0 10.808152
20 0.093301 0.186756 0 0 4.186681
21 0.099009 0.186756 0 0 10.143367
22 0.099960 0.186756 0 0 10.040404
23 0.101858 0.186756 0 0 9.840594
24 0.103753 0.186756 0 0 9.648544
25 0.115042 0.189103 0 0 8.636522
26 0.118775 0.189103 0 0 8.344538
27 0.122493 0.189103 0 0 8.071545
28 0.123420 0.189103 0 0 8.006048
29 0.129884 0.189103 0 0 7.575573
30 0.134473 0.189103 0 0 7.295221
31 0.134473 0.189103 0 0 7.295221
32 0.139950 0.190418 0 0 6.984859
33 0.148103 0.190418 0 0 6.565563
34 0.148103 0.190418 0 0 6.565563
35 0.159750 0.199687 0 0 6.041159
36 0.164189 0.199689 0 0 5.860947
37 0.173876 0.204875 0 0 5.499722
38 0.178244 0.204875 0 0 5.349730
39 0.191216 0.204875 0 0 4.944750
40 0.191216 0.204875 0 0 4.944750
41 0.191216 0.204875 0 0 4.944750
42 0.202301 0.211710 0 0 4.639906
43 0.227331 0.232498 0 0 4.060905
44 0.271290 0.271290 0 0 3.302181
Combined Score Genes
0 118.551490 AR;MDM2
1 58.307830 EIF4EBP1;MDM2
2 24.974114 AR;TRAF4;MDM2
3 217.548632 SQLE
4 35.182111 EIF4EBP1;MDM2
5 169.383484 HDC
6 78.892444 MDM2
7 19.367179 EIF4EBP1;MDM2
8 17.287210 EIF4EBP1;MDM2
9 53.106558 TUBA4A
10 41.385655 EIF4EBP1
11 41.385655 KARS
12 36.698673 MDM2
13 36.698673 MDM2
14 34.676539 MDM2
15 34.043281 MDM2
16 29.109419 EIF4EBP1
17 27.721833 TUBA4A
18 25.638039 TRAF4
19 25.638039 TRAF4
20 9.930501 EIF4EBP1;SRPK1
21 23.456986 EIF4EBP1
22 23.122924 EIF4EBP1
23 22.477606 EIF4EBP1
24 21.861093 MDM2
25 18.676088 MDM2
26 17.778225 EIF4EBP1
27 16.947828 MDM2
28 16.749937 AR
29 15.462601 MDM2
30 14.637057 EIF4EBP1
31 14.637057 MDM2
32 13.735523 TUBA4A
33 12.539225 TUBA4A
34 12.539225 EIF4EBP1
35 11.080370 EIF4EBP1
36 10.589196 TUBA4A
37 9.621278 ENAH
38 9.226149 MDM2
39 8.180346 MDM2
40 8.180346 MDM2
41 8.180346 MDM2
42 7.414564 ENAH
43 6.015610 MDM2
44 4.307918 MDM2 ,
'9': Gene_set Term Overlap \
0 KEGG_2019_Human Vascular smooth muscle contraction 3/132
1 KEGG_2019_Human Estrogen signaling pathway 3/137
2 KEGG_2019_Human Relaxin signaling pathway 2/130
3 KEGG_2019_Human Apelin signaling pathway 2/137
4 KEGG_2019_Human Oxytocin signaling pathway 2/153
5 KEGG_2019_Human Tight junction 2/170
6 KEGG_2019_Human Calcium signaling pathway 2/188
7 KEGG_2019_Human Bladder cancer 1/41
8 KEGG_2019_Human Acute myeloid leukemia 1/66
9 KEGG_2019_Human Renin secretion 1/69
10 KEGG_2019_Human Pathways in cancer 2/530
11 KEGG_2019_Human PPAR signaling pathway 1/74
12 KEGG_2019_Human Gastric acid secretion 1/75
13 KEGG_2019_Human Chronic myeloid leukemia 1/76
14 KEGG_2019_Human Rheumatoid arthritis 1/91
15 KEGG_2019_Human IL-17 signaling pathway 1/93
16 KEGG_2019_Human Pancreatic secretion 1/98
17 KEGG_2019_Human Progesterone-mediated oocyte maturation 1/99
18 KEGG_2019_Human Th17 cell differentiation 1/107
19 KEGG_2019_Human Platelet activation 1/124
20 KEGG_2019_Human Oocyte meiosis 1/125
21 KEGG_2019_Human Breast cancer 1/147
22 KEGG_2019_Human cGMP-PKG signaling pathway 1/166
23 KEGG_2019_Human Transcriptional misregulation in cancer 1/186
24 KEGG_2019_Human Focal adhesion 1/199
25 KEGG_2019_Human cAMP signaling pathway 1/212
26 KEGG_2019_Human Regulation of actin cytoskeleton 1/214
27 KEGG_2019_Human Neuroactive ligand-receptor interaction 1/338
P-value Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.000077 0.001197 0 0 46.181395
1 0.000086 0.001197 0 0 44.447015
2 0.003120 0.024029 0 0 28.208807
3 0.003458 0.024029 0 0 26.736700
4 0.004291 0.024029 0 0 23.884407
5 0.005268 0.024584 0 0 21.449134
6 0.006404 0.025616 0 0 19.355816
7 0.026332 0.092163 0 0 41.556250
8 0.042073 0.096606 0 0 25.541026
9 0.043946 0.096606 0 0 24.410539
10 0.045071 0.096606 0 0 6.700758
11 0.047060 0.096606 0 0 22.732877
12 0.047682 0.096606 0 0 22.424550
13 0.048303 0.096606 0 0 22.124444
14 0.057578 0.097208 0 0 18.423148
15 0.058809 0.097208 0 0 18.020833
16 0.061878 0.097208 0 0 17.087629
17 0.062491 0.097208 0 0 16.912415
18 0.067380 0.099297 0 0 15.629717
19 0.077691 0.104392 0 0 13.457995
20 0.078294 0.104392 0 0 13.348790
21 0.091474 0.116421 0 0 11.324772
22 0.102716 0.125045 0 0 10.011111
23 0.114411 0.133479 0 0 8.919820
24 0.121937 0.135382 0 0 8.328704
25 0.129404 0.135382 0 0 7.810427
26 0.130547 0.135382 0 0 7.736307
27 0.198800 0.198800 0 0 4.859050
Combined Score Genes
0 437.673467 ACTA2;MYH11;MYLK
1 416.324945 KRT16;KRT14;PGR
2 162.759363 ACTA2;MMP1
3 151.519063 ACTA2;MYLK
4 130.200082 OXTR;MYLK
5 112.524413 MYH11;RUNX1
6 97.763170 OXTR;MYLK
7 151.138399 MMP1
8 80.922929 RUNX1
9 76.277933 CLCA2
10 20.769040 MMP1;RUNX1
11 69.479163 MMP1
12 68.242504 MYLK
13 67.042784 RUNX1
14 52.590864 MMP1
15 51.061388 MMP1
16 47.547814 CLCA2
17 46.893606 PGR
18 42.159718 RUNX1
19 34.385397 MYLK
20 34.003130 PGR
21 27.085501 PGR
22 22.783185 MYLK
23 19.337821 RUNX1
24 17.525699 MYLK
25 15.970901 OXTR
26 15.751274 MYLK
27 7.849591 OXTR ,
'11': Gene_set Term Overlap \
0 KEGG_2019_Human Protein processing in endoplasmic reticulum 2/165
1 KEGG_2019_Human Protein export 1/23
2 KEGG_2019_Human Cytokine-cytokine receptor interaction 2/294
3 KEGG_2019_Human Primary immunodeficiency 1/37
4 KEGG_2019_Human Vasopressin-regulated water reabsorption 1/44
5 KEGG_2019_Human Intestinal immune network for IgA production 1/48
6 KEGG_2019_Human B cell receptor signaling pathway 1/71
7 KEGG_2019_Human Autophagy 1/128
8 KEGG_2019_Human Apoptosis 1/143
9 KEGG_2019_Human Non-alcoholic fatty liver disease (NAFLD) 1/149
10 KEGG_2019_Human Alzheimer disease 1/171
11 KEGG_2019_Human MAPK signaling pathway 1/295
P-value Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.008482 0.080083 0 0 16.212679
1 0.019379 0.080083 0 0 56.707386
2 0.025324 0.080083 0 0 8.991324
3 0.031001 0.080083 0 0 34.630208
4 0.036763 0.080083 0 0 28.982558
5 0.040041 0.080083 0 0 26.510638
6 0.058689 0.100609 0 0 17.779464
7 0.103443 0.143304 0 0 9.771654
8 0.114883 0.143304 0 0 8.732835
9 0.119420 0.143304 0 0 8.376267
10 0.135871 0.148223 0 0 7.284191
11 0.223314 0.223314 0 0 4.185587
Combined Score Genes
0 77.331692 ERN1;DERL3
1 223.630151 SEC11C
2 33.052236 TNFRSF17;CD27
3 120.296366 CD79A
4 95.736874 AQP3
5 85.306989 TNFRSF17
6 50.413824 CD79A
7 22.169313 ERN1
8 18.896479 ERN1
9 17.800458 ERN1
10 14.539606 ERN1
11 6.274935 DUSP5 ,
'12': Gene_set Term Overlap \
0 KEGG_2019_Human Allograft rejection 2/38
1 KEGG_2019_Human Graft-versus-host disease 2/41
2 KEGG_2019_Human Type I diabetes mellitus 2/43
3 KEGG_2019_Human Autoimmune thyroid disease 2/53
4 KEGG_2019_Human B cell receptor signaling pathway 2/71
5 KEGG_2019_Human Rheumatoid arthritis 2/91
6 KEGG_2019_Human Hematopoietic cell lineage 2/97
7 KEGG_2019_Human Cell adhesion molecules (CAMs) 2/145
8 KEGG_2019_Human PI3K-Akt signaling pathway 2/354
9 KEGG_2019_Human Primary immunodeficiency 1/37
10 KEGG_2019_Human Ether lipid metabolism 1/47
11 KEGG_2019_Human Intestinal immune network for IgA production 1/48
12 KEGG_2019_Human Viral myocarditis 1/59
13 KEGG_2019_Human NF-kappa B signaling pathway 1/95
14 KEGG_2019_Human Glycerophospholipid metabolism 1/97
15 KEGG_2019_Human Toll-like receptor signaling pathway 1/104
16 KEGG_2019_Human Osteoclast differentiation 1/127
17 KEGG_2019_Human Natural killer cell mediated cytotoxicity 1/131
18 KEGG_2019_Human Systemic lupus erythematosus 1/133
19 KEGG_2019_Human Apoptosis 1/143
20 KEGG_2019_Human Transcriptional misregulation in cancer 1/186
21 KEGG_2019_Human Epstein-Barr virus infection 1/201
22 KEGG_2019_Human Endocytosis 1/244
23 KEGG_2019_Human Cytokine-cytokine receptor interaction 1/294
P-value Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.000315 0.003234 0 0 92.361111
1 0.000367 0.003234 0 0 85.243590
2 0.000404 0.003234 0 0 81.077236
3 0.000614 0.003686 0 0 65.147059
4 0.001100 0.005280 0 0 48.108696
5 0.001798 0.006993 0 0 37.260300
6 0.002040 0.006993 0 0 34.896491
7 0.004486 0.013459 0 0 23.127040
8 0.024703 0.061437 0 0 9.296402
9 0.025599 0.061437 0 0 42.628205
10 0.032412 0.066182 0 0 33.344482
11 0.033091 0.066182 0 0 32.633388
12 0.040530 0.074825 0 0 26.429708
13 0.064505 0.105313 0 0 16.278232
14 0.065821 0.105313 0 0 15.937500
15 0.070412 0.105617 0 0 14.849141
16 0.085349 0.112684 0 0 12.124542
17 0.087923 0.112684 0 0 11.749112
18 0.089208 0.112684 0 0 11.569930
19 0.095608 0.114729 0 0 10.749729
20 0.122652 0.140174 0 0 8.233264
21 0.131908 0.143900 0 0 7.610000
22 0.157942 0.164809 0 0 6.249763
23 0.187301 0.187301 0 0 5.170123
Combined Score Genes
0 744.614730 CD80;GZMB
1 674.212459 CD80;GZMB
2 633.507213 CD80;GZMB
3 481.759646 CD80;GZMB
4 327.740947 CD79B;CD19
5 235.522606 CD80;LTB
6 216.184248 CD19;MS4A1
7 125.040912 SELL;CD80
8 34.404231 TCL1A;CD19
9 156.241159 CD19
10 114.345446 PLD4
11 111.230476 CD80
12 84.726024 CD80
13 44.618744 LTB
14 43.363038 PLD4
15 39.400658 CD80
16 29.838641 LILRA4
17 28.565490 GZMB
18 27.961987 CD80
19 25.235015 GZMB
20 17.276717 GZMB
21 15.415200 CD19
22 11.534114 SMAP2
23 8.660142 LTB ,
'13': Gene_set Term \
0 KEGG_2019_Human Estrogen signaling pathway
1 KEGG_2019_Human Phenylalanine metabolism
2 KEGG_2019_Human Histidine metabolism
3 KEGG_2019_Human Cytokine-cytokine receptor interaction
4 KEGG_2019_Human beta-Alanine metabolism
5 KEGG_2019_Human Tyrosine metabolism
6 KEGG_2019_Human Intestinal immune network for IgA production
7 KEGG_2019_Human Pathogenic Escherichia coli infection
8 KEGG_2019_Human Central carbon metabolism in cancer
9 KEGG_2019_Human Acute myeloid leukemia
10 KEGG_2019_Human Glycolysis / Gluconeogenesis
11 KEGG_2019_Human Prolactin signaling pathway
12 KEGG_2019_Human Arrhythmogenic right ventricular cardiomyopath...
13 KEGG_2019_Human Metabolism of xenobiotics by cytochrome P450
14 KEGG_2019_Human Chemical carcinogenesis
15 KEGG_2019_Human Gap junction
16 KEGG_2019_Human IL-17 signaling pathway
17 KEGG_2019_Human Hematopoietic cell lineage
18 KEGG_2019_Human Melanogenesis
19 KEGG_2019_Human Drug metabolism
20 KEGG_2019_Human TNF signaling pathway
21 KEGG_2019_Human Signaling pathways regulating pluripotency of ...
22 KEGG_2019_Human Breast cancer
23 KEGG_2019_Human Phospholipase D signaling pathway
24 KEGG_2019_Human Phagosome
25 KEGG_2019_Human Wnt signaling pathway
26 KEGG_2019_Human JAK-STAT signaling pathway
27 KEGG_2019_Human Chemokine signaling pathway
28 KEGG_2019_Human Rap1 signaling pathway
29 KEGG_2019_Human Ras signaling pathway
30 KEGG_2019_Human MAPK signaling pathway
31 KEGG_2019_Human PI3K-Akt signaling pathway
32 KEGG_2019_Human Pathways in cancer
Overlap P-value Adjusted P-value Old P-value Old Adjusted P-value \
0 2/137 0.005922 0.140691 0 0
1 1/17 0.014358 0.140691 0 0
2 1/23 0.019379 0.140691 0 0
3 2/294 0.025324 0.140691 0 0
4 1/31 0.026036 0.140691 0 0
5 1/36 0.030175 0.140691 0 0
6 1/48 0.040041 0.140691 0 0
7 1/55 0.045753 0.140691 0 0
8 1/65 0.053857 0.140691 0 0
9 1/66 0.054664 0.140691 0 0
10 1/68 0.056276 0.140691 0 0
11 1/70 0.057885 0.140691 0 0
12 1/72 0.059492 0.140691 0 0
13 1/74 0.061096 0.140691 0 0
14 1/82 0.067486 0.140691 0 0
15 1/88 0.072252 0.140691 0 0
16 1/93 0.076206 0.140691 0 0
17 1/97 0.079357 0.140691 0 0
18 1/101 0.082499 0.140691 0 0
19 1/108 0.087973 0.140691 0 0
20 1/110 0.089531 0.140691 0 0
21 1/139 0.111846 0.157882 0 0
22 1/147 0.117910 0.157882 0 0
23 1/148 0.118666 0.157882 0 0
24 1/152 0.121681 0.157882 0 0
25 1/158 0.126185 0.157882 0 0
26 1/162 0.129176 0.157882 0 0
27 1/190 0.149845 0.176603 0 0
28 1/206 0.161448 0.183716 0 0
29 1/232 0.179985 0.197983 0 0
30 1/295 0.223314 0.237721 0 0
31 1/354 0.261930 0.270115 0 0
32 1/530 0.366668 0.366668 0 0
Odds Ratio Combined Score Genes
0 19.602963 100.544681 KRT15;KRT23
1 77.996094 330.973809 ALDH1A3
2 56.707386 223.630151 ALDH1A3
3 8.991324 33.052236 LIF;CXCL16
4 41.568750 151.654408 ALDH1A3
5 35.621429 124.701400 ALDH1A3
6 26.510638 85.306989 PIGR
7 23.065972 71.146852 TUBB2B
8 19.452148 56.827835 KIT
9 19.151923 55.665963 KIT
10 18.578358 53.459018 ALDH1A3
11 18.038043 51.395724 ELF5
12 17.528169 49.463084 DSC2
13 17.046233 47.649591 ALDH1A3
14 15.356481 41.398567 ALDH1A3
15 14.293103 37.556550 TUBB2B
16 13.512908 34.786537 S100A8
17 12.947266 32.805690 KIT
18 12.426875 31.004655 KIT
19 11.609813 28.220306 ALDH1A3
20 11.395642 27.499633 LIF
21 8.987772 19.688932 LIF
22 8.491866 18.154182 KIT
23 8.433673 17.975922 KIT
24 8.208609 17.290239 TUBB2B
25 7.892516 16.337531 SFRP1
26 7.694876 15.748150 LIF
27 6.545635 12.424613 CXCL16
28 6.029878 10.995928 KIT
29 5.344156 9.164610 KIT
30 4.185587 6.274935 KIT
31 3.475567 4.656138 KIT
32 2.298440 2.306020 KIT }
[17]:
## GO analysis
# Run the per-niche Enrichr query against the GO Biological Process 2023 library.
# `final_markers` is the marker table built in an earlier cell; niches are read
# from its 'cluster' column and every niche is queried (niches="all").
# NOTE(review): topn_genes=200 presumably caps the number of marker genes sent
# per niche — confirm against the get_niche_enrichr definition.
enrich_dict = get_niche_enrichr(
    final_markers,
    geneset='GO_Biological_Process_2023',
    niche_column='cluster',
    niches="all",
    organism="human",
    topn_genes=200,
)
# The result is a dict keyed by niche label, each value an enrichment table
# (Term, Overlap, P-value, Adjusted P-value, Odds Ratio, Combined Score, Genes).
enrich_dict
[17]:
{'0': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
472 GO_Biological_Process_2023
473 GO_Biological_Process_2023
474 GO_Biological_Process_2023
475 GO_Biological_Process_2023
476 GO_Biological_Process_2023
Term Overlap P-value \
0 Positive Regulation Of Cellular Metabolic Proc... 4/56 9.218820e-07
1 Extracellular Matrix Organization (GO:0030198) 5/176 3.439735e-06
2 Diterpenoid Metabolic Process (GO:0016101) 3/28 7.027351e-06
3 Positive Regulation Of Cell Motility (GO:2000147) 5/221 1.042434e-05
4 Inositol Lipid-Mediated Signaling (GO:0048017) 3/33 1.165118e-05
.. ... ... ...
472 Regulation Of Gene Expression (GO:0010468) 2/1127 4.544509e-01
473 Negative Regulation Of Apoptotic Process (GO:0... 1/482 4.826842e-01
474 Positive Regulation Of DNA-templated Transcrip... 2/1243 5.068592e-01
475 Positive Regulation Of Nucleic Acid-Templated ... 1/557 5.337910e-01
476 Regulation Of Transcription By RNA Polymerase ... 2/2028 7.745690e-01
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.000440 0 0 66.625418
1 0.000820 0 0 26.318448
2 0.001111 0 0 99.740000
3 0.001111 0 0 20.788089
4 0.001111 0 0 83.095833
.. ... ... ... ...
472 0.458294 0 0 1.340302
473 0.485739 0 0 1.558612
474 0.508993 0 0 1.207542
475 0.534912 0 0 1.343179
476 0.774569 0 0 0.708667
Combined Score Genes
0 925.883346 PDGFRB;AKR1C1;ADIPOQ;AKR1C3
1 331.089124 POSTN;CCDC80;LUM;MMP2;DPT
2 1183.484992 ADH1B;AKR1C1;AKR1C3
3 238.467796 PDGFRB;PDGFRA;LRRC15;CXCL12;IGF1
4 943.977200 PDGFRB;PDGFRA;IGF1
.. ... ...
472 1.057050 SFRP4;IGF1
473 1.135281 IGF1
474 0.820551 IGF1;FSTL3
475 0.843182 IGF1
476 0.181028 IGF1;FSTL3
[477 rows x 10 columns],
'1': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
5 GO_Biological_Process_2023
6 GO_Biological_Process_2023
7 GO_Biological_Process_2023
8 GO_Biological_Process_2023
9 GO_Biological_Process_2023
10 GO_Biological_Process_2023
11 GO_Biological_Process_2023
12 GO_Biological_Process_2023
13 GO_Biological_Process_2023
14 GO_Biological_Process_2023
15 GO_Biological_Process_2023
16 GO_Biological_Process_2023
17 GO_Biological_Process_2023
18 GO_Biological_Process_2023
19 GO_Biological_Process_2023
20 GO_Biological_Process_2023
21 GO_Biological_Process_2023
22 GO_Biological_Process_2023
23 GO_Biological_Process_2023
24 GO_Biological_Process_2023
25 GO_Biological_Process_2023
Term Overlap P-value \
0 Leukotriene Transport (GO:0071716) 1/6 0.000900
1 Nucleotide Transport (GO:0006862) 1/6 0.000900
2 Regulation Of DNA-templated Transcription In R... 1/7 0.001050
3 Positive Regulation Of Protein Kinase A Signal... 1/9 0.001349
4 Icosanoid Transport (GO:0071715) 1/14 0.002099
5 Regulation Of Protein Kinase A Signaling (GO:0... 1/16 0.002398
6 Xenobiotic Transport (GO:0042908) 1/20 0.002997
7 Bile Acid And Bile Salt Transport (GO:0015721) 1/23 0.003446
8 Regulation Of Cell Cycle Phase Transition (GO:... 1/25 0.003745
9 Negative Regulation Of Notch Signaling Pathway... 1/27 0.004045
10 Cellular Response To Heat (GO:0034605) 1/40 0.005988
11 Organic Hydroxy Compound Transport (GO:0015850) 1/45 0.006735
12 Positive Regulation Of NIK/NF-kappaB Signaling... 1/53 0.007929
13 Regulation Of Notch Signaling Pathway (GO:0008... 1/70 0.010464
14 Monocarboxylic Acid Transport (GO:0015718) 1/73 0.010911
15 Regulation Of Cell Cycle G1/S Phase Transition... 1/74 0.011059
16 Regulation Of NIK/NF-kappaB Signaling (GO:1901... 1/83 0.012399
17 Lipid Transport (GO:0006869) 1/108 0.016113
18 Positive Regulation Of Protein Serine/Threonin... 1/117 0.017448
19 Organic Substance Transport (GO:0071702) 1/201 0.029849
20 Negative Regulation Of Signal Transduction (GO... 1/267 0.039519
21 Negative Regulation Of Programmed Cell Death (... 1/381 0.056071
22 Negative Regulation Of Apoptotic Process (GO:0... 1/482 0.070575
23 Positive Regulation Of Intracellular Signal Tr... 1/525 0.076704
24 Regulation Of Apoptotic Process (GO:0042981) 1/705 0.102071
25 Regulation Of DNA-templated Transcription (GO:... 1/1922 0.261493
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.008771 0 0 1999.200000
1 0.008771 0 0 1999.200000
2 0.008771 0 0 1665.916667
3 0.008771 0 0 1249.312500
4 0.010392 0 0 768.615385
5 0.010392 0 0 666.066667
6 0.010516 0 0 525.736842
7 0.010516 0 0 453.977273
8 0.010516 0 0 416.104167
9 0.010516 0 0 384.057692
10 0.014154 0 0 255.871795
11 0.014593 0 0 226.738636
12 0.015859 0 0 191.778846
13 0.017972 0 0 144.405797
14 0.017972 0 0 138.368056
15 0.017972 0 0 136.465753
16 0.018963 0 0 121.432927
17 0.023275 0 0 92.943925
18 0.023877 0 0 85.693966
19 0.038804 0 0 49.492500
20 0.048929 0 0 37.088346
21 0.066265 0 0 25.811842
22 0.079780 0 0 20.286902
23 0.083096 0 0 18.581107
24 0.106154 0 0 13.702415
25 0.261493 0 0 4.704841
Combined Score Genes
0 14021.167888 ABCC11
1 14021.167888 ABCC11
2 11427.001555 TCIM
3 8255.546445 TCIM
4 4739.656184 ABCC11
5 4018.416231 TCIM
6 3054.588550 ABCC11
7 2574.276639 ABCC11
8 2324.863325 TCIM
9 2116.293582 TCIM
10 1309.541399 TCIM
11 1133.789719 ABCC11
12 927.671855 TCIM
13 658.467663 TCIM
14 625.150765 ABCC11
15 614.706229 TCIM
16 533.108411 TCIM
17 383.682514 ABCC11
18 346.933335 TCIM
19 173.797559 ABCC11
20 119.831047 TCIM
21 74.367574 TCIM
22 53.782261 TCIM
23 47.712506 TCIM
24 31.270134 TCIM
25 6.310824 TCIM ,
'2': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
395 GO_Biological_Process_2023
396 GO_Biological_Process_2023
397 GO_Biological_Process_2023
398 GO_Biological_Process_2023
399 GO_Biological_Process_2023
Term Overlap P-value \
0 Microglial Cell Activation (GO:0001774) 5/20 6.803156e-11
1 Positive Regulation Of Tumor Necrosis Factor S... 5/82 1.125362e-07
2 Synapse Pruning (GO:0098883) 3/8 1.527262e-07
3 Macrophage Activation (GO:0042116) 4/35 1.808757e-07
4 Positive Regulation Of Cytokine Production (GO... 7/320 2.899729e-07
.. ... ... ...
395 Regulation Of DNA-templated Transcription (GO:... 3/1922 5.370756e-01
396 Regulation Of Apoptotic Process (GO:0042981) 1/705 6.470547e-01
397 Protein Modification Process (GO:0036211) 1/711 6.502260e-01
398 Positive Regulation Of Transcription By RNA Po... 1/938 7.519249e-01
399 Regulation Of Transcription By RNA Polymerase ... 2/2028 8.078562e-01
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 2.721262e-08 0 0 277.166667
1 1.808757e-05 0 0 53.825758
2 1.808757e-05 0 0 460.753846
3 1.808757e-05 0 0 102.916129
4 2.319784e-05 0 0 19.983445
.. ... ... ... ...
395 5.425006e-01 0 0 1.085421
396 6.519443e-01 0 0 0.977425
397 6.534934e-01 0 0 0.968863
398 7.538095e-01 0 0 0.725492
399 8.078562e-01 0 0 0.656100
Combined Score Genes
0 6488.762532 CX3CR1;C1QA;ITGAM;TYROBP;AIF1
1 861.211613 CD86;FCGR3A;TYROBP;CD14;HAVCR2
2 7231.356200 CX3CR1;ITGAM;C1QC
3 1597.819788 C1QA;FCGR3A;ITGAM;AIF1
4 300.820349 CD86;CD4;TYROBP;FCER1G;CD14;AIF1;HAVCR2
.. ... ...
395 0.674715 CD86;CD4;HAVCR2
396 0.425497 MNDA
397 0.417033 PDK4
398 0.206851 MMP12
399 0.139993 MMP12;HAVCR2
[400 rows x 10 columns],
'3': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
121 GO_Biological_Process_2023
122 GO_Biological_Process_2023
123 GO_Biological_Process_2023
124 GO_Biological_Process_2023
125 GO_Biological_Process_2023
Term Overlap P-value \
0 Cellular Response To Cytokine Stimulus (GO:007... 3/308 0.000035
1 Establishment Of Lymphocyte Polarity (GO:0001767) 1/5 0.001249
2 Positive Regulation Of Antigen Processing And ... 1/5 0.001249
3 Regulation Of Natural Killer Cell Mediated Imm... 1/5 0.001249
4 Positive Regulation Of Dendritic Cell Antigen ... 1/5 0.001249
.. ... ... ...
121 Regulation Of Apoptotic Process (GO:0042981) 1/705 0.164270
122 Regulation Of Cell Population Proliferation (G... 1/766 0.177398
123 Regulation Of Gene Expression (GO:0010468) 1/1127 0.251758
124 Regulation Of DNA-templated Transcription (GO:... 1/1922 0.396636
125 Regulation Of Transcription By RNA Polymerase ... 1/2028 0.414121
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.004454 0 0 96.836066
1 0.015731 0 0 1249.437500
2 0.015731 0 0 1249.437500
3 0.015731 0 0 1249.437500
4 0.015731 0 0 1249.437500
.. ... ... ... ...
121 0.169655 0 0 6.850497
122 0.181725 0 0 6.284314
123 0.255819 0 0 4.189387
124 0.399809 0 0 2.352160
125 0.414121 0 0 2.216083
Combined Score Genes
0 992.593628 TCF7;CCR7;IL7R
1 8352.531058 CCR7
2 8352.531058 CCR7
3 8352.531058 KLRB1
4 8352.531058 CCR7
.. ... ...
121 12.373685 CTLA4
122 10.867829 IL7R
123 5.778365 TCF7
124 2.175127 TCF7
125 1.953692 TCF7
[126 rows x 10 columns],
'4': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
260 GO_Biological_Process_2023
261 GO_Biological_Process_2023
262 GO_Biological_Process_2023
263 GO_Biological_Process_2023
264 GO_Biological_Process_2023
Term Overlap P-value \
0 Alpha-Beta T Cell Activation (GO:0046631) 5/15 1.735319e-12
1 T Cell Receptor Signaling Pathway (GO:0050852) 6/95 3.594182e-10
2 T Cell Activation (GO:0042110) 6/111 9.274137e-10
3 Antigen Receptor-Mediated Signaling Pathway (G... 6/134 2.899637e-09
4 Negative Regulation Of Natural Killer Cell Med... 3/16 4.748696e-07
.. ... ... ...
260 Negative Regulation Of Apoptotic Process (GO:0... 1/482 3.862312e-01
261 Positive Regulation Of Cell Population Prolife... 1/483 3.868601e-01
262 Negative Regulation Of Cellular Process (GO:00... 1/537 4.199275e-01
263 Positive Regulation Of Cellular Process (GO:00... 1/594 4.529906e-01
264 Regulation Of Gene Expression (GO:0010468) 1/1127 6.866905e-01
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 4.598596e-10 0 0 665.666667
1 4.762291e-08 0 0 95.783307
2 8.192154e-08 0 0 81.122449
3 1.921009e-07 0 0 66.468750
4 2.516809e-05 0 0 271.045249
.. ... ... ... ...
260 3.912898e-01 0 0 2.133603
261 3.912898e-01 0 0 2.129067
262 4.231209e-01 0 0 1.909269
263 4.547065e-01 0 0 1.720689
264 6.866905e-01 0 0 0.881275
Combined Score Genes
0 18026.140001 NKG7;CD3G;CD247;CD3E;CD3D
1 2082.954976 CD8B;CD8A;CD3G;CD247;CD3E;CD3D
2 1687.235100 CD8B;CD8A;CD3G;CD247;CD3E;CD3D
3 1306.687913 CD8B;CD8A;CD3G;CD247;CD3E;CD3D
4 3946.479949 SERPINB9;KLRD1;KLRC1
.. ... ...
260 2.029738 SERPINB9
261 2.021959 CCL5
262 1.656622 LPXN
263 1.362586 CCL5
264 0.331246 CD3E
[265 rows x 10 columns],
'5': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
638 GO_Biological_Process_2023
639 GO_Biological_Process_2023
640 GO_Biological_Process_2023
641 GO_Biological_Process_2023
642 GO_Biological_Process_2023
Term Overlap P-value \
0 Vasculogenesis (GO:0001570) 6/33 1.096114e-11
1 Blood Vessel Morphogenesis (GO:0048514) 6/53 2.222005e-10
2 Regulation Of Angiogenesis (GO:0045765) 6/205 7.819976e-07
3 Endothelial Cell Development (GO:0001885) 3/20 4.163403e-06
4 Regulation Of Cell Migration (GO:0030334) 7/434 4.544200e-06
.. ... ... ...
638 Protein Localization (GO:0008104) 1/351 4.327918e-01
639 Ubiquitin-Dependent Protein Catabolic Process ... 1/367 4.473980e-01
640 Organelle Organization (GO:0006996) 1/418 4.915643e-01
641 Nervous System Development (GO:0007399) 1/433 5.038900e-01
642 Protein Phosphorylation (GO:0006468) 1/500 5.554998e-01
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 7.048016e-09 0 0 170.435897
1 7.143747e-08 0 0 97.811784
2 1.676082e-04 0 0 22.925010
3 5.843841e-04 0 0 121.405680
4 5.843841e-04 0 0 12.813770
.. ... ... ... ...
638 4.355010e-01 0 0 1.808111
639 4.494952e-01 0 0 1.727657
640 4.930980e-01 0 0 1.512416
641 5.046749e-01 0 0 1.458781
642 5.554998e-01 0 0 1.258582
Combined Score Genes
0 4301.233551 EGFL7;RAMP2;SOX17;SOX18;CAV1;KDR
1 2174.105650 EGFL7;RAMP2;SOX17;SOX18;CAV1;KDR
2 322.358054 CLDN5;RAMP2;KDR;PPARG;RAPGEF3;AQP1
3 1504.116552 SOX18;PECAM1;RAPGEF3
4 157.630634 CLDN5;EDN1;CAV1;MMRN2;PECAM1;SNAI1;KDR
.. ... ...
638 1.514290 CAV1
639 1.389566 PELI1
640 1.074061 CAV1
641 0.999845 EDNRB
642 0.739904 KDR
[643 rows x 10 columns],
'6': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
190 GO_Biological_Process_2023
191 GO_Biological_Process_2023
192 GO_Biological_Process_2023
193 GO_Biological_Process_2023
194 GO_Biological_Process_2023
Term Overlap P-value \
0 Cell-Cell Adhesion Via Plasma-Membrane Adhesio... 3/172 0.000322
1 Negative Regulation Of I-kappaB kinase/NF-kapp... 2/42 0.000507
2 Heterophilic Cell-Cell Adhesion Via Plasma Mem... 2/49 0.000690
3 Regulation Of miRNA Transcription (GO:1902893) 2/59 0.001000
4 Vesicle Organization (GO:0016050) 2/94 0.002513
.. ... ... ...
190 Positive Regulation Of Gene Expression (GO:001... 1/480 0.322150
191 Positive Regulation Of Cell Population Prolife... 1/483 0.323816
192 Negative Regulation Of Cellular Process (GO:00... 1/537 0.353148
193 Positive Regulation Of Cellular Process (GO:00... 1/594 0.382813
194 Regulation Of Apoptotic Process (GO:0042981) 1/705 0.436957
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.044870 0 0 27.057351
1 0.044870 0 0 71.228571
2 0.044870 0 0 60.598784
3 0.048735 0 0 49.942356
4 0.054437 0 0 30.888199
.. ... ... ... ...
190 0.328876 0 0 2.714683
191 0.328876 0 0 2.697372
192 0.356807 0 0 2.418905
193 0.384786 0 0 2.179989
194 0.436957 0 0 1.825758
Combined Score Genes
0 217.538951 CLDN4;CEACAM6;CEACAM8
1 540.402969 RHOH;ESR1
2 441.060243 CEACAM6;CEACAM8
3 345.005260 KLF5;ESR1
4 184.911060 TRAPPC3;HOOK2
.. ... ...
190 3.075021 TFAP2A
191 3.041502 CEACAM6
192 2.517762 TFAP2A
193 2.093247 CEACAM6
194 1.511583 TFAP2A
[195 rows x 10 columns],
'7': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
Term Overlap P-value \
0 Regulation Of Cell Death (GO:0010941) 1/59 0.00295
1 Negative Regulation Of Cell Death (GO:0060548) 1/64 0.00320
2 Negative Regulation Of Cellular Process (GO:00... 1/537 0.02685
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.00480 0 0 19941.0
1 0.00480 0 0 19936.0
2 0.02685 0 0 19463.0
Combined Score Genes
0 116175.593855 AGR3
1 114524.748408 AGR3
2 70407.318772 AGR3 ,
'8': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
217 GO_Biological_Process_2023
218 GO_Biological_Process_2023
219 GO_Biological_Process_2023
220 GO_Biological_Process_2023
221 GO_Biological_Process_2023
Term Overlap P-value \
0 Nuclear Chromosome Segregation (GO:0098813) 2/16 0.000125
1 Regulation Of Mitotic Cell Cycle (GO:0007346) 3/125 0.000292
2 Positive Regulation Of Mitotic Cell Cycle (GO:... 2/27 0.000363
3 Positive Regulation Of Viral Genome Replicatio... 2/29 0.000419
4 Negative Regulation Of Intrinsic Apoptotic Sig... 2/61 0.001851
.. ... ... ...
217 Protein Modification Process (GO:0036211) 1/711 0.532581
218 Negative Regulation Of Transcription By RNA Po... 1/763 0.558354
219 Positive Regulation Of Transcription By RNA Po... 1/938 0.635511
220 Regulation Of Transcription By RNA Polymerase ... 2/2028 0.643294
221 Positive Regulation Of DNA-templated Transcrip... 1/1243 0.740283
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.023259 0 0 150.112782
1 0.023259 0 0 27.127049
2 0.023259 0 0 84.016842
3 0.023259 0 0 77.785575
4 0.077652 0 0 35.539697
.. ... ... ... ...
217 0.542353 0 0 1.356972
218 0.566003 0 0 1.260958
219 0.641288 0 0 1.016115
220 0.646205 0 0 0.932769
221 0.740283 0 0 0.754308
Combined Score Genes
0 1349.226508 TOP2A;CENPF
1 220.786893 EIF4EBP1;MDM2;MKI67
2 665.560125 EIF4EBP1;MDM2
3 604.972981 TOP2A;SRPK1
4 223.614183 RTKN2;MDM2
.. ... ...
217 0.854920 SRPK1
218 0.734838 MDM2
219 0.460631 HMGA1
220 0.411494 HMGA1;MDM2
221 0.226837 HMGA1
[222 rows x 10 columns],
'9': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
143 GO_Biological_Process_2023
144 GO_Biological_Process_2023
145 GO_Biological_Process_2023
146 GO_Biological_Process_2023
147 GO_Biological_Process_2023
Term Overlap P-value \
0 Intermediate Filament Organization (GO:0045109) 3/68 0.000010
1 Muscle Contraction (GO:0006936) 3/94 0.000028
2 Smooth Muscle Contraction (GO:0006939) 2/14 0.000035
3 Supramolecular Fiber Organization (GO:0097435) 4/316 0.000039
4 Positive Regulation Of Wound Healing (GO:0090303) 2/40 0.000300
.. ... ... ...
143 Positive Regulation Of Cytokine Production (GO... 1/320 0.189209
144 Proteolysis (GO:0006508) 1/330 0.194550
145 Negative Regulation Of Gene Expression (GO:001... 1/336 0.197739
146 Positive Regulation Of Multicellular Organisma... 1/387 0.224380
147 Regulation Of Cell Population Proliferation (G... 1/766 0.398206
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.001445 0 0 91.947692
1 0.001445 0 0 65.591209
2 0.001445 0 0 302.651515
3 0.001445 0 0 28.027066
4 0.008880 0 0 95.449761
.. ... ... ... ...
143 0.194465 0 0 5.137931
144 0.198575 0 0 4.979230
145 0.200448 0 0 4.888557
146 0.225906 0 0 4.231649
147 0.398206 0 0 2.093900
Combined Score Genes
0 1054.173298 KRT16;KRT14;KRT5
1 688.107571 OXTR;MYH11;MYLK
2 3102.378062 MYH11;MYLK
3 284.484086 KRT16;KRT14;MYH11;KRT5
4 774.263839 FOXC2;MYLK
.. ... ...
143 8.554159 RUNX1
144 8.151326 MMP1
145 7.923405 PGR
146 6.323845 OXTR
147 1.928034 RUNX1
[148 rows x 10 columns],
'11': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
185 GO_Biological_Process_2023
186 GO_Biological_Process_2023
187 GO_Biological_Process_2023
188 GO_Biological_Process_2023
189 GO_Biological_Process_2023
Term Overlap P-value \
0 Response To Lipid (GO:0033993) 2/110 0.003863
1 Negative Regulation Of Amyloid Precursor Prote... 1/5 0.004243
2 IRE1-mediated Unfolded Protein Response (GO:00... 1/6 0.005090
3 Peptidyl-Serine Autophosphorylation (GO:0036289) 1/7 0.005936
4 Regulation Of Alpha-Beta T Cell Differentiatio... 1/7 0.005936
.. ... ... ...
185 Regulation Of Apoptotic Process (GO:0042981) 1/705 0.456820
186 Negative Regulation Of Transcription By RNA Po... 1/763 0.483930
187 Negative Regulation Of DNA-templated Transcrip... 1/1025 0.591284
188 Regulation Of DNA-templated Transcription (GO:... 1/1922 0.820636
189 Regulation Of Transcription By RNA Polymerase ... 1/2028 0.837707
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.078115 0 0 24.537037
1 0.078115 0 0 312.171875
2 0.078115 0 0 249.725000
3 0.078115 0 0 208.093750
4 0.078115 0 0 208.093750
.. ... ... ... ...
185 0.466644 0 0 1.711559
186 0.491694 0 0 1.576526
187 0.597574 0 0 1.157166
188 0.824978 0 0 0.587650
189 0.837707 0 0 0.553651
Combined Score Genes
0 136.337656 TRIB1;AQP3
1 1705.223501 ITM2C
2 1318.679916 ERN1
3 1066.850192 ERN1
4 1066.850192 PRDM1
.. ... ...
185 1.340949 CD27
186 1.144265 PRDM1
187 0.608043 PRDM1
188 0.116164 PRDM1
189 0.098044 PRDM1
[190 rows x 10 columns],
'12': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
131 GO_Biological_Process_2023
132 GO_Biological_Process_2023
133 GO_Biological_Process_2023
134 GO_Biological_Process_2023
135 GO_Biological_Process_2023
Term Overlap P-value \
0 B Cell Receptor Signaling Pathway (GO:0050853) 3/46 0.000004
1 B Cell Activation (GO:0042113) 3/92 0.000033
2 Antigen Receptor-Mediated Signaling Pathway (G... 3/134 0.000101
3 Regulation Of B Cell Activation (GO:0050864) 2/25 0.000135
4 B Cell Proliferation (GO:0042100) 2/31 0.000209
.. ... ... ...
131 Regulation Of Apoptotic Process (GO:0042981) 1/705 0.395028
132 Regulation Of DNA-templated Transcription (GO:... 2/1922 0.395231
133 Regulation Of Cell Population Proliferation (G... 1/766 0.421270
134 Positive Regulation Of Transcription By RNA Po... 1/938 0.489681
135 Regulation Of Transcription By RNA Polymerase ... 1/2028 0.776278
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.000554 0 0 126.488372
1 0.002247 0 0 60.971399
2 0.004597 0 0 41.335878
3 0.004599 0 0 144.659420
4 0.005689 0 0 114.695402
.. ... ... ... ...
131 0.404146 0 0 2.106862
132 0.404146 0 0 1.568229
133 0.427557 0 0 1.932730
134 0.493308 0 0 1.563829
135 0.776278 0 0 0.681530
Combined Score Genes
0 1569.908871 CD79B;CD19;MS4A1
1 629.072298 CD79B;BANK1;MS4A1
2 380.137511 CD79B;CD19;MS4A1
3 1288.680448 BANK1;CD19
4 971.758750 CD19;MS4A1
.. ... ...
131 1.956850 TCL1A
132 1.455764 SPIB;CD80
133 1.670811 TCL1A
134 1.116576 SPIB
135 0.172594 SPIB
[136 rows x 10 columns],
'13': Gene_set \
0 GO_Biological_Process_2023
1 GO_Biological_Process_2023
2 GO_Biological_Process_2023
3 GO_Biological_Process_2023
4 GO_Biological_Process_2023
.. ...
305 GO_Biological_Process_2023
306 GO_Biological_Process_2023
307 GO_Biological_Process_2023
308 GO_Biological_Process_2023
309 GO_Biological_Process_2023
Term Overlap P-value \
0 Intermediate Filament Organization (GO:0045109) 3/68 0.000025
1 Positive Regulation Of Growth (GO:0045927) 3/81 0.000042
2 Immune Response-Regulating Cell Surface Recept... 2/13 0.000053
3 Positive Regulation Of Cell Growth (GO:0030307) 3/106 0.000093
4 Fc Receptor Signaling Pathway (GO:0038093) 2/26 0.000218
.. ... ... ...
305 Regulation Of DNA-templated Transcription (GO:... 2/1922 0.496171
306 Regulation Of Transcription By RNA Polymerase ... 2/2028 0.526104
307 Positive Regulation Of Transcription By RNA Po... 1/938 0.558217
308 Negative Regulation Of DNA-templated Transcrip... 1/1025 0.591284
309 Regulation Of Gene Expression (GO:0010468) 1/1127 0.627084
Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio \
0 0.005451 0 0 65.663736
1 0.005451 0 0 54.684066
2 0.005451 0 0 242.084848
3 0.007224 0 0 41.359223
4 0.013109 0 0 110.883333
.. ... ... ... ...
305 0.502656 0 0 1.254375
306 0.531245 0 0 1.181770
307 0.561841 0 0 1.270411
308 0.593198 0 0 1.157166
309 0.627084 0 0 1.046681
Combined Score Genes
0 696.597919 KRT15;KRT23;KRT6B
1 551.396934 SFRP1;S100A8;CXCL16
2 2384.526542 PIGR;KIT
3 383.837658 SFRP1;S100A8;CXCL16
4 934.672523 PIGR;KIT
.. ... ...
305 0.879111 SFRP1;ELF5
306 0.759000 ELF5;LIF
307 0.740660 LIF
308 0.608043 SFRP1
309 0.488459 SFRP1
[310 rows x 10 columns]}
[ ]: