Spatial molecular niche discovery
Here we use the seqFISH Mouse Organogenesis dataset as an example to illustrate how Garfield performs spatial molecular niche discovery across multiple batches. Data access: https://marionilab.cruk.cam.ac.uk/SpatialMouseAtlas/.
[1]:
import os
import pandas as pd
import numpy as np
# Switch the working directory to a machine-specific project folder
# (presumably the Garfield project root -- adjust this path for your own setup).
os.chdir('/data2/zhouwg_data/project/Garfield')
# Echo the current working directory to confirm the change took effect.
os.getcwd()
[1]:
'/data2/zhouwg_data/project/Garfield'
[2]:
# load packages
import os
import warnings
import Garfield as gf
import scanpy as sc
from mudata import MuData
# Silence FutureWarning and UserWarning messages for the rest of the session.
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
# Display the installed Garfield version.
gf.__version__
[2]:
'1.0.0'
[3]:
# Load data
# Define paths
st_data_folder_path = "/home/zhouweige/zhouwg_data/project/Garfield_tutorials/data" # spatially resolved transcriptomics data
dataset = "seqfish_mouse_organogenesis"
cell_type_key = "celltype_mapped_refined"

# Slice names, in the same order as the numbered batch files on disk
# (batch1 -> embryo1_z2, ..., batch6 -> embryo3_z5).
batches = ["embryo1_z2",
           "embryo1_z5",
           "embryo2_z2",
           "embryo2_z5",
           "embryo3_z2",
           "embryo3_z5"]

# 1-based file indices derived from the number of slices, so adding or removing
# an entry in `batches` can never silently desynchronise the two lists.
batch_indeces = list(range(1, len(batches) + 1))

# Read one AnnData object per slice from
# "<st_data_folder_path>/<dataset>_batch<idx>.h5ad".
adata_list = []
for batch_idx, batch in zip(batch_indeces, batches):
    adata = sc.read_h5ad(f"{st_data_folder_path}/{dataset}_batch{batch_idx}.h5ad")
    # De-duplicate gene names, joining the numeric suffix with "++".
    adata.var_names_make_unique(join="++")
    # make spot name unique
    # (left disabled; enable if obs names collide across slices)
    # adata.obs_names = [x+'_'+batch for x in adata.obs_names]
    adata_list.append(adata)
[4]:
import anndata as ad

# Stack the per-slice AnnData objects into one object; the slice each cell
# came from is recorded in obs['slice_name'] using the names in `batches`.
adata_concat = ad.concat(
    adata_list,
    keys=batches,
    label="slice_name",
)
# Display a summary of the concatenated object.
adata_concat
[4]:
AnnData object with n_obs × n_vars = 52568 × 351
obs: 'Area', 'celltype_mapped_refined', 'sample', 'batch', 'slice_name'
obsm: 'spatial'
layers: 'counts'
[5]:
# check the batch information of this dataset
# (number of cells per value of obs['batch'])
adata_concat.obs['batch'].value_counts()
[5]:
batch
embryo3_z2 11169
embryo1_z2 10150
embryo3_z5 9408
embryo2_z2 7661
embryo1_z5 7656
embryo2_z5 6524
Name: count, dtype: int64
[6]:
# Ensure adata.X is counts.
# X is overwritten with a copy of the 'counts' layer, so later changes to X
# do not write through to layers['counts'].
adata_concat.X = adata_concat.layers['counts'].copy()
# Display the largest value in X as a quick sanity check.
adata_concat.X.max()
[6]:
141.0
Integrating spatially resolved transcriptomics data using Garfield
For spatial niche discovery from spatially resolved transcriptomics data, we should adjust the following parameters. Descriptions of all parameters can be found in Garfield_Model_Parameters.
[7]:
# set workdir
workdir = '/home/zhouweige/zhouwg_data/project/Garfield_tutorials/result/garfield_spRNA_embryo'
gf.settings.set_workdir(workdir)

### modify parameter
# User overrides, written as a dict literal; keys, values and order are the
# ones handed to gf.settings.set_gf_params below.
user_config = {
    ## Input options
    "adata_list": adata_concat,  # the concatenated AnnData object
    "profile": 'spatial',
    "data_type": 'single-modal',
    "sample_col": 'batch',
    "weight": 0.5,
    ## Preprocessing options
    "graph_const_method": 'mu_std',  # mu_std, Radius, KNN, Squidpy
    "used_hvg": True,
    "min_cells": 3,
    "min_features": 0,
    "keep_mt": False,
    "target_sum": 1e4,
    "rna_n_top_features": 3000,
    "n_components": 50,
    "n_neighbors": 5,
    "metric": 'euclidean',
    "svd_solver": 'arpack',
    # datasets
    "used_pca_feat": False,
    "adj_key": 'connectivities',
    # data split parameters
    "edge_val_ratio": 0.1,
    "edge_test_ratio": 0.,
    "node_val_ratio": 0.1,
    "node_test_ratio": 0.,
    ## Model options
    "augment_type": 'svd',
    "svd_q": 5,
    "use_FCencoder": False,
    "conv_type": 'GATv2Conv',  # GAT or GATv2Conv or GCN
    "gnn_layer": 2,
    "hidden_dims": [128, 128],
    "bottle_neck_neurons": 20,
    "cluster_num": 20,
    "drop_feature_rate": 0.2,
    "drop_edge_rate": 0.2,
    "num_heads": 3,
    "dropout": 0.2,
    "concat": True,
    "used_edge_weight": True,
    "used_DSBN": False,
    "used_mmd": False,
    # data loader parameters
    "num_neighbors": 5,
    "loaders_n_hops": 2,
    "edge_batch_size": 4096,
    "node_batch_size": 128,  # None
    # loss parameters
    "include_edge_recon_loss": True,
    "include_gene_expr_recon_loss": True,
    "lambda_latent_contrastive_instanceloss": 1.0,
    "lambda_latent_contrastive_clusterloss": 0.5,
    "lambda_gene_expr_recon": 10.,
    "lambda_edge_recon": 5.,
    "lambda_latent_adj_recon_loss": 1.0,
    "lambda_omics_recon_mmd_loss": 0.2,
    # train parameters
    "n_epochs_no_edge_recon": 0,
    "learning_rate": 0.001,
    "weight_decay": 1e-05,
    "gradient_clipping": 5,
    # other parameters
    "latent_key": 'garfield_latent',
    "reload_best_model": True,
    "use_early_stopping": True,
    "early_stopping_kwargs": None,
    "monitor": True,
    "device_id": 1,
    "seed": 2024,
    "verbose": True,
}

# Build the configuration object that is later passed to the Garfield model.
dict_config = gf.settings.set_gf_params(user_config)
Saving results in: /home/zhouweige/zhouwg_data/project/Garfield_tutorials/result/garfield_spRNA_embryo
[8]:
from Garfield.model import Garfield
# Initialize model
# Built from the configuration returned by gf.settings.set_gf_params; per the
# cell output, construction also runs data loading/preprocessing and the
# spatial graph calculation.
model = Garfield(dict_config)
--- DATA LOADING AND PREPROCESSING ---
/home/zhouweige/anaconda3/envs/Garfield/lib/python3.9/site-packages/scipy/sparse/_index.py:143: SparseEfficiencyWarning: Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.
self._set_arrayXarray(i, j, x)
------Calculating spatial graph...
COSINE SIM GRAPH DECODER -> dropout_rate: 0.2
[9]:
# Train model
# The printed log reports per-epoch train/validation losses and metrics,
# learning-rate reductions, early stopping, and a final evaluation summary.
model.train()
--- INITIALIZING TRAINER ---
Using GPU: device-1
Number of training nodes: 47311
Number of validation nodes: 5257
Number of training edges: 272860
Number of validation edges: 30317
Edge batch size: 4096
Node batch size: 128
--- MODEL TRAINING ---
Epoch 1/100 |--------------------| 1.0% val_auroc_score: 0.8252; val_auprc_score: 0.8270; val_best_acc_score: 0.7531; val_best_f1_score: 0.7606; train_kl_reg_loss: 143.9980; train_edge_recon_loss: 5885.2862; train_gene_expr_recon_loss: 670891.8582; train_lambda_latent_adj_recon_loss: 2975.3924; train_lambda_latent_contrastive_instanceloss: 7.0683; train_lambda_latent_contrastive_clusterloss: 3.6366; train_global_loss: 679907.2332; train_optim_loss: 679907.2332; val_kl_reg_loss: 99.1661; val_edge_recon_loss: 5287.1518; val_gene_expr_recon_loss: 576232.7344; val_lambda_latent_adj_recon_loss: 2698.1509; val_lambda_latent_contrastive_instanceloss: 6.4858; val_lambda_latent_contrastive_clusterloss: 3.4129; val_global_loss: 584327.1250; val_optim_loss: 584327.1250
Epoch 2/100 |--------------------| 2.0% val_auroc_score: 0.8855; val_auprc_score: 0.8968; val_best_acc_score: 0.8180; val_best_f1_score: 0.8165; train_kl_reg_loss: 162.3178; train_edge_recon_loss: 5570.9410; train_gene_expr_recon_loss: 581223.2808; train_lambda_latent_adj_recon_loss: 2754.0141; train_lambda_latent_contrastive_instanceloss: 6.6238; train_lambda_latent_contrastive_clusterloss: 3.3036; train_global_loss: 589720.4795; train_optim_loss: 589720.4795; val_kl_reg_loss: 179.9585; val_edge_recon_loss: 5172.7614; val_gene_expr_recon_loss: 540662.9297; val_lambda_latent_adj_recon_loss: 3102.7676; val_lambda_latent_contrastive_instanceloss: 6.3932; val_lambda_latent_contrastive_clusterloss: 2.9511; val_global_loss: 549127.7500; val_optim_loss: 549127.7500
Epoch 3/100 |--------------------| 3.0% val_auroc_score: 0.8980; val_auprc_score: 0.9121; val_best_acc_score: 0.8374; val_best_f1_score: 0.8321; train_kl_reg_loss: 186.9358; train_edge_recon_loss: 5424.3385; train_gene_expr_recon_loss: 560887.7957; train_lambda_latent_adj_recon_loss: 2676.9108; train_lambda_latent_contrastive_instanceloss: 6.4969; train_lambda_latent_contrastive_clusterloss: 2.9072; train_global_loss: 569185.3862; train_optim_loss: 569185.3862; val_kl_reg_loss: 154.7024; val_edge_recon_loss: 4827.5549; val_gene_expr_recon_loss: 533907.7031; val_lambda_latent_adj_recon_loss: 2409.7722; val_lambda_latent_contrastive_instanceloss: 6.3748; val_lambda_latent_contrastive_clusterloss: 2.6642; val_global_loss: 541308.7891; val_optim_loss: 541308.7891
Epoch 4/100 |--------------------| 4.0% val_auroc_score: 0.8965; val_auprc_score: 0.9150; val_best_acc_score: 0.8383; val_best_f1_score: 0.8313; train_kl_reg_loss: 192.4402; train_edge_recon_loss: 5259.7526; train_gene_expr_recon_loss: 553952.8601; train_lambda_latent_adj_recon_loss: 2243.6234; train_lambda_latent_contrastive_instanceloss: 6.4629; train_lambda_latent_contrastive_clusterloss: 2.6861; train_global_loss: 561657.8265; train_optim_loss: 561657.8265; val_kl_reg_loss: 166.0171; val_edge_recon_loss: 4679.4329; val_gene_expr_recon_loss: 524051.4688; val_lambda_latent_adj_recon_loss: 1909.6371; val_lambda_latent_contrastive_instanceloss: 6.3632; val_lambda_latent_contrastive_clusterloss: 2.5104; val_global_loss: 530815.4414; val_optim_loss: 530815.4414
Epoch 5/100 |█-------------------| 5.0% val_auroc_score: 0.8926; val_auprc_score: 0.9119; val_best_acc_score: 0.8347; val_best_f1_score: 0.8258; train_kl_reg_loss: 212.0395; train_edge_recon_loss: 5125.5335; train_gene_expr_recon_loss: 546801.5289; train_lambda_latent_adj_recon_loss: 1847.5145; train_lambda_latent_contrastive_instanceloss: 6.4340; train_lambda_latent_contrastive_clusterloss: 2.5556; train_global_loss: 553995.6110; train_optim_loss: 553995.6110; val_kl_reg_loss: 201.1441; val_edge_recon_loss: 4601.6688; val_gene_expr_recon_loss: 521174.4688; val_lambda_latent_adj_recon_loss: 1845.8800; val_lambda_latent_contrastive_instanceloss: 6.3566; val_lambda_latent_contrastive_clusterloss: 2.4339; val_global_loss: 527831.9531; val_optim_loss: 527831.9531
Epoch 6/100 |█-------------------| 6.0% val_auroc_score: 0.8901; val_auprc_score: 0.9112; val_best_acc_score: 0.8348; val_best_f1_score: 0.8245; train_kl_reg_loss: 242.8385; train_edge_recon_loss: 5071.4030; train_gene_expr_recon_loss: 542545.4767; train_lambda_latent_adj_recon_loss: 1764.0965; train_lambda_latent_contrastive_instanceloss: 6.4185; train_lambda_latent_contrastive_clusterloss: 2.4657; train_global_loss: 549632.7099; train_optim_loss: 549632.7099; val_kl_reg_loss: 213.3091; val_edge_recon_loss: 4568.9492; val_gene_expr_recon_loss: 520497.9961; val_lambda_latent_adj_recon_loss: 1773.7744; val_lambda_latent_contrastive_instanceloss: 6.3481; val_lambda_latent_contrastive_clusterloss: 2.3450; val_global_loss: 527062.6992; val_optim_loss: 527062.6992
Epoch 7/100 |█-------------------| 7.0% val_auroc_score: 0.8930; val_auprc_score: 0.9151; val_best_acc_score: 0.8371; val_best_f1_score: 0.8286; train_kl_reg_loss: 256.7653; train_edge_recon_loss: 5031.2601; train_gene_expr_recon_loss: 540507.4622; train_lambda_latent_adj_recon_loss: 1670.0192; train_lambda_latent_contrastive_instanceloss: 6.4079; train_lambda_latent_contrastive_clusterloss: 2.3941; train_global_loss: 547474.3088; train_optim_loss: 547474.3088; val_kl_reg_loss: 240.0973; val_edge_recon_loss: 4518.9738; val_gene_expr_recon_loss: 514080.4648; val_lambda_latent_adj_recon_loss: 1721.5623; val_lambda_latent_contrastive_instanceloss: 6.3438; val_lambda_latent_contrastive_clusterloss: 2.2874; val_global_loss: 520569.7266; val_optim_loss: 520569.7266
Epoch 8/100 |█-------------------| 8.0% val_auroc_score: 0.8949; val_auprc_score: 0.9146; val_best_acc_score: 0.8361; val_best_f1_score: 0.8278; train_kl_reg_loss: 275.7386; train_edge_recon_loss: 5009.1849; train_gene_expr_recon_loss: 537798.3410; train_lambda_latent_adj_recon_loss: 1635.0023; train_lambda_latent_contrastive_instanceloss: 6.3948; train_lambda_latent_contrastive_clusterloss: 2.3364; train_global_loss: 544726.9841; train_optim_loss: 544726.9841; val_kl_reg_loss: 231.0863; val_edge_recon_loss: 4497.0161; val_gene_expr_recon_loss: 514525.2461; val_lambda_latent_adj_recon_loss: 1661.7053; val_lambda_latent_contrastive_instanceloss: 6.3447; val_lambda_latent_contrastive_clusterloss: 2.2413; val_global_loss: 520923.6406; val_optim_loss: 520923.6406
Epoch 9/100 |█-------------------| 9.0% val_auroc_score: 0.8962; val_auprc_score: 0.9164; val_best_acc_score: 0.8379; val_best_f1_score: 0.8299; train_kl_reg_loss: 279.5599; train_edge_recon_loss: 4994.0874; train_gene_expr_recon_loss: 532982.8526; train_lambda_latent_adj_recon_loss: 1589.0474; train_lambda_latent_contrastive_instanceloss: 6.3902; train_lambda_latent_contrastive_clusterloss: 2.3029; train_global_loss: 539854.2388; train_optim_loss: 539854.2388; val_kl_reg_loss: 231.6465; val_edge_recon_loss: 4475.7152; val_gene_expr_recon_loss: 510907.1016; val_lambda_latent_adj_recon_loss: 1548.3639; val_lambda_latent_contrastive_instanceloss: 6.3354; val_lambda_latent_contrastive_clusterloss: 2.2099; val_global_loss: 517171.3984; val_optim_loss: 517171.3984
Epoch 10/100 |██------------------| 10.0% val_auroc_score: 0.8931; val_auprc_score: 0.9150; val_best_acc_score: 0.8359; val_best_f1_score: 0.8270; train_kl_reg_loss: 292.8605; train_edge_recon_loss: 4982.7499; train_gene_expr_recon_loss: 532560.5868; train_lambda_latent_adj_recon_loss: 1577.1255; train_lambda_latent_contrastive_instanceloss: 6.3810; train_lambda_latent_contrastive_clusterloss: 2.2637; train_global_loss: 539421.9669; train_optim_loss: 539421.9669; val_kl_reg_loss: 248.0701; val_edge_recon_loss: 4458.2536; val_gene_expr_recon_loss: 510130.5156; val_lambda_latent_adj_recon_loss: 1589.7175; val_lambda_latent_contrastive_instanceloss: 6.3374; val_lambda_latent_contrastive_clusterloss: 2.1822; val_global_loss: 516435.0703; val_optim_loss: 516435.0703
Epoch 11/100 |██------------------| 11.0% val_auroc_score: 0.8961; val_auprc_score: 0.9165; val_best_acc_score: 0.8388; val_best_f1_score: 0.8299; train_kl_reg_loss: 295.9002; train_edge_recon_loss: 4969.2420; train_gene_expr_recon_loss: 529195.2411; train_lambda_latent_adj_recon_loss: 1546.5997; train_lambda_latent_contrastive_instanceloss: 6.3753; train_lambda_latent_contrastive_clusterloss: 2.2393; train_global_loss: 536015.5998; train_optim_loss: 536015.5998; val_kl_reg_loss: 244.7273; val_edge_recon_loss: 4457.2879; val_gene_expr_recon_loss: 512043.1133; val_lambda_latent_adj_recon_loss: 1519.4812; val_lambda_latent_contrastive_instanceloss: 6.3323; val_lambda_latent_contrastive_clusterloss: 2.1609; val_global_loss: 518273.0977; val_optim_loss: 518273.0977
Epoch 12/100 |██------------------| 12.0% val_auroc_score: 0.8965; val_auprc_score: 0.9170; val_best_acc_score: 0.8384; val_best_f1_score: 0.8299; train_kl_reg_loss: 290.2068; train_edge_recon_loss: 4958.8105; train_gene_expr_recon_loss: 527256.4380; train_lambda_latent_adj_recon_loss: 1502.3812; train_lambda_latent_contrastive_instanceloss: 6.3720; train_lambda_latent_contrastive_clusterloss: 2.2209; train_global_loss: 534016.4272; train_optim_loss: 534016.4272; val_kl_reg_loss: 252.6359; val_edge_recon_loss: 4451.9817; val_gene_expr_recon_loss: 508041.3281; val_lambda_latent_adj_recon_loss: 1524.9273; val_lambda_latent_contrastive_instanceloss: 6.3239; val_lambda_latent_contrastive_clusterloss: 2.1251; val_global_loss: 514279.3203; val_optim_loss: 514279.3203
Epoch 13/100 |██------------------| 13.0% val_auroc_score: 0.8998; val_auprc_score: 0.9198; val_best_acc_score: 0.8424; val_best_f1_score: 0.8330; train_kl_reg_loss: 301.1507; train_edge_recon_loss: 4951.7350; train_gene_expr_recon_loss: 526170.7831; train_lambda_latent_adj_recon_loss: 1523.9489; train_lambda_latent_contrastive_instanceloss: 6.3681; train_lambda_latent_contrastive_clusterloss: 2.2055; train_global_loss: 532956.1959; train_optim_loss: 532956.1959; val_kl_reg_loss: 244.9166; val_edge_recon_loss: 4446.4363; val_gene_expr_recon_loss: 508866.7656; val_lambda_latent_adj_recon_loss: 1513.7197; val_lambda_latent_contrastive_instanceloss: 6.3230; val_lambda_latent_contrastive_clusterloss: 2.1247; val_global_loss: 515080.2930; val_optim_loss: 515080.2930
Epoch 14/100 |██------------------| 14.0% val_auroc_score: 0.8963; val_auprc_score: 0.9178; val_best_acc_score: 0.8417; val_best_f1_score: 0.8318; train_kl_reg_loss: 291.3508; train_edge_recon_loss: 4951.6723; train_gene_expr_recon_loss: 525927.7659; train_lambda_latent_adj_recon_loss: 1480.0718; train_lambda_latent_contrastive_instanceloss: 6.3687; train_lambda_latent_contrastive_clusterloss: 2.1943; train_global_loss: 532659.4361; train_optim_loss: 532659.4361; val_kl_reg_loss: 252.7811; val_edge_recon_loss: 4446.4618; val_gene_expr_recon_loss: 503508.1758; val_lambda_latent_adj_recon_loss: 1509.3852; val_lambda_latent_contrastive_instanceloss: 6.3274; val_lambda_latent_contrastive_clusterloss: 2.1220; val_global_loss: 509725.2578; val_optim_loss: 509725.2578
Epoch 15/100 |███-----------------| 15.0% val_auroc_score: 0.9002; val_auprc_score: 0.9203; val_best_acc_score: 0.8422; val_best_f1_score: 0.8337; train_kl_reg_loss: 285.8749; train_edge_recon_loss: 4943.2590; train_gene_expr_recon_loss: 525165.6632; train_lambda_latent_adj_recon_loss: 1443.0932; train_lambda_latent_contrastive_instanceloss: 6.3647; train_lambda_latent_contrastive_clusterloss: 2.1833; train_global_loss: 531846.4496; train_optim_loss: 531846.4496; val_kl_reg_loss: 224.6013; val_edge_recon_loss: 4416.8882; val_gene_expr_recon_loss: 505968.2500; val_lambda_latent_adj_recon_loss: 1393.3304; val_lambda_latent_contrastive_instanceloss: 6.3290; val_lambda_latent_contrastive_clusterloss: 2.1134; val_global_loss: 512011.5117; val_optim_loss: 512011.5117
Epoch 16/100 |███-----------------| 16.0% val_auroc_score: 0.9030; val_auprc_score: 0.9240; val_best_acc_score: 0.8481; val_best_f1_score: 0.8384; train_kl_reg_loss: 285.2293; train_edge_recon_loss: 4943.5495; train_gene_expr_recon_loss: 524888.5089; train_lambda_latent_adj_recon_loss: 1435.2312; train_lambda_latent_contrastive_instanceloss: 6.3615; train_lambda_latent_contrastive_clusterloss: 2.1716; train_global_loss: 531561.0555; train_optim_loss: 531561.0555; val_kl_reg_loss: 236.5831; val_edge_recon_loss: 4435.7971; val_gene_expr_recon_loss: 511203.5156; val_lambda_latent_adj_recon_loss: 1454.1355; val_lambda_latent_contrastive_instanceloss: 6.3278; val_lambda_latent_contrastive_clusterloss: 2.1142; val_global_loss: 517338.4727; val_optim_loss: 517338.4727
Epoch 17/100 |███-----------------| 17.0% val_auroc_score: 0.9034; val_auprc_score: 0.9230; val_best_acc_score: 0.8467; val_best_f1_score: 0.8380; train_kl_reg_loss: 295.6101; train_edge_recon_loss: 4942.3230; train_gene_expr_recon_loss: 522807.2085; train_lambda_latent_adj_recon_loss: 1461.6460; train_lambda_latent_contrastive_instanceloss: 6.3606; train_lambda_latent_contrastive_clusterloss: 2.1564; train_global_loss: 529515.3074; train_optim_loss: 529515.3074; val_kl_reg_loss: 247.3454; val_edge_recon_loss: 4424.4774; val_gene_expr_recon_loss: 501113.3594; val_lambda_latent_adj_recon_loss: 1437.2095; val_lambda_latent_contrastive_instanceloss: 6.3199; val_lambda_latent_contrastive_clusterloss: 2.1066; val_global_loss: 507230.8242; val_optim_loss: 507230.8242
Epoch 18/100 |███-----------------| 18.0% val_auroc_score: 0.9023; val_auprc_score: 0.9220; val_best_acc_score: 0.8428; val_best_f1_score: 0.8346; train_kl_reg_loss: 296.2562; train_edge_recon_loss: 4928.3164; train_gene_expr_recon_loss: 523123.4981; train_lambda_latent_adj_recon_loss: 1441.5480; train_lambda_latent_contrastive_instanceloss: 6.3559; train_lambda_latent_contrastive_clusterloss: 2.1503; train_global_loss: 529798.1283; train_optim_loss: 529798.1283; val_kl_reg_loss: 252.3596; val_edge_recon_loss: 4441.3446; val_gene_expr_recon_loss: 508097.1133; val_lambda_latent_adj_recon_loss: 1453.0274; val_lambda_latent_contrastive_instanceloss: 6.3211; val_lambda_latent_contrastive_clusterloss: 2.0710; val_global_loss: 514252.2422; val_optim_loss: 514252.2422
Epoch 19/100 |███-----------------| 19.0% val_auroc_score: 0.9030; val_auprc_score: 0.9215; val_best_acc_score: 0.8447; val_best_f1_score: 0.8360; train_kl_reg_loss: 303.9804; train_edge_recon_loss: 4928.4915; train_gene_expr_recon_loss: 521529.1665; train_lambda_latent_adj_recon_loss: 1439.9777; train_lambda_latent_contrastive_instanceloss: 6.3528; train_lambda_latent_contrastive_clusterloss: 2.1325; train_global_loss: 528210.1129; train_optim_loss: 528210.1129; val_kl_reg_loss: 251.3782; val_edge_recon_loss: 4421.6002; val_gene_expr_recon_loss: 502723.9062; val_lambda_latent_adj_recon_loss: 1397.3658; val_lambda_latent_contrastive_instanceloss: 6.3165; val_lambda_latent_contrastive_clusterloss: 2.0860; val_global_loss: 508802.6523; val_optim_loss: 508802.6523
Epoch 20/100 |████----------------| 20.0% val_auroc_score: 0.9049; val_auprc_score: 0.9237; val_best_acc_score: 0.8466; val_best_f1_score: 0.8377; train_kl_reg_loss: 298.4227; train_edge_recon_loss: 4924.6892; train_gene_expr_recon_loss: 521198.7281; train_lambda_latent_adj_recon_loss: 1411.4458; train_lambda_latent_contrastive_instanceloss: 6.3539; train_lambda_latent_contrastive_clusterloss: 2.1450; train_global_loss: 527841.7854; train_optim_loss: 527841.7854; val_kl_reg_loss: 242.6093; val_edge_recon_loss: 4420.7876; val_gene_expr_recon_loss: 509210.9922; val_lambda_latent_adj_recon_loss: 1398.2573; val_lambda_latent_contrastive_instanceloss: 6.3150; val_lambda_latent_contrastive_clusterloss: 2.0734; val_global_loss: 515281.0430; val_optim_loss: 515281.0430
Epoch 21/100 |████----------------| 21.0% val_auroc_score: 0.9078; val_auprc_score: 0.9263; val_best_acc_score: 0.8495; val_best_f1_score: 0.8414; train_kl_reg_loss: 298.6156; train_edge_recon_loss: 4921.0351; train_gene_expr_recon_loss: 520087.0401; train_lambda_latent_adj_recon_loss: 1415.8348; train_lambda_latent_contrastive_instanceloss: 6.3515; train_lambda_latent_contrastive_clusterloss: 2.1229; train_global_loss: 526731.0033; train_optim_loss: 526731.0033; val_kl_reg_loss: 244.2098; val_edge_recon_loss: 4419.9619; val_gene_expr_recon_loss: 505643.0352; val_lambda_latent_adj_recon_loss: 1373.9524; val_lambda_latent_contrastive_instanceloss: 6.3196; val_lambda_latent_contrastive_clusterloss: 2.0759; val_global_loss: 511689.5391; val_optim_loss: 511689.5391
Reducing learning rate: metric has not improved more than 0.0 in the last 4 epochs.
New learning rate is 0.0001.
Epoch 22/100 |████----------------| 22.0% val_auroc_score: 0.9079; val_auprc_score: 0.9268; val_best_acc_score: 0.8495; val_best_f1_score: 0.8414; train_kl_reg_loss: 296.6114; train_edge_recon_loss: 4913.8968; train_gene_expr_recon_loss: 517173.4622; train_lambda_latent_adj_recon_loss: 1373.3456; train_lambda_latent_contrastive_instanceloss: 6.3477; train_lambda_latent_contrastive_clusterloss: 2.1230; train_global_loss: 523765.7906; train_optim_loss: 523765.7906; val_kl_reg_loss: 233.6331; val_edge_recon_loss: 4394.0042; val_gene_expr_recon_loss: 502743.5547; val_lambda_latent_adj_recon_loss: 1336.0905; val_lambda_latent_contrastive_instanceloss: 6.3144; val_lambda_latent_contrastive_clusterloss: 2.0668; val_global_loss: 508715.6602; val_optim_loss: 508715.6602
Epoch 23/100 |████----------------| 23.0% val_auroc_score: 0.9069; val_auprc_score: 0.9259; val_best_acc_score: 0.8483; val_best_f1_score: 0.8396; train_kl_reg_loss: 301.2392; train_edge_recon_loss: 4906.6374; train_gene_expr_recon_loss: 516599.6287; train_lambda_latent_adj_recon_loss: 1386.9922; train_lambda_latent_contrastive_instanceloss: 6.3471; train_lambda_latent_contrastive_clusterloss: 2.1182; train_global_loss: 523202.9646; train_optim_loss: 523202.9646; val_kl_reg_loss: 242.8831; val_edge_recon_loss: 4404.0029; val_gene_expr_recon_loss: 503694.2695; val_lambda_latent_adj_recon_loss: 1355.2806; val_lambda_latent_contrastive_instanceloss: 6.3104; val_lambda_latent_contrastive_clusterloss: 2.0523; val_global_loss: 509704.8086; val_optim_loss: 509704.8086
Epoch 24/100 |████----------------| 24.0% val_auroc_score: 0.9086; val_auprc_score: 0.9267; val_best_acc_score: 0.8502; val_best_f1_score: 0.8420; train_kl_reg_loss: 306.9987; train_edge_recon_loss: 4906.5948; train_gene_expr_recon_loss: 515721.0051; train_lambda_latent_adj_recon_loss: 1385.8733; train_lambda_latent_contrastive_instanceloss: 6.3446; train_lambda_latent_contrastive_clusterloss: 2.1204; train_global_loss: 522328.9398; train_optim_loss: 522328.9398; val_kl_reg_loss: 250.2436; val_edge_recon_loss: 4398.5364; val_gene_expr_recon_loss: 503862.2031; val_lambda_latent_adj_recon_loss: 1382.0446; val_lambda_latent_contrastive_instanceloss: 6.3199; val_lambda_latent_contrastive_clusterloss: 2.0455; val_global_loss: 509901.3828; val_optim_loss: 509901.3828
Epoch 25/100 |█████---------------| 25.0% val_auroc_score: 0.9080; val_auprc_score: 0.9265; val_best_acc_score: 0.8482; val_best_f1_score: 0.8414; train_kl_reg_loss: 314.3502; train_edge_recon_loss: 4903.8369; train_gene_expr_recon_loss: 517129.7495; train_lambda_latent_adj_recon_loss: 1401.8595; train_lambda_latent_contrastive_instanceloss: 6.3460; train_lambda_latent_contrastive_clusterloss: 2.1140; train_global_loss: 523758.2575; train_optim_loss: 523758.2575; val_kl_reg_loss: 252.1183; val_edge_recon_loss: 4395.6864; val_gene_expr_recon_loss: 498417.9414; val_lambda_latent_adj_recon_loss: 1355.1604; val_lambda_latent_contrastive_instanceloss: 6.3055; val_lambda_latent_contrastive_clusterloss: 2.0429; val_global_loss: 504429.2539; val_optim_loss: 504429.2539
Epoch 26/100 |█████---------------| 26.0% val_auroc_score: 0.9094; val_auprc_score: 0.9265; val_best_acc_score: 0.8508; val_best_f1_score: 0.8432; train_kl_reg_loss: 321.0471; train_edge_recon_loss: 4900.5190; train_gene_expr_recon_loss: 516130.0588; train_lambda_latent_adj_recon_loss: 1412.2932; train_lambda_latent_contrastive_instanceloss: 6.3432; train_lambda_latent_contrastive_clusterloss: 2.1062; train_global_loss: 522772.3657; train_optim_loss: 522772.3657; val_kl_reg_loss: 256.5296; val_edge_recon_loss: 4388.4486; val_gene_expr_recon_loss: 500430.6328; val_lambda_latent_adj_recon_loss: 1343.0289; val_lambda_latent_contrastive_instanceloss: 6.3153; val_lambda_latent_contrastive_clusterloss: 2.0634; val_global_loss: 506427.0117; val_optim_loss: 506427.0117
Epoch 27/100 |█████---------------| 27.0% val_auroc_score: 0.9099; val_auprc_score: 0.9281; val_best_acc_score: 0.8512; val_best_f1_score: 0.8438; train_kl_reg_loss: 322.1430; train_edge_recon_loss: 4898.4001; train_gene_expr_recon_loss: 515153.5536; train_lambda_latent_adj_recon_loss: 1404.5756; train_lambda_latent_contrastive_instanceloss: 6.3393; train_lambda_latent_contrastive_clusterloss: 2.1024; train_global_loss: 521787.1152; train_optim_loss: 521787.1152; val_kl_reg_loss: 257.4874; val_edge_recon_loss: 4391.9070; val_gene_expr_recon_loss: 505991.1016; val_lambda_latent_adj_recon_loss: 1348.7751; val_lambda_latent_contrastive_instanceloss: 6.3120; val_lambda_latent_contrastive_clusterloss: 2.0559; val_global_loss: 511997.6367; val_optim_loss: 511997.6367
Epoch 28/100 |█████---------------| 28.0% val_auroc_score: 0.9098; val_auprc_score: 0.9278; val_best_acc_score: 0.8501; val_best_f1_score: 0.8421; train_kl_reg_loss: 325.8071; train_edge_recon_loss: 4897.5165; train_gene_expr_recon_loss: 516324.5163; train_lambda_latent_adj_recon_loss: 1412.2301; train_lambda_latent_contrastive_instanceloss: 6.3396; train_lambda_latent_contrastive_clusterloss: 2.1076; train_global_loss: 522968.5126; train_optim_loss: 522968.5126; val_kl_reg_loss: 257.3006; val_edge_recon_loss: 4390.4759; val_gene_expr_recon_loss: 505502.5625; val_lambda_latent_adj_recon_loss: 1376.8004; val_lambda_latent_contrastive_instanceloss: 6.3225; val_lambda_latent_contrastive_clusterloss: 2.0599; val_global_loss: 511535.5234; val_optim_loss: 511535.5234
Epoch 29/100 |█████---------------| 29.0% val_auroc_score: 0.9089; val_auprc_score: 0.9271; val_best_acc_score: 0.8508; val_best_f1_score: 0.8424; train_kl_reg_loss: 327.5898; train_edge_recon_loss: 4894.8562; train_gene_expr_recon_loss: 515555.4314; train_lambda_latent_adj_recon_loss: 1412.6009; train_lambda_latent_contrastive_instanceloss: 6.3408; train_lambda_latent_contrastive_clusterloss: 2.1047; train_global_loss: 522198.9282; train_optim_loss: 522198.9282; val_kl_reg_loss: 257.2934; val_edge_recon_loss: 4386.9654; val_gene_expr_recon_loss: 503678.5039; val_lambda_latent_adj_recon_loss: 1391.2138; val_lambda_latent_contrastive_instanceloss: 6.3122; val_lambda_latent_contrastive_clusterloss: 2.0511; val_global_loss: 509722.3438; val_optim_loss: 509722.3438
Reducing learning rate: metric has not improved more than 0.0 in the last 4 epochs.
New learning rate is 1e-05.
Epoch 30/100 |██████--------------| 30.0% val_auroc_score: 0.9104; val_auprc_score: 0.9286; val_best_acc_score: 0.8522; val_best_f1_score: 0.8441; train_kl_reg_loss: 331.1566; train_edge_recon_loss: 4893.3296; train_gene_expr_recon_loss: 515422.0555; train_lambda_latent_adj_recon_loss: 1408.6650; train_lambda_latent_contrastive_instanceloss: 6.3405; train_lambda_latent_contrastive_clusterloss: 2.1040; train_global_loss: 522063.6516; train_optim_loss: 522063.6516; val_kl_reg_loss: 259.2392; val_edge_recon_loss: 4378.0033; val_gene_expr_recon_loss: 502574.3398; val_lambda_latent_adj_recon_loss: 1362.4006; val_lambda_latent_contrastive_instanceloss: 6.3134; val_lambda_latent_contrastive_clusterloss: 2.0721; val_global_loss: 508582.3711; val_optim_loss: 508582.3711
Epoch 31/100 |██████--------------| 31.0% val_auroc_score: 0.9077; val_auprc_score: 0.9256; val_best_acc_score: 0.8500; val_best_f1_score: 0.8414; train_kl_reg_loss: 329.9439; train_edge_recon_loss: 4893.6683; train_gene_expr_recon_loss: 515213.6679; train_lambda_latent_adj_recon_loss: 1410.4909; train_lambda_latent_contrastive_instanceloss: 6.3404; train_lambda_latent_contrastive_clusterloss: 2.1039; train_global_loss: 521856.2183; train_optim_loss: 521856.2183; val_kl_reg_loss: 261.1753; val_edge_recon_loss: 4394.0333; val_gene_expr_recon_loss: 500577.7383; val_lambda_latent_adj_recon_loss: 1404.9992; val_lambda_latent_contrastive_instanceloss: 6.3130; val_lambda_latent_contrastive_clusterloss: 2.0408; val_global_loss: 506646.2930; val_optim_loss: 506646.2930
Epoch 32/100 |██████--------------| 32.0% val_auroc_score: 0.9089; val_auprc_score: 0.9272; val_best_acc_score: 0.8512; val_best_f1_score: 0.8421; train_kl_reg_loss: 330.7787; train_edge_recon_loss: 4894.3714; train_gene_expr_recon_loss: 514896.1311; train_lambda_latent_adj_recon_loss: 1412.8600; train_lambda_latent_contrastive_instanceloss: 6.3401; train_lambda_latent_contrastive_clusterloss: 2.0960; train_global_loss: 521542.5765; train_optim_loss: 521542.5765; val_kl_reg_loss: 260.0209; val_edge_recon_loss: 4390.2610; val_gene_expr_recon_loss: 500822.7734; val_lambda_latent_adj_recon_loss: 1365.0766; val_lambda_latent_contrastive_instanceloss: 6.3141; val_lambda_latent_contrastive_clusterloss: 2.0634; val_global_loss: 506846.5078; val_optim_loss: 506846.5078
Epoch 33/100 |██████--------------| 33.0% val_auroc_score: 0.9100; val_auprc_score: 0.9279; val_best_acc_score: 0.8518; val_best_f1_score: 0.8438; train_kl_reg_loss: 332.0043; train_edge_recon_loss: 4892.1082; train_gene_expr_recon_loss: 514247.2500; train_lambda_latent_adj_recon_loss: 1409.5115; train_lambda_latent_contrastive_instanceloss: 6.3378; train_lambda_latent_contrastive_clusterloss: 2.1037; train_global_loss: 520889.3176; train_optim_loss: 520889.3176; val_kl_reg_loss: 261.5237; val_edge_recon_loss: 4383.5261; val_gene_expr_recon_loss: 502154.4727; val_lambda_latent_adj_recon_loss: 1393.9978; val_lambda_latent_contrastive_instanceloss: 6.3086; val_lambda_latent_contrastive_clusterloss: 2.0519; val_global_loss: 508201.8828; val_optim_loss: 508201.8828
Stopping early: metric has not improved more than 0.0 in the last 8 epochs.
If the early stopping criterion is too strong, please instantiate it with different parameters in the train method.
Model training finished after 13 min 27 sec.
Using best model state, which was in epoch 25.
--- MODEL EVALUATION ---
val AUROC score: 0.9083
val AUPRC score: 0.9251
val best accuracy score: 0.8500
val best F1 score: 0.8419
val MSE score: 2.5068
[10]:
# Key shared by the latent representation and the neighbor graph built on it.
latent_key = 'garfield_latent'

# Neighbor graph computed on the latent representation and stored under the
# same key.
sc.pp.neighbors(model.adata, key_added=latent_key, use_rep=latent_key)

# UMAP embedding computed from that latent neighbor graph.
sc.tl.umap(model.adata, neighbors_key=latent_key)
[11]:
# Leiden clustering on the latent neighbor graph.
latent_key = "garfield_latent"
latent_leiden_resolution = 1.2
# obs column that receives the cluster labels, e.g. "latent_leiden_1.2".
latent_cluster_key = f"latent_leiden_{latent_leiden_resolution}"

sc.tl.leiden(
    model.adata,
    neighbors_key=latent_key,
    key_added=latent_cluster_key,
    resolution=latent_leiden_resolution,
)

# Display the number of distinct clusters found.
len(model.adata.obs[latent_cluster_key].unique())
[11]:
21
Visualize Garfield Latent Space
[12]:
# Figure defaults: 100 dpi on a white background.
sc.settings.set_figure_params(facecolor='white', dpi=100)

# Two UMAP panels: annotated cell type and latent Leiden cluster.
sc.pl.umap(
    model.adata,
    color=['celltype_mapped_refined', latent_cluster_key],
    size=3,
    wspace=0.35,
    show=True,
)
[15]:
## celltype
# UMAP colored by cell type with the labels drawn directly on the embedding.
sc.pl.umap(
    model.adata,
    color=['celltype_mapped_refined'],
    legend_loc='on data',
    legend_fontsize=3,
    size=1,
    ncols=3,
    wspace=0.5,
    show=True,
)
[16]:
## batch information
# UMAP colored by the obs['batch'] column.
sc.pl.umap(
    model.adata,
    show=True,
    color=['batch'],
)
[17]:
## sample information
# UMAP colored by the obs['sample'] column.
sc.pl.umap(
    model.adata,
    color=['sample'],
    show=True,
    wspace=0.5,
    ncols=3,
)
[20]:
### Display spatial location distribution by sample
import squidpy as sq

# Distinct values of obs['sample'], in order of first appearance.
batch_indeces = list(model.adata.obs['sample'].unique())

for batch_idx in batch_indeces:
    # Restrict to the cells belonging to the current sample.
    tmp = model.adata[model.adata.obs['sample'] == batch_idx]
    # Visualize cell-level annotated data in physical space: first colored by
    # annotated cell type, then by latent Leiden cluster.
    for color_key in ("celltype_mapped_refined", latent_cluster_key):
        sc.pl.embedding(
            tmp,
            basis="spatial",
            color=[color_key],
            ncols=1,
            wspace=0.20,
            edges=False,
        )
[21]:
# Persist the trained model, together with its AnnData object, under
# "<workdir>/model".
model_folder_path = f"{workdir}/model"
# Create the target folder if it does not exist yet.
os.makedirs(model_folder_path, exist_ok=True)
model.save(
    dir_path=model_folder_path,
    adata_file_name="adata_ref.h5ad",
    save_adata=True,
    overwrite=True,
)
Model saved successfully using pickle at /home/zhouweige/zhouwg_data/project/Garfield_tutorials/result/garfield_spRNA_embryo/model/attr.pkl
[ ]: