scMetaTraj workflow

Overview

scMetaTraj models metabolism as a continuous state space derived from pathway-level scores rather than a secondary annotation layered onto transcriptomic clustering. The package supports:

pathway/module scoring from a Seurat object or expression matrix
metabolic state space embedding
metabolic subclustering
metabolic pseudotime inference
trend and switchpoint analysis along metabolic pseudotime

This vignette uses a small simulated example so that it remains portable and does not depend on local files or large external datasets.

Simulate a small expression matrix

library(scMetaTraj)

set.seed(2026)

expr <- matrix(
  rexp(14 * 100, rate = 1),
  nrow = 14,
  ncol = 100,
  dimnames = list(
    c(
      "HK1", "PFKP", "LDHA", "GPI", "CS", "ACO2", "IDH3A",
      "NDUFA1", "COX4I1", "ATP5F1A", "G6PD", "PGD", "ACLY", "FASN"
    ),
    paste0("Cell", seq_len(100))
  )
)

gene_sets <- list(
  Glycolysis = c("HK1", "PFKP", "LDHA", "GPI"),
  TCA = c("CS", "ACO2", "IDH3A"),
  OXPHOS = c("NDUFA1", "COX4I1", "ATP5F1A"),
  PPP = c("G6PD", "PGD"),
  Lipid = c("ACLY", "FASN")
)

Score metabolic modules

scores <- scMetaTraj_score(
  x = expr,
  gene_sets = gene_sets,
  method = "mean",
  min_genes = 2,
  scale = FALSE
)

dim(scores)
#> [1] 100   5
colnames(scores)
#> [1] "Glycolysis" "TCA"        "OXPHOS"     "PPP"        "Lipid"

Embed cells in metabolic space

scMetaTraj_embed() returns PCA coordinates for analysis or UMAP coordinates for visualization.

emb_pca <- scMetaTraj_embed(scores, method = "PCA", n_pcs = 4)
emb_umap <- scMetaTraj_embed(scores, method = "UMAP", n_pcs = 4)

head(emb_pca)
#>            PC_1       PC_2       PC_3        PC_4
#> Cell1 -3.983702 -1.2790806 -0.2054983  1.07070128
#> Cell2 -3.575829  0.6662594  0.6151458 -0.65285530
#> Cell3 -2.444516 -1.1414303  0.2780598  0.45342107
#> Cell4 -1.033728 -0.4600383 -0.3559383 -0.19045996
#> Cell5 -2.124445 -0.4049355  0.2825772 -0.44693714
#> Cell6 -2.827332  0.8008135 -0.5891042 -0.09569424
head(emb_umap)
#>            UMAP_1     UMAP_2
#> Cell1 -2.52324668  0.9325116
#> Cell2  0.91663524  1.3856661
#> Cell3 -2.55133665  0.1458008
#> Cell4 -0.07271127 -2.8093976
#> Cell5 -0.40895394 -0.1414887
#> Cell6  1.90534898  0.6933472

Identify metabolic subclusters

clusters <- scMetaTraj_cluster(
  embedding = emb_pca,
  k = 12,
  method = "louvain"
)

table(clusters)
#> clusters
#>  1  2  3  4  5  6  7  8 
#> 15 17 13 15 13  9  6 12

Cluster-level summaries can be generated with scMetaTraj_cluster_profile().

profile_df <- scMetaTraj_cluster_profile(scores, clusters, stat = "mean")
head(profile_df)
#>   Glycolysis         TCA     OXPHOS        PPP        Lipid
#> 1 -0.3350895  0.02402246  0.3090559  0.1109407  0.001554353
#> 2 -0.4707361 -0.27938571  1.2995770 -0.2121779 -1.037172627
#> 3 -0.6831452 -0.97218353 -1.1779708  0.9208805 -0.836588113
#> 4 -1.2396555 -0.43853118  1.6424364 -0.1644185  0.873645316
#> 5 -0.5470256 -0.58601713 -0.4120486 -1.0319860 -0.058917271
#> 6  1.0608655 -0.44905091 -0.6571238  1.8405274 -0.150764644

Infer metabolic pseudotime

traj <- scMetaTraj_infer(
  embedding = emb_pca,
  k = 12,
  root_mode = "pc1_min"
)

summary(traj$mPT)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.0000  0.5103  0.5861  0.5775  0.6674  1.0000
traj$root
#> [1] "Cell41"

The mPT distribution helper prepares ordered cluster labels along the trajectory:

dist_df <- scMetaTraj_mPT_distribution(traj$mPT, clusters)
head(dist_df)
#>             mPT cluster
#> Cell1 0.2804346       1
#> Cell2 0.5562978       2
#> Cell3 0.3560710       3
#> Cell4 0.7039233       4
#> Cell5 0.4653615       1
#> Cell6 0.6197645       3

Track module trends along mPT

gly_trend <- scMetaTraj_trend(
  scores = scores[, "Glycolysis"],
  mPT = traj$mPT,
  n_bins = 20
)
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : pseudoinverse used at 0.175
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : neighborhood radius 0.15
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : reciprocal condition number 5.0105e-23
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : There are other near singularities as well. 0.01
#> Warning in sqrt(sum.squares/one.delta): NaNs produced

head(gly_trend)
#>   mPT_bin     score score_smooth
#> 1   0.025 2.3539834    2.3539834
#> 4   0.175 0.3462838    0.3462838
#> 6   0.275 0.7134656    0.7134656
#> 7   0.325 1.3146386    1.3146386
#> 8   0.375 1.3208347    1.3208347
#> 9   0.425 1.4181018    1.4181018

To compare several modules at once:

multi_res <- scMetaTraj_trend_multi(
  score_mat = scores,
  mPT = traj$mPT,
  modules = c("Glycolysis", "TCA", "OXPHOS"),
  n_bins = 20
)
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : pseudoinverse used at 0.175
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : neighborhood radius 0.15
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : reciprocal condition number 5.0105e-23
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : There are other near singularities as well. 0.01
#> Warning in sqrt(sum.squares/one.delta): NaNs produced
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : pseudoinverse used at 0.175
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : neighborhood radius 0.15
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : reciprocal condition number 5.0105e-23
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : There are other near singularities as well. 0.01
#> Warning in sqrt(sum.squares/one.delta): NaNs produced
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : pseudoinverse used at 0.175
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : neighborhood radius 0.15
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : reciprocal condition number 5.0105e-23
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
#> : There are other near singularities as well. 0.01
#> Warning in sqrt(sum.squares/one.delta): NaNs produced

head(multi_res$trend_long)
#>   mPT_bin     score score_smooth     module
#> 1   0.025 2.3539834    2.3539834 Glycolysis
#> 2   0.175 0.3462838    0.3462838 Glycolysis
#> 3   0.275 0.7134656    0.7134656 Glycolysis
#> 4   0.325 1.3146386    1.3146386 Glycolysis
#> 5   0.375 1.3208347    1.3208347 Glycolysis
#> 6   0.425 1.4181018    1.4181018 Glycolysis
multi_res$switchpoints
#>       module mPT_switch index
#> 1 Glycolysis      0.175     2
#> 2        TCA      0.175     2
#> 3     OXPHOS      0.875    15

scMetaTraj_plot_trend_multi(
  multi_res$trend_long,
  multi_res$switchpoints
)

Example trend plot for several metabolic modules.

Interpret results

The workflow above illustrates the intended package logic:

summarize gene expression into curated metabolic modules
analyze cells in module-defined space rather than transcriptome-wide space
reconstruct graph-based metabolic pseudotime
quantify where module activity changes along the inferred trajectory

In real analyses, the same workflow can be applied to Seurat objects and larger curated metabolic gene set collections, while keeping the vignette itself lightweight and fully reproducible.