scMetaTraj workflow

Overview

scMetaTraj models metabolism as a continuous state space derived from pathway-level scores, rather than as a secondary annotation layered onto transcriptomic clustering. The package supports:

  • pathway/module scoring from a Seurat object or expression matrix
  • metabolic state space embedding
  • metabolic subclustering
  • metabolic pseudotime inference
  • trend and switchpoint analysis along metabolic pseudotime

This vignette uses a small simulated example so that it remains portable and does not depend on local files or large external datasets.

Simulate a small expression matrix

library(scMetaTraj)

# Curated metabolic modules. The simulated matrix below gets exactly one row
# per gene listed here, in the order the modules are declared.
gene_sets <- list(
  Glycolysis = c("HK1", "PFKP", "LDHA", "GPI"),
  TCA = c("CS", "ACO2", "IDH3A"),
  OXPHOS = c("NDUFA1", "COX4I1", "ATP5F1A"),
  PPP = c("G6PD", "PGD"),
  Lipid = c("ACLY", "FASN")
)

# Flatten the modules into the gene (row) labels and fix the number of cells.
module_genes <- unlist(gene_sets, use.names = FALSE)
n_cells <- 100

# Seed right before the only random draw so the example is reproducible.
set.seed(2026)

# Genes in rows, cells in columns; entries are exponential(1) draws.
expr <- matrix(
  rexp(length(module_genes) * n_cells, rate = 1),
  nrow = length(module_genes),
  ncol = n_cells,
  dimnames = list(module_genes, paste0("Cell", seq_len(n_cells)))
)

Score metabolic modules

# Collapse the gene-by-cell matrix into one score per gene set for every cell,
# using the "mean" scoring method and no scaling.
# NOTE(review): `min_genes = 2` is presumably the minimum number of a set's
# genes that must be present in `x` for the set to be scored -- confirm in
# ?scMetaTraj_score. Every set defined for this example has at least two genes.
scores <- scMetaTraj_score(
  x = expr,
  gene_sets = gene_sets,
  method = "mean",
  min_genes = 2,
  scale = FALSE
)

# The result has one row per cell and one column per gene set.
dim(scores)
#> [1] 100   5
colnames(scores)
#> [1] "Glycolysis" "TCA"        "OXPHOS"     "PPP"        "Lipid"

Embed cells in metabolic space

scMetaTraj_embed() returns PCA coordinates for analysis or UMAP coordinates for visualization, depending on the method argument.

# Both embeddings are computed from the same score matrix with n_pcs = 4;
# only `method` differs between the two calls.
emb_pca <- scMetaTraj_embed(scores, method = "PCA", n_pcs = 4)
emb_umap <- scMetaTraj_embed(scores, method = "UMAP", n_pcs = 4)

# Rows are cells in both results. The PCA embedding has four columns
# (PC_1 to PC_4); the UMAP embedding has two (UMAP_1, UMAP_2).
head(emb_pca)
#>            PC_1       PC_2       PC_3        PC_4
#> Cell1 -3.983702 -1.2790806 -0.2054983  1.07070128
#> Cell2 -3.575829  0.6662594  0.6151458 -0.65285530
#> Cell3 -2.444516 -1.1414303  0.2780598  0.45342107
#> Cell4 -1.033728 -0.4600383 -0.3559383 -0.19045996
#> Cell5 -2.124445 -0.4049355  0.2825772 -0.44693714
#> Cell6 -2.827332  0.8008135 -0.5891042 -0.09569424
head(emb_umap)
#>            UMAP_1     UMAP_2
#> Cell1 -2.52324668  0.9325116
#> Cell2  0.91663524  1.3856661
#> Cell3 -2.55133665  0.1458008
#> Cell4 -0.07271127 -2.8093976
#> Cell5 -0.40895394 -0.1414887
#> Cell6  1.90534898  0.6933472

Identify metabolic subclusters

# Assign every cell to a metabolic subcluster, working on the PCA embedding
# (not the UMAP coordinates) with the "louvain" method.
# NOTE(review): `k` is presumably the neighbourhood size of the graph the
# clustering runs on -- confirm in ?scMetaTraj_cluster.
clusters <- scMetaTraj_cluster(
  embedding = emb_pca,
  k = 12,
  method = "louvain"
)

# Number of cells per cluster; this run produced eight clusters.
table(clusters)
#> clusters
#>  1  2  3  4  5  6  7  8 
#> 15 17 13 15 13  9  6 12

Cluster-level summaries can be generated with scMetaTraj_cluster_profile().

# Summarise the module scores within each cluster using the mean.
profile_df <- scMetaTraj_cluster_profile(scores, clusters, stat = "mean")
# One column per module; rows appear to be indexed by cluster (first six shown).
# NOTE(review): the printed values include negatives although the input
# expression is non-negative and scoring used scale = FALSE, so a
# transformation, centring or scaling is presumably applied somewhere in
# scoring or profiling -- confirm against the package documentation.
head(profile_df)
#>   Glycolysis         TCA     OXPHOS        PPP        Lipid
#> 1 -0.3350895  0.02402246  0.3090559  0.1109407  0.001554353
#> 2 -0.4707361 -0.27938571  1.2995770 -0.2121779 -1.037172627
#> 3 -0.6831452 -0.97218353 -1.1779708  0.9208805 -0.836588113
#> 4 -1.2396555 -0.43853118  1.6424364 -0.1644185  0.873645316
#> 5 -0.5470256 -0.58601713 -0.4120486 -1.0319860 -0.058917271
#> 6  1.0608655 -0.44905091 -0.6571238  1.8405274 -0.150764644

Infer metabolic pseudotime

# Infer metabolic pseudotime (mPT) on the PCA embedding.
# NOTE(review): root_mode = "pc1_min" presumably roots the trajectory at the
# cell with the smallest PC_1 coordinate -- confirm in ?scMetaTraj_infer.
traj <- scMetaTraj_infer(
  embedding = emb_pca,
  k = 12,
  root_mode = "pc1_min"
)

# Per-cell pseudotime values; in this run they span 0 to 1.
summary(traj$mPT)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.0000  0.5103  0.5861  0.5775  0.6674  1.0000
# Name of the root cell reported by the inference result.
traj$root
#> [1] "Cell41"

The metabolic pseudotime (mPT) distribution helper, scMetaTraj_mPT_distribution(), prepares ordered cluster labels along the trajectory:

# Combine the per-cell mPT values with the cluster labels into one table.
dist_df <- scMetaTraj_mPT_distribution(traj$mPT, clusters)
# One row per cell, with columns `mPT` and `cluster`.
head(dist_df)
#>             mPT cluster
#> Cell1 0.2804346       1
#> Cell2 0.5562978       2
#> Cell3 0.3560710       3
#> Cell4 0.7039233       4
#> Cell5 0.4653615       1
#> Cell6 0.6197645       3

Interpret results

The workflow above illustrates the intended package logic:

  1. summarize gene expression into curated metabolic modules
  2. analyze cells in module-defined space rather than transcriptome-wide space
  3. reconstruct graph-based metabolic pseudotime
  4. quantify where module activity changes along the inferred trajectory (trend and switchpoint analysis, which this vignette does not run)

In real analyses, the same workflow can be applied to Seurat objects and larger curated metabolic gene set collections, while keeping the vignette itself lightweight and fully reproducible.