1. Quality Control

This document takes the cellranger count data for two samples (a WT and an Ifng CBS mutant mice) and performs standard single cell rnaseq analysis.

# Install hdf5 library if its not already installed
#sudo apt-get install libhdf5-dev or brew install hdf5
#install.packages("hdf5r")

# Load below libraries
library(Seurat)
library(ggplot2)
library(plotly)
library(tidyverse)
library(cowplot)
library(scProportionTest)
library(rollama)
library(SCassist)
library(httr)
library(jsonlite)

1.1. Data setup

# Set the input, output paths
setwd("/singlecellassistant/")
datapath="exampleData/LCMV"
outputpath="/singlecellassistant/exampleOutput_G/LCMV/"

# Load RData for example dataset
load("/singlecellassistant/exampleOutput_G/LCMV/LCMV.RData")

# Specify samples for analysis.
# Define LCMV Day4, CD4, CD8, NK samples
samples=c("WT","KO")

# Read the data file
WT <- Read10X_h5(file.path("/singlecellassistant/exampleData/LCMV/GSM6625298_scRNA_LCMV_Day4_CD4_CD8_NK_WT_filtered_feature_bc_matrix.h5"), use.names = T)

KO <- Read10X_h5(file.path("/singlecellassistant/exampleData/LCMV/GSM6625299_scRNA_LCMV_Day4_CD4_CD8_NK_KO_filtered_feature_bc_matrix.h5"), use.names = T)

# Add sample names to columnnames
colnames(WT$`Gene Expression`) <- paste(sapply(strsplit(colnames(WT$`Gene Expression`),split="-"),'[[',1L),"WT",sep="-")

colnames(KO$`Gene Expression`) <- paste(sapply(strsplit(colnames(KO$`Gene Expression`),split="-"),'[[',1L),"KO",sep="-")

# Create a Seurat data object from the gex matrix
WT <- CreateSeuratObject(counts = WT[["Gene Expression"]], names.field = 2,names.delim = "\\-")

KO <- CreateSeuratObject(counts = KO[["Gene Expression"]], names.field = 2,names.delim = "\\-")

# Merge seurat objects
allsamples <- merge(WT, KO, project = "LCMV")
allsamples
## An object of class Seurat
## 32285 features across 10551 samples within 1 assay
## Active assay: RNA (32285 features, 0 variable features)
##  2 layers present: counts.1, counts.2
head(allsamples)
##                     orig.ident nCount_RNA nFeature_RNA
## AAACCCAAGAGTTGTA-WT         WT      17880         4120
## AAACCCAAGATTTGCC-WT         WT       7916         2395
## AAACCCAAGTTGTAGA-WT         WT      35993         5094
## AAACCCACAAGTATCC-WT         WT       9059         2865
## AAACCCAGTCCAGGTC-WT         WT      10948         2939
## AAACCCATCCTGGGAC-WT         WT      16768         3484
## AAACGAACAGAGAGGG-WT         WT      10467         3159
## AAACGAACAGCGACAA-WT         WT      12996         2853
## AAACGAACAGTGTGGA-WT         WT      26674         4654
## AAACGAATCTAGCCTC-WT         WT      11434         3100
table(Idents(allsamples))
##
##   WT   KO
## 5666 4885
# Merge counts data
allsamples = JoinLayers(allsamples)

1.2. Quality Analysis

This part of the workflow uses standard plots to identify potential filtering cutoff values.

# Generate Percent mito for each of the cells
grep("^mt-",rownames(allsamples@assays$RNA$counts),value = TRUE)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3"
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
allsamples$percent.mito <- PercentageFeatureSet(allsamples, pattern = "^mt-")
summary(allsamples$percent.mito)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##   0.000   1.819   2.425   5.053   3.310  83.600
head(allsamples)
##                     orig.ident nCount_RNA nFeature_RNA percent.mito
## AAACCCAAGAGTTGTA-WT         WT      17880         4120     3.333333
## AAACCCAAGATTTGCC-WT         WT       7916         2395     2.627590
## AAACCCAAGTTGTAGA-WT         WT      35993         5094     2.008724
## AAACCCACAAGTATCC-WT         WT       9059         2865     1.953858
## AAACCCAGTCCAGGTC-WT         WT      10948         2939     2.831567
## AAACCCATCCTGGGAC-WT         WT      16768         3484     0.942271
## AAACGAACAGAGAGGG-WT         WT      10467         3159     3.668673
## AAACGAACAGCGACAA-WT         WT      12996         2853     1.569714
## AAACGAACAGTGTGGA-WT         WT      26674         4654     1.570818
## AAACGAATCTAGCCTC-WT         WT      11434         3100     1.687948
# Generate Percent ribo for each of the cells
allsamples$percent.ribo <- PercentageFeatureSet(allsamples, pattern = "^Rp[sl]")
summary(allsamples$percent.ribo)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##   2.002  18.156  24.731  24.894  32.868  56.275
head(allsamples)
##                     orig.ident nCount_RNA nFeature_RNA percent.mito
## AAACCCAAGAGTTGTA-WT         WT      17880         4120     3.333333
## AAACCCAAGATTTGCC-WT         WT       7916         2395     2.627590
## AAACCCAAGTTGTAGA-WT         WT      35993         5094     2.008724
## AAACCCACAAGTATCC-WT         WT       9059         2865     1.953858
## AAACCCAGTCCAGGTC-WT         WT      10948         2939     2.831567
## AAACCCATCCTGGGAC-WT         WT      16768         3484     0.942271
## AAACGAACAGAGAGGG-WT         WT      10467         3159     3.668673
## AAACGAACAGCGACAA-WT         WT      12996         2853     1.569714
## AAACGAACAGTGTGGA-WT         WT      26674         4654     1.570818
## AAACGAATCTAGCCTC-WT         WT      11434         3100     1.687948
##                     percent.ribo
## AAACCCAAGAGTTGTA-WT     22.80761
## AAACCCAAGATTTGCC-WT     16.28348
## AAACCCAAGTTGTAGA-WT     21.85147
## AAACCCACAAGTATCC-WT     16.81201
## AAACCCAGTCCAGGTC-WT     25.30142
## AAACCCATCCTGGGAC-WT     35.07276
## AAACGAACAGAGAGGG-WT     22.92921
## AAACGAACAGCGACAA-WT     42.22068
## AAACGAACAGTGTGGA-WT     26.22029
## AAACGAATCTAGCCTC-WT     28.44149
# Generate Percent hemoglobin for each of the cells
allsamples$percent.hb <- PercentageFeatureSet(allsamples, pattern = "^Hb[^(p)]")
summary(allsamples$percent.hb)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.
## 0.000000 0.000000 0.000000 0.004744 0.005529 2.162162
head(allsamples)
##                     orig.ident nCount_RNA nFeature_RNA percent.mito
## AAACCCAAGAGTTGTA-WT         WT      17880         4120     3.333333
## AAACCCAAGATTTGCC-WT         WT       7916         2395     2.627590
## AAACCCAAGTTGTAGA-WT         WT      35993         5094     2.008724
## AAACCCACAAGTATCC-WT         WT       9059         2865     1.953858
## AAACCCAGTCCAGGTC-WT         WT      10948         2939     2.831567
## AAACCCATCCTGGGAC-WT         WT      16768         3484     0.942271
## AAACGAACAGAGAGGG-WT         WT      10467         3159     3.668673
## AAACGAACAGCGACAA-WT         WT      12996         2853     1.569714
## AAACGAACAGTGTGGA-WT         WT      26674         4654     1.570818
## AAACGAATCTAGCCTC-WT         WT      11434         3100     1.687948
##                     percent.ribo  percent.hb
## AAACCCAAGAGTTGTA-WT     22.80761 0.011185682
## AAACCCAAGATTTGCC-WT     16.28348 0.000000000
## AAACCCAAGTTGTAGA-WT     21.85147 0.002778318
## AAACCCACAAGTATCC-WT     16.81201 0.000000000
## AAACCCAGTCCAGGTC-WT     25.30142 0.009134088
## AAACCCATCCTGGGAC-WT     35.07276 0.005963740
## AAACGAACAGAGAGGG-WT     22.92921 0.009553836
## AAACGAACAGCGACAA-WT     42.22068 0.000000000
## AAACGAACAGTGTGGA-WT     26.22029 0.000000000
## AAACGAATCTAGCCTC-WT     28.44149 0.000000000
# Check the total number of cells in this experiment
table(allsamples$orig.ident)
##
##   KO   WT
## 4885 5666

1.3. Quality Filtering

Here we look at the distribution of the data on MT, Rb and Hb genes and identify the cutoff values to filter and visualize the before and after QC data.

# Generate before filtering quality plots
qcbefore=VlnPlot(allsamples,features = c("nFeature_RNA", "nCount_RNA","percent.mito","percent.ribo"),ncol = 2, pt.size = 0) +
  NoLegend()
qcbefore

# Save the plot in current working directory
ggsave(paste(outputpath,"LCMV-before-qc-violineplot.pdf",sep=""), plot = qcbefore, width = 15, height = 20, units = "cm")

# Filter data based on the above plot.
allsamplesgood <- subset(allsamples, subset =  nCount_RNA > 2000 & nCount_RNA < 37000 & nFeature_RNA > 500 & nFeature_RNA < 6500 & percent.mito < 8 & percent.ribo < 50 & percent.hb < 2)

# Counts before filtering
table(allsamples$orig.ident)
##
##   KO   WT
## 4885 5666
# Counts after filtering
table(allsamplesgood$orig.ident)
##
##   KO   WT
## 4130 4740

1.4. Generate after QC plots

# Generate after filtering quality plots
qcafter=VlnPlot(allsamplesgood,features = c("nFeature_RNA", "nCount_RNA","percent.mito","percent.ribo"),ncol = 2, pt.size = 0) +
  NoLegend()
qcafter

# Save the plot in current working directory
ggsave(paste(outputpath,"LCMV-after-qc-violineplot.pdf",sep=""), plot = qcafter, width = 15, height = 20, units = "cm")

2. Normalization

Here we use the standard normalization method.

# Cell level normalization - accounts for sequencing depth
allsamplesgood <- NormalizeData(
  object = allsamplesgood,
  normalization.method = "LogNormalize",
  scale.factor = 10000)

3. Variable features

Here we identify variable genes and perform scaling, including regressing to remove any technical variations

##################### Find variable genes
# Returns top 2000 variable genes
# VST is uses LOESS method
# FindVariableFeatures needs normalization
allsamplesgood <- FindVariableFeatures(
  object = allsamplesgood,
  selection.method = "vst")
top10 <- head(VariableFeatures(allsamplesgood), 10)
top10
##  [1] "Ccl4"      "Hist1h1b"  "Xcl1"      "Hist1h2ap" "Hist1h2ae" "Spp1"
##  [7] "Hist1h3c"  "Ccl3"      "H2-Aa"     "Cxcl10"
## Scaling & Batch Correction
##################### SCALE DATA
# PCA needs scaled data
# Zero centers and scales (mean/sd) gene/feature data in each cell (for across sample comparison), so extreme ranges in expression do not affect clustering (done for making cells with similar expression cluster together)
dim(allsamplesgood)
## [1] 32285  8870
allsamplesgood <- ScaleData(
  object = allsamplesgood,
  # Scale all genes - not just variable genes
  features = rownames(allsamplesgood),
  # Remove unwanted sources of variation (technical, batch etc.)
  vars.to.regress = c("percent.mito", "nFeature_RNA", "percent.ribo", "nCount_RNA"))

# List top 10 variable genes
top10 <- head(VariableFeatures(allsamplesgood), 10)
top10
##  [1] "Ccl4"      "Hist1h1b"  "Xcl1"      "Hist1h2ap" "Hist1h2ae" "Spp1"
##  [7] "Hist1h3c"  "Ccl3"      "H2-Aa"     "Cxcl10"
# Plot variable features, label the top 10, save the plot
vfp1 <- VariableFeaturePlot(allsamplesgood)
vfp1 <- LabelPoints(plot = vfp1, points = top10, repel = TRUE)
vfp1

ggsave(paste(outputpath,"LCMV-variablefeatureplot.pdf",sep=""), plot = vfp1, width = 20, height = 15, units = "cm")

4. PCA

Here we perform PCA to use for downstream analysis

# Run PCA to generate 50 pcs
allsamplesgood <- RunPCA(object = allsamplesgood, npcs=50)

# Print genes in the top 5 pcs
print(allsamplesgood[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive:  Nusap1, Ccna2, Knl1, Top2a, Birc5
## Negative:  Hopx, Bcl2a1b, Ctsb, Tnfrsf1b, Ly6a
## PC_ 2
## Positive:  Klrb1c, Fcer1g, Nkg7, Klrd1, Plek
## Negative:  Inpp4b, Cd6, Hif1a, Rgs10, Tnfrsf4
## PC_ 3
## Positive:  Aif1, C1qa, Slc40a1, C1qb, Hmox1
## Negative:  Hopx, Tnfrsf18, S100a10, Cd6, Sdf4
## PC_ 4
## Positive:  E2f1, Cdc6, Lig1, Ccne2, Pcna
## Negative:  Ube2c, Cenpf, Ccnb1, Cdc20, Aspm
## PC_ 5
## Positive:  Gm42418, Ly6c1, Nsg2, Actn1, Sell
## Negative:  Tnfrsf4, Tnfrsf18, Foxp3, Maf, Ctla4

4.1. Choosing appropriate number of PCs

Here we look at the elbow plot and identify the number of PCs to use for downstream analysis

# Identify number of PCs that explains majority of variations
ElbowPlot(allsamplesgood, ndims=50, reduction = "pca")

# Visualize the genes in the first PC
VizDimLoadings(allsamplesgood, dims = 1, ncol = 1) + theme_minimal(base_size = 8)

# Visualize the genes in the second PC
VizDimLoadings(allsamplesgood, dims = 2, ncol = 1) + theme_minimal(base_size = 8)

# Set the identity column to  WT vs KO
Idents(allsamplesgood) <- "orig.ident"

# Visualize the cells after pca
DimPlot(object = allsamplesgood, reduction = "pca")

# Plot heatmap with cells=500 plotting cells with extreme cells on both ends of spectrum
DimHeatmap(object = allsamplesgood, dims = 1:5, cells = 500, balanced = TRUE)

5. FindNeighbors

Here we run FindNeighbors function, given the number of pcs (looking at the elbow plot) we are going to use for the downstream analysis.

# Run findneighbors
allsamplesgood <- FindNeighbors(allsamplesgood, dims = 1:20)

6. Clustering

Here we do clustering, randomnly choosing some resolution numbers, to start with.

# Perform clustering using the different resolutions, to identify the numbers, to match the 23 clusters we got in the manuscript.
allsamplesgood <- FindClusters(allsamplesgood, resolution = seq(1.5,2,0.1))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8870
## Number of edges: 310666
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7853
## Number of communities: 19
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8870
## Number of edges: 310666
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7785
## Number of communities: 21
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8870
## Number of edges: 310666
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7710
## Number of communities: 22
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8870
## Number of edges: 310666
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7656
## Number of communities: 23
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8870
## Number of edges: 310666
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7596
## Number of communities: 23
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8870
## Number of edges: 310666
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7533
## Number of communities: 26
## Elapsed time: 0 seconds
head(allsamplesgood)
##                     orig.ident nCount_RNA nFeature_RNA percent.mito
## AAACCCAAGAGTTGTA-WT         WT      17880         4120     3.333333
## AAACCCAAGATTTGCC-WT         WT       7916         2395     2.627590
## AAACCCAAGTTGTAGA-WT         WT      35993         5094     2.008724
## AAACCCACAAGTATCC-WT         WT       9059         2865     1.953858
## AAACCCAGTCCAGGTC-WT         WT      10948         2939     2.831567
## AAACCCATCCTGGGAC-WT         WT      16768         3484     0.942271
## AAACGAACAGAGAGGG-WT         WT      10467         3159     3.668673
## AAACGAACAGCGACAA-WT         WT      12996         2853     1.569714
## AAACGAACAGTGTGGA-WT         WT      26674         4654     1.570818
## AAACGAATCTAGCCTC-WT         WT      11434         3100     1.687948
##                     percent.ribo  percent.hb RNA_snn_res.1.5 RNA_snn_res.1.6
## AAACCCAAGAGTTGTA-WT     22.80761 0.011185682              14              15
## AAACCCAAGATTTGCC-WT     16.28348 0.000000000               0               0
## AAACCCAAGTTGTAGA-WT     21.85147 0.002778318               9               8
## AAACCCACAAGTATCC-WT     16.81201 0.000000000              15              16
## AAACCCAGTCCAGGTC-WT     25.30142 0.009134088              13               5
## AAACCCATCCTGGGAC-WT     35.07276 0.005963740              12              13
## AAACGAACAGAGAGGG-WT     22.92921 0.009553836               2               2
## AAACGAACAGCGACAA-WT     42.22068 0.000000000               6               4
## AAACGAACAGTGTGGA-WT     26.22029 0.000000000               9               8
## AAACGAATCTAGCCTC-WT     28.44149 0.000000000              12              13
##                     RNA_snn_res.1.7 RNA_snn_res.1.8 RNA_snn_res.1.9
## AAACCCAAGAGTTGTA-WT              16              17              16
## AAACCCAAGATTTGCC-WT               1               1               1
## AAACCCAAGTTGTAGA-WT               9               9              10
## AAACCCACAAGTATCC-WT              19              19              20
## AAACCCAGTCCAGGTC-WT               7              16              13
## AAACCCATCCTGGGAC-WT              12              14              15
## AAACGAACAGAGAGGG-WT               2               2               2
## AAACGAACAGCGACAA-WT               4               3               4
## AAACGAACAGTGTGGA-WT               9               9              10
## AAACGAATCTAGCCTC-WT               6              10               8
##                     RNA_snn_res.2 seurat_clusters
## AAACCCAAGAGTTGTA-WT            15              15
## AAACCCAAGATTTGCC-WT             1               1
## AAACCCAAGTTGTAGA-WT            16              16
## AAACCCACAAGTATCC-WT            22              22
## AAACCCAGTCCAGGTC-WT            18              18
## AAACCCATCCTGGGAC-WT            12              12
## AAACGAACAGAGAGGG-WT             0               0
## AAACGAACAGCGACAA-WT             2               2
## AAACGAACAGTGTGGA-WT            17              17
## AAACGAATCTAGCCTC-WT            12              12
# Count number of clusters at each resolution
sapply(grep("res",colnames(allsamplesgood@meta.data),value = TRUE),
       function(x) length(unique(allsamplesgood@meta.data[,x])))
## RNA_snn_res.1.5 RNA_snn_res.1.6 RNA_snn_res.1.7 RNA_snn_res.1.8 RNA_snn_res.1.9
##              19              21              22              23              23
##   RNA_snn_res.2
##              26
# change default identity
Idents(allsamplesgood) <- "RNA_snn_res.1.8"

# list cell number in each cluster for HC vs UV
table(Idents(allsamplesgood),allsamplesgood$orig.ident)
##
##       KO  WT
##   0  620 250
##   1  384 397
##   2  382 335
##   3   68 540
##   4  323 284
##   5  348 163
##   6  321 173
##   7  280 182
##   8   86 343
##   9  138 263
##   10  83 313
##   11 193 197
##   12 183 177
##   13 157 172
##   14 165 125
##   15  96 194
##   16  66 198
##   17  48 189
##   18  50  73
##   19  45  65
##   20  45  45
##   21  38  48
##   22  11  14
set.seed(20)

# Run UMAP with identified dimensions
allsamplesgood <- RunUMAP(allsamplesgood, dims = 1:20)

# Plot umap
DimPlot(object = allsamplesgood, pt.size=0.5, reduction = "umap", label = T)

# Color by WT vs KO
DimPlot(object = allsamplesgood, pt.size=0.5, reduction = "umap", group.by = "orig.ident")

# Plot umap
dimplot1<-DimPlot(object = allsamplesgood, pt.size=0.5, reduction = "umap", label = T)

# Color by WT vs KO
dimplot2<-DimPlot(object = allsamplesgood, pt.size=0.5, reduction = "umap", group.by = "orig.ident")

# Save umaps
ggsave(paste(outputpath,"LCMV-umap-clusters.pdf",sep=""), plot = dimplot1, width = 20, height = 15, units = "cm")
ggsave(paste(outputpath,"LCMV-umap-clusters-type.pdf",sep=""), plot = dimplot2, width = 20, height = 15, units = "cm")

# Custom list of genes to visualize
mygenes = c("Ly6c1", "Tbc1d4","Foxp3","Klrg1", "Irf8","Fcer1g","Gm44175","Cd8a","Cd8b1","Mcm3","Dut","Pclaf","Cd4","Klrb1c")
RidgePlot(allsamplesgood, features = mygenes)

VlnPlot(allsamplesgood, features = mygenes, pt.size=0)

FeaturePlot(allsamplesgood, features = mygenes)

DotPlot(allsamplesgood, features = mygenes) + coord_flip()

DoHeatmap(subset(allsamplesgood), features = mygenes, size = 3)

7. Marker Identification

Here we run findallmarkers to identify potential markers that are unique for each of the clusters.

########################### Find all markers (about 80 minutes)
#markersall=FindAllMarkers(allsamplesgood)
head(markersall)
##         p_val avg_log2FC pct.1 pct.2 p_val_adj cluster    gene
## Fcgrt       0   4.664083 0.375 0.018         0       0   Fcgrt
## Gm44175     0   4.223099 0.339 0.017         0       0 Gm44175
## Cd8a        0   3.724056 0.963 0.109         0       0    Cd8a
## Bend4       0   3.676809 0.393 0.029         0       0   Bend4
## Lcn4        0   3.560143 0.516 0.052         0       0    Lcn4
## Cd8b1       0   3.516116 0.963 0.180         0       0   Cd8b1
# Save all markers as tab delimited file
write.table(markersall, file=paste(outputpath,"LCMV-markersall.txt",sep=""),quote=F, row.names = FALSE, sep = ",")

# Plot selected markers
VlnPlot(allsamplesgood,features=c("Ifitm3","Isg15", "Il17f","Tmem176b","Rorc"), pt.size = 0)

FeaturePlot(allsamplesgood, features="Isg15")

# Extract the top marker for each cluster
head(markersall)
##         p_val avg_log2FC pct.1 pct.2 p_val_adj cluster    gene
## Fcgrt       0   4.664083 0.375 0.018         0       0   Fcgrt
## Gm44175     0   4.223099 0.339 0.017         0       0 Gm44175
## Cd8a        0   3.724056 0.963 0.109         0       0    Cd8a
## Bend4       0   3.676809 0.393 0.029         0       0   Bend4
## Lcn4        0   3.560143 0.516 0.052         0       0    Lcn4
## Cd8b1       0   3.516116 0.963 0.180         0       0   Cd8b1
markersall %>%
  group_by(cluster) %>%
  slice(1) %>%
  pull(gene) -> best.gene.per.cluster

best.gene.per.cluster
##  [1] "Fcgrt"     "Cma1"      "Lrrc32"    "Ly6c1"     "Rps27"     "Gpm6b"
##  [7] "Tnfrsf4"   "Sell"      "Cd8b1"     "Hist1h2af" "Tmsb4x"    "Tcf7"
## [13] "Gm36723"   "Actn1"     "Gm14085"   "Lat2"      "Lad1"      "E2f1"
## [19] "Tlr13"     "Cdc20"     "Kcnj8"     "Il4"       "Cx3cl1"
# Store plot as ggplot list
vlnplot1<-VlnPlot(allsamplesgood,features=best.gene.per.cluster, pt.size = 0, combine = FALSE)

# Set properties for each plot in the list
p1 <- list()
for (i in seq_along(vlnplot1)){
  #Change x and y tick label font size.
  p1[[i]] = vlnplot1[[i]] +
    theme(legend.position = "none",
          axis.text.x = element_text(size = 8),
          axis.text.y = element_text(size = 8),
          axis.title.y = element_text(size = 6),
          axis.title.x = element_blank(),
          title = element_text(size=8))
}

# Plot all the plots in the list
plot_grid(plotlist = p1, ncol = 3)

# Save plot to file
vlnplot1=plot_grid(plotlist = p1, ncol = 3)
ggsave(paste(outputpath,"LCMV-vlnplot-best-per-cluster.pdf",sep=""), plot = vlnplot1, width = 20, height = 25, units = "cm")

# Plot features in umap
fplot1<-FeaturePlot(allsamplesgood,features=best.gene.per.cluster, pt.size = 0, combine = FALSE)
p2 <- list()
for (i in seq_along(fplot1)){
  #Change x and y label font size.
  p2[[i]] = fplot1[[i]] +
    theme(legend.position = "none",
          axis.text.x = element_text(size = 8),
          axis.text.y = element_text(size = 8),
          axis.title.y = element_blank(),
          axis.title.x = element_blank(),
          title = element_text(size=8))
}

plot_grid(plotlist = p2, ncol = 4)

fplot1=plot_grid(plotlist = p2, ncol = 4)
ggsave(paste(outputpath,"LCMV-fplot-best-per-cluster.pdf",sep=""), plot = fplot1, width = 20, height = 25, units = "cm")

7.1. Manual Annotation

Here we manually annotate the clusters based on known markers.

Idents(allsamplesgood) <- "RNA_snn_res.1.8"

# Read annotation file
myannotations=read.csv("/singlecellassistant/exampleData/LCMV/manualannotation.csv",sep=",")

# Map manual annotation
allsamplesgood$annotated_RNA_snn_res.1.8 <- plyr::mapvalues(
  x = allsamplesgood$RNA_snn_res.1.8,
  from = myannotations$clusternumber,
  to = myannotations$clustername
)

Idents(allsamplesgood) <- "annotated_RNA_snn_res.1.8"
Idents(allsamplesgood) <- "RNA_snn_res.1.8"

head(allsamplesgood)
##                     orig.ident nCount_RNA nFeature_RNA percent.mito
## AAACCCAAGAGTTGTA-WT         WT      17880         4120     3.333333
## AAACCCAAGATTTGCC-WT         WT       7916         2395     2.627590
## AAACCCAAGTTGTAGA-WT         WT      35993         5094     2.008724
## AAACCCACAAGTATCC-WT         WT       9059         2865     1.953858
## AAACCCAGTCCAGGTC-WT         WT      10948         2939     2.831567
## AAACCCATCCTGGGAC-WT         WT      16768         3484     0.942271
## AAACGAACAGAGAGGG-WT         WT      10467         3159     3.668673
## AAACGAACAGCGACAA-WT         WT      12996         2853     1.569714
## AAACGAACAGTGTGGA-WT         WT      26674         4654     1.570818
## AAACGAATCTAGCCTC-WT         WT      11434         3100     1.687948
##                     percent.ribo  percent.hb RNA_snn_res.1.5 RNA_snn_res.1.6
## AAACCCAAGAGTTGTA-WT     22.80761 0.011185682              14              15
## AAACCCAAGATTTGCC-WT     16.28348 0.000000000               0               0
## AAACCCAAGTTGTAGA-WT     21.85147 0.002778318               9               8
## AAACCCACAAGTATCC-WT     16.81201 0.000000000              15              16
## AAACCCAGTCCAGGTC-WT     25.30142 0.009134088              13               5
## AAACCCATCCTGGGAC-WT     35.07276 0.005963740              12              13
## AAACGAACAGAGAGGG-WT     22.92921 0.009553836               2               2
## AAACGAACAGCGACAA-WT     42.22068 0.000000000               6               4
## AAACGAACAGTGTGGA-WT     26.22029 0.000000000               9               8
## AAACGAATCTAGCCTC-WT     28.44149 0.000000000              12              13
##                     RNA_snn_res.1.7 RNA_snn_res.1.8 RNA_snn_res.1.9
## AAACCCAAGAGTTGTA-WT              16              17              16
## AAACCCAAGATTTGCC-WT               1               1               1
## AAACCCAAGTTGTAGA-WT               9               9              10
## AAACCCACAAGTATCC-WT              19              19              20
## AAACCCAGTCCAGGTC-WT               7              16              13
## AAACCCATCCTGGGAC-WT              12              14              15
## AAACGAACAGAGAGGG-WT               2               2               2
## AAACGAACAGCGACAA-WT               4               3               4
## AAACGAACAGTGTGGA-WT               9               9              10
## AAACGAATCTAGCCTC-WT               6              10               8
##                     RNA_snn_res.2 seurat_clusters annotated_RNA_snn_res.1.8
## AAACCCAAGAGTTGTA-WT            15              15                        NK
## AAACCCAAGATTTGCC-WT             1               1                        NK
## AAACCCAAGTTGTAGA-WT            16              16                        NK
## AAACCCACAAGTATCC-WT            22              22                        NK
## AAACCCAGTCCAGGTC-WT            18              18                      CD4+
## AAACCCATCCTGGGAC-WT            12              12                      CD4+
## AAACGAACAGAGAGGG-WT             0               0                      CD4+
## AAACGAACAGCGACAA-WT             2               2                      CD4+
## AAACGAACAGTGTGGA-WT            17              17                        NK
## AAACGAATCTAGCCTC-WT            12              12                      CD4+
# create and save umap with labels
an0=DimPlot(allsamplesgood, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'annotated_RNA_snn_res.1.8', split.by = "orig.ident")

an0a=an0+theme(legend.position="none")
an0a

ggsave(paste(outputpath,"LCMV-umap-with-sctype.pdf",sep=""),plot=an0a, width = 50, height = 30, units = "cm")

8. Proportion test

# Sample proportion table
# Proportion and counts of cells in individual samples

# Proportion test
# assign sample group
allsamplesgood$sample=ifelse(grepl("WT",allsamplesgood$orig.ident), "WT", ifelse(grepl("KO",allsamplesgood$orig.ident), "KO", ""))

prop.table(table(Idents(allsamplesgood), allsamplesgood$orig.ident))
##
##               KO          WT
##   0  0.069898534 0.028184893
##   1  0.043291995 0.044757610
##   2  0.043066516 0.037767756
##   3  0.007666291 0.060879369
##   4  0.036414882 0.032018038
##   5  0.039233371 0.018376550
##   6  0.036189402 0.019503946
##   7  0.031567080 0.020518602
##   8  0.009695603 0.038669673
##   9  0.015558061 0.029650507
##   10 0.009357384 0.035287486
##   11 0.021758737 0.022209696
##   12 0.020631342 0.019954904
##   13 0.017700113 0.019391206
##   14 0.018602029 0.014092446
##   15 0.010822999 0.021871477
##   16 0.007440812 0.022322435
##   17 0.005411499 0.021307779
##   18 0.005636979 0.008229989
##   19 0.005073281 0.007328072
##   20 0.005073281 0.005073281
##   21 0.004284104 0.005411499
##   22 0.001240135 0.001578354
table(Idents(allsamplesgood), allsamplesgood$orig.ident)
##
##       KO  WT
##   0  620 250
##   1  384 397
##   2  382 335
##   3   68 540
##   4  323 284
##   5  348 163
##   6  321 173
##   7  280 182
##   8   86 343
##   9  138 263
##   10  83 313
##   11 193 197
##   12 183 177
##   13 157 172
##   14 165 125
##   15  96 194
##   16  66 198
##   17  48 189
##   18  50  73
##   19  45  65
##   20  45  45
##   21  38  48
##   22  11  14
# Save tables
write.table(prop.table(table(Idents(allsamplesgood), allsamplesgood$orig.ident)), file=paste(outputpath,"LCMV-cluster-proportions.txt",sep=""), quote=F, sep = "\t")
write.table(table(Idents(allsamplesgood), allsamplesgood$orig.ident), file=paste(outputpath,"LCMV-cluster-counts.txt",sep=""), quote=F, sep = "\t")

# Run permutation test
prop_test <- sc_utils(allsamplesgood)
prop_test <- permutation_test(
  prop_test, cluster_identity = "RNA_snn_res.1.8",
  sample_1 = "WT", sample_2 = "KO",
  sample_identity = "orig.ident",
  n_permutations = 10000
)

plot_data=prop_test@results$permutation
plot_data
## Key: <clusters>
##     clusters          KO          WT  obs_log2FD       pval          FDR
##       <char>       <num>       <num>       <num>      <num>        <num>
##  1:        0 0.150121065 0.052742616  1.50908540 0.00009999 0.0001916475
##  2:        1 0.092978208 0.083755274  0.15071258 0.06889311 0.0990338466
##  3:       10 0.020096852 0.066033755 -1.71623414 0.00009999 0.0001916475
##  4:       11 0.046731235 0.041561181  0.16915050 0.13178682 0.1682463333
##  5:       12 0.044309927 0.037341772  0.24683957 0.05669433 0.0869313069
##  6:       13 0.038014528 0.036286920  0.06710127 0.35506449 0.3837479888
##  7:       14 0.039951574 0.026371308  0.59928321 0.00019998 0.0003538108
##  8:       15 0.023244552 0.040928270 -0.81620506 0.00009999 0.0001916475
##  9:       16 0.015980630 0.041772152 -1.38621722 0.00009999 0.0001916475
## 10:       17 0.011622276 0.039873418 -1.77853465 0.00009999 0.0001916475
## 11:       18 0.012106538 0.015400844 -0.34722309 0.10878912 0.1471852815
## 12:       19 0.010895884 0.013713080 -0.33176944 0.13898610 0.1682463333
## 13:        2 0.092493947 0.070675105  0.38815682 0.00009999 0.0001916475
## 14:       20 0.010895884 0.009493671  0.19874528 0.29197080 0.3357664234
## 15:       21 0.009200969 0.010126582 -0.13828971 0.36706329 0.3837479888
## 16:       22 0.002663438 0.002953586 -0.14917803 0.48575142 0.4857514249
## 17:        3 0.016464891 0.113924051 -2.79060748 0.00009999 0.0001916475
## 18:        4 0.078208232 0.059915612  0.38438851 0.00079992 0.0013141543
## 19:        5 0.084261501 0.034388186  1.29296062 0.00009999 0.0001916475
## 20:        6 0.077723971 0.036497890  1.09054654 0.00009999 0.0001916475
## 21:        7 0.067796610 0.038396624  0.82023365 0.00009999 0.0001916475
## 22:        8 0.020823245 0.072362869 -1.79705473 0.00009999 0.0001916475
## 23:        9 0.033414044 0.055485232 -0.73164926 0.00009999 0.0001916475
##     clusters          KO          WT  obs_log2FD       pval          FDR
##     boot_mean_log2FD boot_CI_2.5 boot_CI_97.5
##                <num>       <num>        <num>
##  1:       1.51103465  1.30797435    1.7161984
##  2:       0.14969813 -0.04509175    0.3420585
##  3:      -1.72064882 -2.08267159   -1.3814468
##  4:       0.17179183 -0.11030856    0.4562415
##  5:       0.24698499 -0.04873812    0.5328703
##  6:       0.06581051 -0.23421413    0.3749108
##  7:       0.60069610  0.27622403    0.9357109
##  8:      -0.81891276 -1.17281359   -0.4828932
##  9:      -1.39347477 -1.80845770   -1.0045160
## 10:      -1.79368552 -2.29001785   -1.3532399
## 11:      -0.34870706 -0.89439458    0.1608779
## 12:      -0.34021746 -0.90079040    0.1987453
## 13:       0.38886851  0.18289023    0.5908455
## 14:       0.20022766 -0.39989216    0.8075856
## 15:      -0.14388708 -0.77700718    0.4811450
## 16:      -0.17907950 -1.47932663    1.0061002
## 17:      -2.79899643 -3.18025417   -2.4614047
## 18:       0.38603790  0.16373578    0.6082658
## 19:       1.29603243  1.03802497    1.5689380
## 20:       1.08912592  0.82759434    1.3532229
## 21:       0.82212486  0.55961325    1.0918472
## 22:      -1.80488041 -2.14879346   -1.4739392
## 23:      -0.73377647 -1.03721817   -0.4481318
##     boot_mean_log2FD boot_CI_2.5 boot_CI_97.5
FDR_threshold = 0.05
log2FD_threshold = log2(1.5)
order_clusters = TRUE

plot_data[, significance := ifelse(
  FDR < FDR_threshold & abs(obs_log2FD) > log2FD_threshold,
  paste("FDR <", FDR_threshold, "& abs(Log2FD) >", round(log2FD_threshold, 2)),
  "n.s."
)]

plot_data[, significance := factor(significance, levels = c(
  paste("FDR <", FDR_threshold, "& abs(Log2FD) >", round(log2FD_threshold, 2)),
  "n.s."
))]

#library(tidyverse)
## Order the clusters by observed log2FD if requested.
if (order_clusters) {
  plot_data[, clusters := fct_reorder(factor(clusters), dplyr::desc(obs_log2FD))]
}

## Plot the results.
p <- ggplot(plot_data, aes(x = clusters, y = obs_log2FD)) +
  geom_pointrange(aes(ymin = boot_CI_2.5, ymax = boot_CI_97.5, color = significance)) +
  theme_bw() +
  geom_hline(yintercept = log2FD_threshold, lty = 2) +
  geom_hline(yintercept = -log2FD_threshold, lty = 2) +
  geom_hline(yintercept = 0) +
  scale_color_manual(values = c("salmon", "grey")) +
  coord_flip()

p

ggsave(paste(outputpath,"LCMV-cluster_proportion_test_labelled.pdf",sep=""), plot=p)

9. Pathway Enrichment Analysis

Here we perform pathway enrichment analysis for CD4+ clusters 3 and 5 that are different between WT vs KO in proportion.

########################### tables

# Current identity levels
levels(allsamplesgood)
##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
## [16] "15" "16" "17" "18" "19" "20" "21" "22"
head(Idents(allsamplesgood))
## AAACCCAAGAGTTGTA-WT AAACCCAAGATTTGCC-WT AAACCCAAGTTGTAGA-WT AAACCCACAAGTATCC-WT
##                  17                   1                   9                  19
## AAACCCAGTCCAGGTC-WT AAACCCATCCTGGGAC-WT
##                  16                  14
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# Extract markers for cluster
# Filter the dataframe for cluster 3 and 5 (CD4+)
# Filter the dataframe for cluster 3, p-value <= 0.05, and |avg_log2FC| > 1
cluster3_markers <- markersall[
  markersall$cluster == 3 &
  markersall$p_val <= 0.05 &
  abs(markersall$avg_log2FC) > 3,
]

View(cluster3_markers)

# Extract gene names into a vector called "cluster0_genes"
cluster3_genes <- cluster3_markers$gene

library(enrichR)
# Set connection site
setEnrichrSite("Enrichr")

# Obtain datasets
dbs <- listEnrichrDbs()

# Check for dataset versions
dbs[grep("mouse", dbs$libraryName, ignore.case = TRUE), ]
##     geneCoverage genesPerTerm                                libraryName
## 19         19270          388                           Mouse_Gene_Atlas
## 145         4558           54                    WikiPathways_2019_Mouse
## 148         8551           98                            KEGG_2019_Mouse
## 187        25381          250 RNAseq_Automatic_GEO_Signatures_Mouse_Down
## 188        25409          250   RNAseq_Automatic_GEO_Signatures_Mouse_Up
## 191        30006          815                         HDSigDB_Mouse_2021
## 206         6713           68                KOMP2_Mouse_Phenotypes_2022
## 229         4494           51                    WikiPathways_2024_Mouse
##                                link numTerms
## 19     http://biogps.org/downloads/       96
## 145   https://www.wikipathways.org/      176
## 148            https://www.kegg.jp/      303
## 187 https://maayanlab.cloud/archs4/     4216
## 188 https://maayanlab.cloud/archs4/     4216
## 191         https://www.hdinhd.org/     2579
## 206 https://www.mousephenotype.org/      529
## 229   https://www.wikipathways.org/      188
##                                      appyter categoryId
## 19  31191bfadded5f96983f93b2a113cf2110ff5ddb          5
## 145 e7750958da20f585c8b6d5bc4451a5a4305514ba          7
## 148 187eb44b2d6fa154ebf628eba1f18537f64e797c          7
## 187                                                   8
## 188                                                   8
## 191                                                   4
## 206                                                   3
## 229                                                   2
# Create a list of selected datasets
dbs <- c("KEGG_2019_Mouse")

# Run enrichr
enriched3 <- enrichr(cluster3_genes, dbs)
## Uploading data to Enrichr... Done.
##   Querying KEGG_2019_Mouse... Done.
## Parsing results... Done.
# Filter enrichment results
enriched3$KEGG_2019_Mouse <-enriched3$KEGG_2019_Mouse[enriched3$KEGG_2019_Mouse$P.value <= 0.05, ]

# Top 10 Significantly enriched pathways
head(enriched3$KEGG_2019_Mouse)
##                                        Term Overlap      P.value
## 1 Natural killer cell mediated cytotoxicity  23/118 1.043512e-14
## 2    Cytokine-cytokine receptor interaction  26/292 1.558898e-08
## 3                 Graft-versus-host disease   10/64 3.231583e-06
## 4                                Amoebiasis  12/106 1.094524e-05
## 5   Transcriptional misregulation in cancer  15/183 4.610103e-05
## 6          Inflammatory bowel disease (IBD)    8/59 8.950109e-05
##   Adjusted.P.value Old.P.value Old.Adjusted.P.value Odds.Ratio Combined.Score
## 1     2.316598e-12           0                    0  10.087373      324.74884
## 2     1.730376e-06           0                    0   4.062830       73.03628
## 3     2.391372e-04           0                    0   7.522230       95.10008
## 4     6.074607e-04           0                    0   5.196574       59.35842
## 5     2.046886e-03           0                    0   3.643573       36.37989
## 6     3.183196e-03           0                    0   6.346256       59.15510
##                                                                                                                                                             Genes
## 1                         KLRA4;CSF2;KLRA7;FCER1G;KLRC2;SYK;KLRA1;KLRA3;GZMB;KLRA8;KLRA9;NCR1;KLRB1C;TYROBP;KLRK1;FASL;IFNG;PLCG2;RAC3;CD244A;KLRD1;SH2D1B1;KLRC1
## 2 CSF2;TNFSF13B;IL1RL1;IL18RAP;CCL6;CXCR3;CCL5;CCL4;CCL3;TNFSF11;CCR8;TNFRSF8;CCR5;CCR2;IL12RB2;IL10;TNFSF14;IL15;IL1R2;TNFSF12;TNFRSF9;FASL;IFNG;IL2RB;XCL1;IL9R
## 3                                                                                                       H2-EB1;KLRA7;IFNG;FASL;KLRA1;KLRA3;GZMB;KLRD1;KLRC1;KLRA9
## 4                                                                                 IL10;GNA15;SERPINB1B;CSF2;ITGAM;PLCB4;IFNG;SERPINB6B;IL1R2;SERPINB9B;LAMC1;TLR4
## 5                                                                     CEBPA;MEF2C;CDKN1A;CSF2;ITGAM;HPGD;EYA1;IL1R2;GZMB;RUNX2;HHEX;IL2RB;HIST1H3B;HIST1H3C;CDK14
## 6                                                                                                               IL10;H2-EB1;IL18RAP;IFNG;TBX21;FOXP3;TLR4;IL12RB2
# Filter the dataframe for cluster 5, p-value <= 0.05, and |avg_log2FC| > 1
cluster5_markers <- markersall[
  markersall$cluster == 5 &
  markersall$p_val <= 0.05 &
  abs(markersall$avg_log2FC) > 3,
]

View(cluster5_markers)

# Extract gene names into a vector called "cluster0_genes"
cluster5_genes <- cluster5_markers$gene

library(enrichR)
# Set connection site
setEnrichrSite("Enrichr")

# Obtain datasets
dbs <- listEnrichrDbs()

# Check for dataset versions
dbs[grep("mouse", dbs$libraryName, ignore.case = TRUE), ]
##     geneCoverage genesPerTerm                                libraryName
## 19         19270          388                           Mouse_Gene_Atlas
## 145         4558           54                    WikiPathways_2019_Mouse
## 148         8551           98                            KEGG_2019_Mouse
## 187        25381          250 RNAseq_Automatic_GEO_Signatures_Mouse_Down
## 188        25409          250   RNAseq_Automatic_GEO_Signatures_Mouse_Up
## 191        30006          815                         HDSigDB_Mouse_2021
## 206         6713           68                KOMP2_Mouse_Phenotypes_2022
## 229         4494           51                    WikiPathways_2024_Mouse
##                                link numTerms
## 19     http://biogps.org/downloads/       96
## 145   https://www.wikipathways.org/      176
## 148            https://www.kegg.jp/      303
## 187 https://maayanlab.cloud/archs4/     4216
## 188 https://maayanlab.cloud/archs4/     4216
## 191         https://www.hdinhd.org/     2579
## 206 https://www.mousephenotype.org/      529
## 229   https://www.wikipathways.org/      188
##                                      appyter categoryId
## 19  31191bfadded5f96983f93b2a113cf2110ff5ddb          5
## 145 e7750958da20f585c8b6d5bc4451a5a4305514ba          7
## 148 187eb44b2d6fa154ebf628eba1f18537f64e797c          7
## 187                                                   8
## 188                                                   8
## 191                                                   4
## 206                                                   3
## 229                                                   2
# Create a list of selected datasets
dbs <- c("KEGG_2019_Mouse")

# Run enrichr
enriched5 <- enrichr(cluster5_genes, dbs)
## Uploading data to Enrichr... Done.
##   Querying KEGG_2019_Mouse... Done.
## Parsing results... Done.
# Filter enrichment results
enriched5$KEGG_2019_Mouse <-enriched5$KEGG_2019_Mouse[enriched5$KEGG_2019_Mouse$P.value <= 0.05, ]

# Top 10 Significantly enriched pathways
head(enriched5$KEGG_2019_Mouse)
##                                        Term Overlap      P.value
## 1 Natural killer cell mediated cytotoxicity  22/118 3.572944e-16
## 2                 Graft-versus-host disease    9/64 2.643926e-06
## 3                Hematopoietic cell lineage    9/94 6.267533e-05
## 4              Systemic lupus erythematosus  11/143 7.503121e-05
## 5                                Cell cycle   9/123 4.848522e-04
## 6   Transcriptional misregulation in cancer  11/183 6.408525e-04
##   Adjusted.P.value Old.P.value Old.Adjusted.P.value Odds.Ratio Combined.Score
## 1     7.717560e-14           0                    0  12.826110      456.19873
## 2     2.855440e-04           0                    0   8.848117      113.63853
## 3     4.051685e-03           0                    0   5.716477       55.32145
## 4     4.051685e-03           0                    0   4.513194       42.86454
## 5     2.094562e-02           0                    0   4.255961       32.48008
## 6     2.307069e-02           0                    0   3.456508       25.41471
##                                                                                                                                Genes
## 1 KLRA4;CSF2;KLRA7;FCER1G;KLRC2;SYK;KLRA1;KLRA3;PRF1;GZMB;KLRA8;KLRA9;NCR1;KLRB1C;TYROBP;KLRK1;PLCG2;RAC3;CD244A;KLRD1;SH2D1B1;KLRC1
## 2                                                                              KLRA7;KLRA1;H2-DMB1;KLRA3;PRF1;GZMB;KLRD1;KLRC1;KLRA9
## 3                                                                                CSF2;ITGAM;CD8A;ITGA2;H2-DMB1;ITGA1;CD7;CD8B1;ITGA5
## 4                                 HIST1H2BJ;H2-DMB1;HIST1H2AE;HIST1H3G;HIST1H2BH;HIST1H3B;HIST1H2AP;HIST1H3C;HIST1H3D;HIST1H2AB;C1QC
## 5                                                                                   CDC20;CCNA2;CCNB2;CDKN2B;PLK1;E2F1;TTK;CDC6;BUB1
## 6                                                         HHEX;CSF2;ITGAM;EYA1;ZBTB16;HIST1H3G;GZMB;HIST1H3B;HIST1H3C;CDK14;HIST1H3D

10. Save

#save.image(file=paste(outputpath,"LCMV.RData",sep=""))
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.7
##
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base
##
## other attached packages:
##  [1] enrichR_3.2                 jsonlite_1.8.8
##  [3] httr_1.4.7                  SCassist_0.1.0
##  [5] rollama_0.1.0               scProportionTest_0.0.0.9000
##  [7] cowplot_1.1.3               lubridate_1.9.3
##  [9] forcats_1.0.0               stringr_1.5.1
## [11] dplyr_1.1.4                 purrr_1.0.2
## [13] readr_2.1.5                 tidyr_1.3.1
## [15] tibble_3.2.1                tidyverse_2.0.0
## [17] plotly_4.10.4               ggplot2_3.5.1
## [19] Seurat_5.0.0                SeuratObject_5.0.2
## [21] sp_2.1-4
##
## loaded via a namespace (and not attached):
##   [1] fs_1.6.4                matrixStats_1.3.0       spatstat.sparse_3.1-0
##   [4] enrichplot_1.24.4       RColorBrewer_1.1-3      tools_4.4.1
##   [7] sctransform_0.4.1       utf8_1.2.4              R6_2.5.1
##  [10] lazyeval_0.2.2          uwot_0.2.2              withr_3.0.0
##  [13] gridExtra_2.3           progressr_0.14.0        textshaping_0.4.0
##  [16] cli_3.6.3               Biobase_2.64.0          spatstat.explore_3.3-2
##  [19] fastDummies_1.7.4       scatterpie_0.2.4        labeling_0.4.3
##  [22] sass_0.4.9              spatstat.data_3.1-2     ggridges_0.5.6
##  [25] pbapply_1.7-2           systemfonts_1.1.0       yulab.utils_0.1.7
##  [28] gson_0.1.0              DOSE_3.30.5             R.utils_2.12.3
##  [31] WriteXLS_6.6.0          parallelly_1.38.0       rstudioapi_0.16.0
##  [34] RSQLite_2.3.7           visNetwork_2.1.2        generics_0.1.3
##  [37] gridGraphics_0.5-1      ica_1.0-3               spatstat.random_3.3-2
##  [40] GO.db_3.19.1            Matrix_1.7-0            ggbeeswarm_0.7.2
##  [43] fansi_1.0.6             S4Vectors_0.42.1        abind_1.4-5
##  [46] R.methodsS3_1.8.2       lifecycle_1.0.4         yaml_2.3.9
##  [49] qvalue_2.36.0           Rtsne_0.17              grid_4.4.1
##  [52] blob_1.2.4              promises_1.3.0          crayon_1.5.3
##  [55] miniUI_0.1.1.1          lattice_0.22-6          KEGGREST_1.44.1
##  [58] pillar_1.9.0            knitr_1.48              fgsea_1.30.0
##  [61] rjson_0.2.21            future.apply_1.11.2     codetools_0.2-20
##  [64] fastmatch_1.1-4         leiden_0.4.3.1          glue_1.7.0
##  [67] ggfun_0.1.5             spatstat.univar_3.0-1   data.table_1.15.4
##  [70] vctrs_0.6.5             png_0.1-8               treeio_1.28.0
##  [73] spam_2.10-0             gtable_0.3.5            cachem_1.1.0
##  [76] xfun_0.45               mime_0.12               tidygraph_1.3.1
##  [79] survival_3.6-4          fitdistrplus_1.2-1      ROCR_1.0-11
##  [82] nlme_3.1-164            ggtree_3.12.0           bit64_4.0.5
##  [85] RcppAnnoy_0.0.22        GenomeInfoDb_1.40.1     bslib_0.7.0
##  [88] irlba_2.3.5.1           vipor_0.4.7             KernSmooth_2.23-24
##  [91] colorspace_2.1-0        BiocGenerics_0.50.0     DBI_1.2.3
##  [94] ggrastr_1.0.2           tidyselect_1.2.1        curl_5.2.1
##  [97] bit_4.0.5               compiler_4.4.1          httr2_1.0.1
## [100] hdf5r_1.3.11            shadowtext_0.1.4        scales_1.3.0
## [103] lmtest_0.9-40           rappdirs_0.3.3          digest_0.6.36
## [106] goftest_1.2-3           spatstat.utils_3.1-0    rmarkdown_2.27
## [109] XVector_0.44.0          htmltools_0.5.8.1       pkgconfig_2.0.3
## [112] highr_0.11              fastmap_1.2.0           rlang_1.1.4
## [115] htmlwidgets_1.6.4       UCSC.utils_1.0.0        shiny_1.8.1.1
## [118] farver_2.1.2            jquerylib_0.1.4         zoo_1.8-12
## [121] BiocParallel_1.38.0     GOSemSim_2.30.2         R.oo_1.26.0
## [124] magrittr_2.0.3          GenomeInfoDbData_1.2.12 ggplotify_0.1.2
## [127] dotCall64_1.1-1         patchwork_1.2.0         munsell_0.5.1
## [130] Rcpp_1.0.12             ape_5.8                 viridis_0.6.5
## [133] reticulate_1.39.0       stringi_1.8.4           ggraph_2.2.1
## [136] zlibbioc_1.50.0         MASS_7.3-60.2           plyr_1.8.9
## [139] org.Hs.eg.db_3.19.1     parallel_4.4.1          listenv_0.9.1
## [142] ggrepel_0.9.5           deldir_2.0-4            Biostrings_2.72.1
## [145] graphlayouts_1.2.0      splines_4.4.1           tensor_1.5
## [148] hms_1.1.3               igraph_2.0.3            spatstat.geom_3.3-3
## [151] RcppHNSW_0.6.0          reshape2_1.4.4          stats4_4.4.1
## [154] evaluate_0.24.0         BiocManager_1.30.23     tzdb_0.4.0
## [157] tweenr_2.0.3            httpuv_1.6.15           RANN_2.6.2
## [160] polyclip_1.10-6         future_1.34.0           scattermore_1.2
## [163] ggforce_0.4.2           xtable_1.8-4            RSpectra_0.16-1
## [166] tidytree_0.4.6          later_1.3.2             viridisLite_0.4.2
## [169] ragg_1.3.2              clusterProfiler_4.12.6  aplot_0.2.3
## [172] memoise_2.0.1           beeswarm_0.4.0          AnnotationDbi_1.66.0
## [175] IRanges_2.38.1          cluster_2.1.6           timechange_0.3.0
## [178] globals_0.16.3