-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path3_IntegrationAndClustering.R
More file actions
178 lines (159 loc) · 8.25 KB
/
3_IntegrationAndClustering.R
File metadata and controls
178 lines (159 loc) · 8.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
# here we don't use the filtered data after QC for demonstration
seurat_obj_A1 <- readRDS("./data/seurat_obj_A1.rds")
seurat_obj_B1 <- readRDS("./data/seurat_obj_B1.rds")
# sctransform respectively
seurat_obj_A1 <- SCTransform(seurat_obj_A1, assay = "Spatial", verbose = FALSE)
seurat_obj_B1 <- SCTransform(seurat_obj_B1, assay = "Spatial", verbose = FALSE)
# Integrate the two spatial datasets
spatial_list <- list(seurat_obj_A1, seurat_obj_B1)
features <- SelectIntegrationFeatures(object.list = spatial_list, nfeatures = 3000)
# Preprocessing for SCT integration
# reference: https://github.com/satijalab/seurat/issues/8216
spatial_list[[1]][["RNA"]] <-spatial_list[[1]][["Spatial"]]
spatial_list[[2]][["RNA"]] <-spatial_list[[2]][["Spatial"]]
# add sample information
spatial_list[[1]]$sample <- "A1_colon_d0"
spatial_list[[2]]$sample <- "B1_colon_d14"
spatial_list <- PrepSCTIntegration(object.list = spatial_list, anchor.features = features)
# Using anchors to integrate data
anchors <- FindIntegrationAnchors(
object.list = spatial_list,
normalization.method = "SCT",
anchor.features = features
)
# create the integrated data
seurat_combined <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
# Run the standard workflow for visualization and clustering
seurat_combined <- RunPCA(seurat_combined, verbose = FALSE)
# Choose the best number of pc to use
pct <- seurat_combined[["pca"]]@stdev / sum(seurat_combined[["pca"]]@stdev) * 100
cumu <- cumsum(pct)
pc.use <- min(which(cumu > 90 & pct < 5)[1], sort(which((pct[1:length(pct) - 1] - pct[2:length(pct)]) > 0.1), decreasing = TRUE)[1] + 1)
p_elbow <- ElbowPlot(seurat_combined, ndims = pc.use+5)$data %>% ggplot() +
geom_point(aes(x = dims, y = stdev)) +
geom_vline(xintercept = pc.use, color = "darkred") +
theme_bw() + labs(title = "Elbow plot: quantitative approach")
p_elbow
ggsave("./results/integrated_elbow_plot.tiff", p_elbow, width = 6, height = 4, dpi = 300)
seurat_combined <- RunUMAP(seurat_combined, dims = 1:pc.use)
seurat_combined <- FindNeighbors(seurat_combined, dims = 1:pc.use)
seurat_combined <- FindClusters(seurat_combined, resolution = 0.5)
DimPlot(seurat_combined, reduction = "umap", label = TRUE, pt.size = 1) + NoLegend()
p <- DimPlot(seurat_combined, group.by = "sample", label = FALSE)
ggsave("./results/umap_integrated_by_sample.tiff", p, width = 8, height = 6)
p <- DimPlot(seurat_combined, group.by = "seurat_clusters", label = TRUE)
ggsave("./results/umap_integrated_by_cluster.tiff", p, width = 8, height = 6)
DimPlot(seurat_combined, group.by = "seurat_clusters", label = TRUE)
# Save the integrated data
saveRDS(seurat_combined, file = "./data/seurat_integrated_spatial.rds")
# visualization the clustering result on the spatial location
p <- Seurat::SpatialDimPlot(seurat_combined,
pt.size.factor = 3,
image.scale = "hires", # important when using high resolution image
group.by = "seurat_clusters"
)
ggsave("./results/spatial_cluster_integrated.tiff", p, width = 8, height = 6)
# Find markers for each cluster
cluster_markers <- FindAllMarkers(seurat_combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(cluster_markers, file = "./results/integrated_cluster_markers.csv")
top10 <- cluster_markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
p <- DoHeatmap(seurat_combined, features = top10$gene) + NoLegend()
ggsave("./results/integrated_cluster_markers_heatmap.tiff", p, width = 18, height = 18)
# DEG and Vocalno Plot between A1_colon_d0 and B1_colon_d14
de_genes <- FindMarkers(
seurat_combined,
ident.1 = "B1_colon_d14",
ident.2 = "A1_colon_d0", #control group
group.by = "sample",
min.pct = 0.1,
logfc.threshold = 0.25
)
write.csv(de_genes, file = "./results/integrated_de_genes_A1_vs_B1.csv")
de_genes$sig <- ifelse(de_genes$avg_log2FC>0, "Up", "Down")
de_genes$sig <- ifelse(de_genes$p_val_adj < 0.05 & abs(de_genes$avg_log2FC) >= 1, de_genes$sig , "no")
ggplot(de_genes, aes(x = avg_log2FC, y = -log10(p_val_adj), color=sig)) +
geom_point(alpha = 1) +
xlab("Average Log2 Fold Change") +
ylab("-Log10 Adjusted P-value") +
ggtitle("Volcano Plot: A1_colon_d0 vs B1_colon_d14") +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black")+
scale_color_manual(values = list(
"Up" = "red",
"Down" = "blue",
"no" = "grey"
)) +
ggprism::theme_prism()
ggsave("./results/volcano_plot_A1_vs_B1.tiff", width = 6, height = 3, dpi = 300)
#optional: visualize some gene expression
p <- SpatialFeaturePlot(seurat_combined, rownames(de_genes)[1:2], image.scale = "hires", pt.size.factor = 2)
ggsave("./results/spatial_feature_de_genes.tiff", p, width = 10, height = 6, dpi=300)
# Spatially Variable Features Detection -----------------------------
# Identify spatially variable features
# attention here to run FindSpatiallyVariableFeatures on each sample separately
seurat_obj_A1 <- FindSpatiallyVariableFeatures(
seurat_obj_A1,
assay = "SCT",
features = VariableFeatures(seurat_combined)[1:1000],
selection.method = "moransi", # faster than markvariogram, install.packages('Rfast2') for faster computation
verbose = TRUE
)
top_sv_genes <- head(SpatiallyVariableFeatures(seurat_obj_A1, selection.method = "moransi"), 2)
# visualization of spatially variable features
p <- SpatialFeaturePlot(seurat_obj_A1, features = top_sv_genes, ncol = 2, image.scale = "hires", pt.size.factor = 2.5)
ggsave("./results/spatially_variable_genes_A1.tiff", p, width = 10, height = 6, dpi=300)
seurat_obj_B1 <- FindSpatiallyVariableFeatures(
seurat_obj_B1,
assay = "SCT",
features = VariableFeatures(seurat_combined)[1:1000],
selection.method = "moransi", # faster than markvariogram, install.packages('Rfast2') for faster computation
verbose = TRUE
)
top_sv_genes <- head(SpatiallyVariableFeatures(seurat_obj_B1, selection.method = "moransi"), 2)
# visualization of spatially variable features
p <- SpatialFeaturePlot(seurat_obj_B1, features = top_sv_genes, ncol = 2, image.scale = "hires", pt.size.factor = 2)
ggsave("./results/spatially_variable_genes_B1.tiff", p, width = 10, height = 6, dpi=300)
# -------------------------------------------------------------------
# Validation of Clustering via Colon Histology Layer-Specific Markers
# (Generating Figure 2H for the manuscript)
# -------------------------------------------------------------------
# Switch default assay back to "SCT" to access the full transcriptome
# (The 'integrated' assay only contains the highly variable integration features)
DefaultAssay(seurat_combined) <- "SCT"
# Define canonical layer-specific markers for colon tissue
colon_markers <- c(
"Epcam", "Krt8", "Muc2", # Mucosal Epithelium & Goblet Cells (黏膜上皮/杯状细胞)
"Col1a1", "Col1a2", "Vim", # Lamina Propria & Submucosa (固有层/黏膜下层间质)
"Acta2", "Tagln", "Myh11" # Muscularis Propria (平滑肌层)
)
# Safely subset to markers actually present in the data matrix to avoid plot errors
features_to_plot <- intersect(colon_markers, rownames(seurat_combined))
# 1. DotPlot: Quantitative expression abundance across computational clusters
p_dot <- DotPlot(seurat_combined, features = features_to_plot, group.by = "seurat_clusters") +
theme_classic() +
coord_flip() + # Flip axes: genes as rows, clusters as columns for better readability
labs(title = "Validation of Colon Histology Layers",
x = "Layer-Specific Marker Genes",
y = "Computational Clusters") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold")
)
ggsave("./results/colon_layer_markers_dotplot.tiff", p_dot, width = 7, height = 6, dpi = 300)
# 2. SpatialFeaturePlot: Spatial mapping of representative layer markers
# Extracting one representative marker per anatomical layer
spatial_markers <- intersect(c("Epcam", "Col1a1", "Acta2"), rownames(seurat_combined))
if(length(spatial_markers) > 0) {
p_spatial_val <- SpatialFeaturePlot(
seurat_combined,
features = spatial_markers,
ncol = 2,
image.scale = "hires",
pt.size.factor = 2.5
) & theme(legend.position = "right")
ggsave("./results/colon_layer_markers_spatial.tiff", p_spatial_val, width = 5, height = 5, dpi = 300)
}