We plot the expression of the 1000 most variably expressed genes before and after correction as heatmaps below to verify that batch effects have indeed been corrected.
heatmap_clean_vst_most_var <-
plot_heatmap(most_var(salmon$clean$vst, ntop), colours)
heatmap_clean_cvst_most_var <-
plot_heatmap(most_var(salmon$clean$cvst, ntop), colours)
salmon$clean$cor <- cor(assay(salmon$clean$cvst))
corplot_salmon_clean <- plot_heatmap(
salmon$clean$cor, colours, colData(salmon$clean$dds), scale = "none",
treeheight_row = 0)
salmon$clean$pca <- calc_pca(salmon$clean$cvst)
pca_salmon_clean_plots <-
names(colData(salmon$clean$dds)) %>%
stringr::str_subset("^(?!SV)") %>%
map(~plot_pca(salmon$clean$pca, .x, colours, c(1,1)))
screeplot_salmon <-
salmon$clean$pca$percent_var %>%
slice(1:10) %>%
ggplot(aes(x = pc, y = percent_var, group = "all")) +
geom_point() +
geom_line() +
labs(title = "PCA Scree Plot")
pca_salmon_clean_plots <- c(pca_salmon_clean_plots, list(screeplot_salmon))
gridExtra::grid.arrange(grobs = pca_salmon_clean_plots, ncol = 3)
heatmap_salmon_clean_mbl <-
plot_heatmap(salmon$clean$cvst[genes$mbl, ], colours)
heatmap_salmon_clean_morgan <-
plot_heatmap(salmon$clean$cvst[genes$morgan, ], colours)
heatmap_salmon_clean_wright <-
plot_heatmap(salmon$clean$cvst[genes$wright, ], colours, border_color = NA)
heatmap_salmon_clean_malaria <-
plot_heatmap(salmon$clean$cvst[genes$malaria, ], colours, border_color = NA)
heatmap_salmon_clean_ccp <-
ConsensusClusterPlus::ConsensusClusterPlus(
assay(salmon$clean$cvst), maxK = 5, pItem = 0.8,
pFeature = 0.8, seed = rng_seed)