We plot the expression of the 1000 most variably expressed genes before and after correction as heatmaps below to verify that batch effects have indeed been corrected.
# Heatmaps of the `ntop` most variable genes, built once from the uncorrected
# VST values and once from the corrected (`cvst`) values so the two can be
# compared.
heatmap_clean_vst_most_var <- salmon$clean$vst %>%
  most_var(ntop) %>%
  plot_heatmap(colours)
heatmap_clean_cvst_most_var <- salmon$clean$cvst %>%
  most_var(ntop) %>%
  plot_heatmap(colours)
# Flag the rows of the corrected expression object whose names appear in
# `ebv_genes`, then plot only those rows, unscaled, with Euclidean column
# distances and `cutree_cols = 2`.
ebv_genes_idx <- rownames(salmon$clean$cvst) %in% ebv_genes
heatmap_salmon_no_mz_ebv <- plot_heatmap(
  salmon$clean$cvst[ebv_genes_idx, ],
  colours,
  scale = "none",
  cutree_cols = 2,
  clustering_distance_cols = "euclidean"
)
# Gene identifiers present in the raw count matrix.
measured_genes <- rownames(salmon$raw$counts)
# Samples whose `ebv_type` is anything other than "None". `which()` drops
# samples with a missing `ebv_type`; a bare logical index would turn each NA
# into an NA sample name, which cannot be used to select count columns.
ebvpos_patients <-
  rownames(annotations)[which(annotations$ebv_type != "None")]
# Row annotation table for the latency genes: every column except `gene` is
# recoded from 0 / non-zero to "Absent" / "Present", and `gene` is moved into
# the row names.
latency_annot <-
  latency_genes %>%
  mutate_at(
    vars(-gene),
    .funs = function(flag) ifelse(flag == 0, "Absent", "Present")
  ) %>%
  as.data.frame() %>%
  column_to_rownames("gene")
# One Absent/Present colour pair per annotation column, returned as a list
# named after the columns of `latency_annot`.
latency_colours <- map(
  set_names(names(latency_annot)),
  function(column) c(Absent = "#bfbfbf", Present = "#666666")
)
# Heatmap of log10(1 + raw count) for the latency genes that were measured,
# restricted to the EBV-positive samples. Rows keep the order given by
# `latency_genes` (no row clustering); columns are clustered on correlation
# distance.
latency_genes %>%
  filter(gene %in% measured_genes) %$%
  salmon$raw$counts[gene, ebvpos_patients] %>%
  {log10(1 + .)} %>%
  plot_heatmap(
    c(colours, latency_colours),
    # Exclude the "lib_date" column(s) with a logical mask rather than
    # `-grep()`: a negative index built from zero matches selects no columns
    # at all. `drop = FALSE` keeps the result tabular even if a single column
    # remains.
    metadata = annotations[
      , !grepl("lib_date", names(annotations)), drop = FALSE
    ],
    annotation_row = rev(latency_annot),
    scale = "none",
    cluster_rows = FALSE,
    clustering_distance_cols = "correlation"
  )
# Pairwise correlations between the columns of the corrected expression
# matrix, stored on `salmon$clean` and drawn as an unscaled heatmap with the
# row dendrogram height set to zero.
salmon$clean$cor <- salmon$clean$cvst %>%
  assay() %>%
  cor()
corplot_salmon_clean <- plot_heatmap(
  salmon$clean$cor,
  colours,
  colData(salmon$clean$dds),
  scale = "none",
  treeheight_row = 0
)
# PCA of the corrected expression values, followed by one PCA plot per sample
# covariate. Covariates whose name starts with "SV" are left out.
# NOTE(review): the meaning of the trailing `c(1, 1)` argument depends on
# plot_pca(), which is defined elsewhere - confirm it is the intended value.
salmon$clean$pca <- calc_pca(salmon$clean$cvst)
pca_salmon_clean_plots <- map(
  stringr::str_subset(names(colData(salmon$clean$dds)), "^(?!SV)"),
  function(covariate) plot_pca(salmon$clean$pca, covariate, colours, c(1, 1))
)
# Scree plot built from the first ten rows of the PCA's `percent_var` table.
# The constant `group` joins all points with a single line.
screeplot_salmon <-
  salmon$clean$pca$percent_var %>%
  slice(1:10) %>%
  ggplot(aes(x = pc, y = percent_var, group = "all")) +
  geom_point() +
  geom_line() +
  labs(title = "PCA Scree Plot")

# Add the scree plot after the per-covariate PCA plots and lay everything out
# on a three-column grid.
pca_salmon_clean_plots <- append(
  pca_salmon_clean_plots,
  list(screeplot_salmon)
)
gridExtra::grid.arrange(grobs = pca_salmon_clean_plots, ncol = 3)
# Heatmaps of the corrected expression values for each gene set stored in
# `genes` (mbl, morgan, wright, malaria). The last two are drawn without cell
# borders.
heatmap_salmon_clean_mbl <- salmon$clean$cvst[genes$mbl, ] %>%
  plot_heatmap(colours)
heatmap_salmon_clean_morgan <- salmon$clean$cvst[genes$morgan, ] %>%
  plot_heatmap(colours)
heatmap_salmon_clean_wright <- salmon$clean$cvst[genes$wright, ] %>%
  plot_heatmap(colours, border_color = NA)
heatmap_salmon_clean_malaria <- salmon$clean$cvst[genes$malaria, ] %>%
  plot_heatmap(colours, border_color = NA)
# One heatmap per entry of `salmon$clean$de`: take the significant genes from
# that entry's `lfc_0` result, pass their corrected expression through
# most_var(., 1000), and title the plot with the entry name.
# NOTE(review): values are not auto-printed inside a `for` loop, so this relies
# on plot_heatmap() drawing as a side effect - confirm.
for (var in names(salmon$clean$de)) {
  plot_heatmap(
    most_var(
      salmon$clean$cvst[get_sig_genes(salmon$clean$de[[var]]$lfc_0), ],
      1000
    ),
    colours,
    main = var
  )
}
# One heatmap per entry of `salmon$muts$de`: take the significant genes from
# that entry's `lfc_0` result, pass their corrected expression (from
# `salmon$clean$cvst`) through most_var(., 1000), and title the plot with the
# entry name.
# NOTE(review): values are not auto-printed inside a `for` loop, so this relies
# on plot_heatmap() drawing as a side effect - confirm.
for (var in names(salmon$muts$de)) {
  plot_heatmap(
    most_var(
      salmon$clean$cvst[get_sig_genes(salmon$muts$de[[var]]$lfc_0), ],
      1000
    ),
    colours,
    main = var
  )
}