Genes Associated with Cell Sorting

# Tumour samples are the columns of the clean count matrix whose names start
# with "BL". subset_salmon2() is a project helper; presumably it derives a
# "tumours" entry from the "clean" entry limited to these samples and to the
# genes in genes$no_mz -- confirm against its definition.
tumours <- grep("^BL", colnames(salmon$clean$counts), value = TRUE)
salmon  <- subset_salmon2(
  salmon, "tumours", "clean",
  genes    = genes$no_mz,
  patients = tumours
)

# Drop factor levels that no longer occur in the tumour-only sample table:
# convert colData to a data.frame, apply droplevels() to every factor column,
# then convert back to a DataFrame before storing it on the dataset.
colData(salmon$tumours$dds) <- as(
  mutate_if(
    as.data.frame(colData(salmon$tumours$dds)),
    is.factor,
    droplevels
  ),
  "DataFrame"
)

# Fit the tumour-only model with cell_sorting as the last design term, then
# extract the Sorted vs. Unsorted contrast (numerator coefficient first,
# denominator second) into a freshly initialised de$cs list.
design(salmon$tumours$dds) <-
  ~ sex + SV1 + SV2 + SV3 + ebv_type + clinical_variant + cell_sorting
salmon$tumours$dds <- DESeq(
  object = salmon$tumours$dds,
  minReplicatesForReplace = 5
)
salmon$tumours$de <- list(
  cs = list(
    lfc_0 = results(
      object   = salmon$tumours$dds,
      contrast = list("cell_sortingSorted", "cell_sortingUnsorted")
    )
  )
)

summary(object = salmon$tumours$de$cs$lfc_0)

out of 36687 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)     : 4422, 12% 
LFC < 0 (down)   : 4342, 12% 
outliers [1]     : 0, 0% 
low counts [2]   : 0, 0% 
(mean count < 1)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# MA plot for the cell-sorting contrast; y-axis fixed to [-5, 5].
plotMA(object = salmon$tumours$de$cs$lfc_0, ylim = c(-5, 5))

Model Fitting

# Remove every gene reported by get_sig_genes() for the cell-sorting contrast
# from genes$no_mz, then refit the "clean" dataset on the remaining genes
# using the shared deseq_design formula.
clean_genes <- setdiff(
  x = genes$no_mz,
  y = get_sig_genes(salmon$tumours$de$cs$lfc_0)
)

design(salmon$clean$dds) <- deseq_design
salmon$clean$dds <- DESeq(
  object = salmon$clean$dds[clean_genes, ],
  minReplicatesForReplace = 5
)

Tumour vs. Normal

# Tumour (Sporadic, Endemic) vs. normal (Centroblasts, Centrocytes).
#
# With a list contrast, results() adds up the coefficients named in each
# element and multiplies the two sums by listValues (default c(1, -1)). With
# two coefficients on each side the default reports the *sum* of the tumour
# effects minus the *sum* of the normal effects, i.e. twice the average
# tumour-vs-normal log2 fold change. Weighting each side by 1/2 reports the
# difference of the group averages instead. The Wald statistic at a zero LFC
# threshold is unchanged by this rescaling; only the reported effect size
# (log2FoldChange / lfcSE) is corrected.
salmon$clean$de <- list(
  tn = list(
    lfc_0 = results(
      salmon$clean$dds,
      contrast   = list(
        c("clinical_variantSporadic", "clinical_variantEndemic"),
        c("clinical_variantCentroblasts", "clinical_variantCentrocytes")),
      listValues = c(1 / 2, -1 / 2)
    )
  )
)

summary(salmon$clean$de$tn$lfc_0)

out of 27923 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)     : 9956, 36% 
LFC < 0 (down)   : 8008, 29% 
outliers [1]     : 0, 0% 
low counts [2]   : 0, 0% 
(mean count < 1)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# MA plot for the tumour vs. normal contrast; y-axis fixed to [-5, 5].
plotMA(object = salmon$clean$de$tn$lfc_0, ylim = c(-5, 5))

Endemic vs. Sporadic

# Endemic (numerator) vs. Sporadic (denominator) clinical variant.
salmon$clean$de[["cv"]][["lfc_0"]] <- results(
  object   = salmon$clean$dds,
  contrast = list("clinical_variantEndemic", "clinical_variantSporadic")
)

summary(object = salmon$clean$de$cv$lfc_0)

out of 27923 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)     : 2166, 7.8% 
LFC < 0 (down)   : 2690, 9.6% 
outliers [1]     : 0, 0% 
low counts [2]   : 0, 0% 
(mean count < 1)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# MA plot for the Endemic vs. Sporadic contrast; y-axis fixed to [-5, 5].
plotMA(object = salmon$clean$de$cv$lfc_0, ylim = c(-5, 5))

EBV-positive vs. EBV-negative

# EBV-positive (Type.1 and Type.2) vs. EBV-negative (None).
#
# With a list contrast, results() adds up the coefficients named in each
# element and multiplies the two sums by listValues (default c(1, -1)). The
# numerator here has two coefficients and the denominator one, so the default
# would test Type.1 + Type.2 - None, which is not a comparison of EBV-positive
# against EBV-negative samples. Weighting the numerator by 1/2 tests the
# average of the two EBV-positive levels against None.
# NOTE(review): any previously rendered summary or plot for this contrast
# predates this weighting and must be regenerated.
salmon$clean$de$ebv$lfc_0 <- results(
  salmon$clean$dds,
  contrast   = list(c("ebv_typeType.1", "ebv_typeType.2"),
                    c("ebv_typeNone")),
  listValues = c(1 / 2, -1))

summary(salmon$clean$de$ebv$lfc_0)

out of 27923 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)     : 2779, 10% 
LFC < 0 (down)   : 2133, 7.6% 
outliers [1]     : 0, 0% 
low counts [2]   : 542, 1.9% 
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# MA plot for the EBV-positive vs. EBV-negative contrast; y-axis fixed to
# [-5, 5].
plotMA(object = salmon$clean$de$ebv$lfc_0, ylim = c(-5, 5))

EBV Type 1 vs. EBV Type 2

# EBV Type.1 (numerator) vs. EBV Type.2 (denominator).
salmon$clean$de[["ebvt"]][["lfc_0"]] <- results(
  object   = salmon$clean$dds,
  contrast = list("ebv_typeType.1", "ebv_typeType.2")
)

summary(object = salmon$clean$de$ebvt$lfc_0)

out of 27923 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)     : 17, 0.061% 
LFC < 0 (down)   : 21, 0.075% 
outliers [1]     : 0, 0% 
low counts [2]   : 542, 1.9% 
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# MA plot for the EBV Type.1 vs. Type.2 contrast; y-axis fixed to [-5, 5].
plotMA(object = salmon$clean$de$ebvt$lfc_0, ylim = c(-5, 5))

Centroblasts vs. Centrocytes

# Centroblasts (numerator) vs. Centrocytes (denominator).
salmon$clean$de[["centro"]][["lfc_0"]] <- results(
  object   = salmon$clean$dds,
  contrast = list("clinical_variantCentroblasts", "clinical_variantCentrocytes")
)

summary(object = salmon$clean$de$centro$lfc_0)

out of 27923 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)     : 986, 3.5% 
LFC < 0 (down)   : 1336, 4.8% 
outliers [1]     : 0, 0% 
low counts [2]   : 1083, 3.9% 
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# MA plot for the Centroblasts vs. Centrocytes contrast; y-axis fixed to
# [-5, 5].
plotMA(object = salmon$clean$de$centro$lfc_0, ylim = c(-5, 5))

Gene-based Differential Gene Expression

# Genes whose mutation status is modelled below, and the set of patients that
# appear in the MAF. subset_salmon2() is a project helper; presumably it
# derives a "muts" entry from "clean" restricted to those patients -- confirm
# against its definition.
tfs <- c(
  "TFAP4",
  "DDX3X",
  "ARID1A",
  "SMARCA4"
)
maf_patients <- unique(maf@data$patient)
salmon <- subset_salmon2(
  salmon, "muts", "clean",
  patients = maf_patients
)

# Add mutation information to colData.
#
# For every gene in `tfs`, each sample of salmon$muts$dds is labelled "Mutated"
# when the MAF holds at least one variant passing is_nonsynonymous() for that
# patient and gene, and "Unmutated" otherwise. The status is computed per
# sample from the sample IDs, rather than by reshaping the MAF into a wide
# table, so that:
#   * samples with no qualifying variant at all still get a row (they are
#     "Unmutated" for every gene) instead of causing a row-count mismatch;
#   * every gene in `tfs` always yields a column, even if no sample carries a
#     qualifying variant in it;
#   * the sample row names of colData are kept.
# Unused factor levels are then dropped from every factor column, as before.
colData(salmon$muts$dds) <- local({
  sample_info <- colData(salmon$muts$dds)
  sample_ids  <- rownames(sample_info)
  stopifnot(!is.null(sample_ids))

  # One row per (patient, gene) pair with a nonsynonymous variant.
  nonsyn_hits <- maf@data %>%
    filter(
      is_nonsynonymous(Consequence),
      patient %in% sample_ids,
      Hugo_Symbol %in% tfs) %>%
    select(patient, Hugo_Symbol) %>%
    distinct()

  for (gene in tfs) {
    mutated_ids <- nonsyn_hits$patient[nonsyn_hits$Hugo_Symbol == gene]
    is_mutated  <- sample_ids %in% mutated_ids
    sample_info[[gene]] <- factor(
      ifelse(is_mutated, "Mutated", "Unmutated"),
      levels = c("Mutated", "Unmutated"))
  }

  for (column in colnames(sample_info)) {
    if (is.factor(sample_info[[column]])) {
      sample_info[[column]] <- droplevels(sample_info[[column]])
    }
  }

  sample_info
})

# Build the design formula as a string: the fixed covariates followed by one
# term per gene in `tfs`, joined with " + ". Then fit the model on the
# mutation-annotated dataset.
muts_design <- paste(
  "~",
  paste(
    c("sex", "SV1", "SV2", "SV3", "ebv_type", "clinical_variant", tfs),
    collapse = " + "
  )
)
design(salmon$muts$dds) <- as.formula(muts_design)
salmon$muts$dds <- DESeq(
  object = salmon$muts$dds,
  minReplicatesForReplace = 5
)

TFAP4

# TFAP4: Mutated (numerator) vs. Unmutated (denominator), followed by the
# result summary and an MA plot with the y-axis fixed to [-5, 5].
salmon$muts$de[["tfap4"]][["lfc_0"]] <- results(
  object   = salmon$muts$dds,
  contrast = list("TFAP4Mutated", "TFAP4Unmutated")
)

summary(object = salmon$muts$de$tfap4$lfc_0)
plotMA(object = salmon$muts$de$tfap4$lfc_0, ylim = c(-5, 5))

DDX3X

# DDX3X: Mutated (numerator) vs. Unmutated (denominator), followed by the
# result summary and an MA plot with the y-axis fixed to [-5, 5].
salmon$muts$de[["ddx3x"]][["lfc_0"]] <- results(
  object   = salmon$muts$dds,
  contrast = list("DDX3XMutated", "DDX3XUnmutated")
)

summary(object = salmon$muts$de$ddx3x$lfc_0)
plotMA(object = salmon$muts$de$ddx3x$lfc_0, ylim = c(-5, 5))

SWI/SNF Complex

# SWI/SNF complex: the ARID1A and SMARCA4 "Mutated" coefficients together
# (numerator) vs. their "Unmutated" coefficients together (denominator).
# No listValues are passed, so results() uses its default weights and the
# reported effect is the sum of the two per-gene Mutated-vs-Unmutated effects.
# NOTE(review): confirm the summed effect is intended rather than the average
# of the two genes (which would need listValues = c(1/2, -1/2)).
salmon$muts$de[["swisnf"]][["lfc_0"]] <- results(
  object   = salmon$muts$dds,
  contrast = list(
    c("ARID1AMutated", "SMARCA4Mutated"),
    c("ARID1AUnmutated", "SMARCA4Unmutated")
  )
)

summary(object = salmon$muts$de$swisnf$lfc_0)
plotMA(object = salmon$muts$de$swisnf$lfc_0, ylim = c(-5, 5))