## One strategy is to put some of my examples (with expected output into here) ## - but this is fragile unless I also provide a localized example DB (which I ## probably should do anyways for other reasons, but is not necessarily useful ## here). ## Another strategy is that I should probably have tests for all my helper ## functions to make sure that they are returning what is expected. ## Generally, I want to make tests for each thing that can go wrong due to ## changes elsewhere. The strategy of writing tests for the helpers will ## catch some of this, but also I need to anticipate things that will change ## in the annotations etc. ## library(AnnotationDbi);AnnotationDbi:::.test() require(RSQLite) require(org.Hs.eg.db) require(org.At.tair.db) require(org.Sc.sgd.db) require(GO.db) require(hgu95av2.db) require("RUnit") x <- org.Hs.eg.db t <- org.At.tair.db s <- org.Sc.sgd.db cols <- c("CHR","PFAM","GO") keys <- c(1,10) jointype <- "genes.gene_id" ## changed from 'gene_id' quiet <- suppressWarnings # quieten warnings from 1:many mappings in select() ## resort and friends are really important as they are generic enough to ## be reused elsewhere. test_generateExtraRows <- function(){ ttab = data.frame(warpbreaks[1:10,]) ttab$breaks <- as.character(ttab$breaks) tkeys = ttab$breaks tkeys = c(26, tkeys[1:7], tkeys[7], 30, tkeys[8:10], tkeys[10]) res <- AnnotationDbi:::.generateExtraRows(ttab, tkeys, jointype='breaks') expLen <- sum(table(tkeys) * table(ttab$breaks)) checkTrue(expLen == dim(res)[1]) } test_dropUnwantedRows <- function() { fun <- AnnotationDbi:::.dropUnwantedRows ## no duplicates, no changes keys <- letters[1:5] tab <- data.frame(x=keys, y=LETTERS[1:5], z=LETTERS[5:1], row.names=NULL) checkIdentical(tab, fun(tab, keys, "x")) ## duplicate jointype element, duplicate dropped tab1 <- tab[c(1:5, 3L),] rownames(tab1) <- NULL checkIdentical(tab, fun(tab1, keys, "x")) ## unique all NA (other than jointype column) _retained_ tab1 <- tab tab1[3, 2:3] <- NA rownames(tab1) <- NULL checkIdentical(tab1, fun(tab1, keys, "x")) ## duplicate all NA, made unique tab1 <- tab tab1[3, 2:3] <- NA tab2 <- tab1[c(1:5, 3),] checkIdentical(tab1, fun(tab2, keys, "x")) ## duplicate key, dropped keys1 <- keys[c(1:5, 3)] checkIdentical(tab, fun(tab, keys1, "x")) } test_resort <- function() { fun <- resort_base ## from AnnotationDbi ## repeat keys returned keys <- letters[1:5] tab <- data.frame(x=keys, y=LETTERS[1:5], z=LETTERS[5:1], row.names=NULL, stringsAsFactors=FALSE) keys1 <- keys[c(1:5, 1)] tab1 <- tab[c(1:5, 1),] rownames(tab1) <- NULL checkIdentical(tab1, fun(tab, keys1, "x", names(tab))) ## keys with missing values returned tab1 <- tab tab1[3, 2:3] <- NA keys1 <- tab1[["x"]] checkIdentical(tab1, fun(tab1, keys, "x", names(tab))) ## multiple keys with missing values returned tab1 <- tab[c(3,4,3,4),] tab1[c(1,3), 2:3] <- NA keys1 <- keys[c(3,4,3,4)] rownames(tab1) <- NULL checkIdentical(tab1, fun(tab1[1:2,], keys1, "x", names(tab))) cols <- c("CHR","SYMBOL", "PFAM") keys <- c(1,10) res <- AnnotationDbi:::.extractData(x, cols, keytype="ENTREZID", keys) ## jumble res to simulate trouble resRO = res[order(sort(res$genes.gene_id,decreasing=TRUE)),] reqCols <- c("genes.gene_id","chromosomes.chromosome","gene_info.symbol", "pfam.pfam_id") Rres <- fun(resRO, keys, jointype, reqCols) checkIdentical(Rres$gene_id,Rres$gene_id) checkTrue(class(Rres) =="data.frame") ## now what if we have MORE keys? keys <- c(1, keys, keys) cols <- c("CHR","SYMBOL") res <- AnnotationDbi:::.extractData(x, cols, keytype="ENTREZID", keys) reqCols <- c("genes.gene_id","chromosomes.chromosome","gene_info.symbol") res2 <- fun(res, keys, jointype, reqCols) checkIdentical(as.numeric(as.character(res2$genes.gene_id)),keys) checkTrue(class(res) =="data.frame") } test_keytypes <- function(){ checkTrue("ENTREZID" %in% keytypes(x)) checkTrue("TAIR" %in% keytypes(t)) checkTrue("ENTREZID" %in% keytypes(t)) checkTrue("ORF" %in% keytypes(s)) checkTrue("ENTREZID" %in% keytypes(s)) } test_keys <- function(){ checkException(keys(org.Hs.eg.db, keytype="PROBEID")) egHskeys <- as.numeric(head(keys(x))) checkTrue(length(egHskeys[!is.na(egHskeys)])==6) rsHskeys <- head(keys(x, "REFSEQ")) checkTrue(any(grepl("N", rsHskeys))) egAtkeys <- as.numeric(head(keys(t,"ENTREZID"))) checkTrue(length(egAtkeys[!is.na(egAtkeys)])==6) rsAtkeys <- head(keys(t, "REFSEQ")) checkTrue(any(grepl("N", rsAtkeys))) tairAtkeys <- head(keys(t, "TAIR")) checkTrue(any(grepl("AT", tairAtkeys))) egSckeys <- as.numeric(head(keys(s, "ENTREZID"))) checkTrue(length(egSckeys[!is.na(egSckeys)])==6) rsSckeys <- head(keys(s, "REFSEQ")) checkTrue(any(grepl("N", rsSckeys))) orfSckeys <- head(keys(s, "ORF")) checkTrue(any(grepl("A", orfSckeys))) } test_keys_advancedArgs <- function(){ k1 <- keys(x, keytype="SYMBOL") checkTrue("A1BG" %in% k1) k2 <- keys(x, keytype="SYMBOL", pattern="BRCA") checkTrue("BRCA1" %in% k2) checkTrue(!("A1BG" %in% k2)) checkTrue(length(k2) < length(k1)) l1 <- length(keys(x, keytype="ENTREZID", column="PATH")) l2 <- length(keys(x, keytype="ENTREZID")) checkTrue(l1 < l2) k3 <- keys(x,keytype="ENTREZID",pattern="^MSX",column="SYMBOL") res <- select(x, k3, c("ENTREZID","SYMBOL"), "ENTREZID") checkTrue(any(grep("^MSX",res$SYMBOL))) } ######################################################################### ## These ones are to test out some real use cases... test_select1 <- function(){ keys <- head(keys(hgu95av2.db, "ALIAS"),n=2) cols <- c("SYMBOL","ENTREZID","PROBEID") res <- quiet(select(hgu95av2.db, keys, cols, keytype="ALIAS")) checkIdentical(c(3L, 4L), dim(res)) checkIdentical(c("ALIAS","SYMBOL","ENTREZID","PROBEID"), colnames(res)) } test_select2 <- function(){ keys <- head(keys(org.Hs.eg.db),n=2) cols <- c("PFAM","ENTREZID", "GO") res <- quiet(select(org.Hs.eg.db, keys, cols, keytype="ENTREZID")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==5) checkIdentical(c("ENTREZID","PFAM","GO","EVIDENCE","ONTOLOGY"), colnames(res)) } test_select3 <- function(){ keys <- head(keys(org.Hs.eg.db,keytype="OMIM"),n=4) cols <- c("SYMBOL", "UNIPROT", "PATH") res <- quiet(select(hgu95av2.db, keys, cols, keytype="OMIM")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==4) checkIdentical(c("OMIM","SYMBOL","UNIPROT","PATH"), colnames(res)) } test_select4 <- function(){ keys <- head(keys(org.Hs.eg.db),n=2) cols <- c("ACCNUM","REFSEQ") res <- quiet(select(org.Hs.eg.db, keys, cols)) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("ENTREZID","ACCNUM","REFSEQ"), colnames(res)) } test_select5 <- function(){ keys <- head(keys(GO.db), n=4) cols <- c("TERM","ONTOLOGY","DEFINITION") res <- select(GO.db, keys, cols) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==4) checkIdentical(c("GOID","TERM","ONTOLOGY","DEFINITION"), colnames(res)) } test_select6 <- function(){ keys <- head(keys(hgu95av2.db)) cols <- c("SYMBOL","ENTREZID", "GO") ## tests for bad keys: checkException(select(hgu95av2.db, keys, cols, keytype="ENTREZID")) ## also catch bogus keytype arguments checkException(select(hgu95av2.db, keys, cols, keytype="FOO")) checkException(keys(hgu95av2.db, keytype="FOO")) } test_select7 <- function(){ cols <- c("SYMBOL","ENTREZID") ## 1st of all cols should be 1:1 cols keys <- head(keys(org.Hs.eg.db),n=3) keys <- c(1, keys, keys) res <- select(org.Hs.eg.db, keys, cols) checkTrue(class(res) =="data.frame") checkIdentical(keys, as.character(t(res$ENTREZID))) } test_select8 <- function(){ cols <- c("ENTREZID") keys <- head(keys(org.Hs.eg.db),n=3) res <- select(org.Hs.eg.db, keys, cols) checkTrue(class(res) =="data.frame") checkTrue(dim(res)[2] ==1) checkIdentical(as.character(keys), as.character(t(res$ENTREZID))) } test_select9 <- function(){ ## What about when we need to throw away extra cols? uniKeys <- head(keys(org.Hs.eg.db, keytype="UNIPROT")) cols <- c("SYMBOL", "PATH") res <- quiet(select(org.Hs.eg.db, keys=uniKeys, columns=cols, keytype="UNIPROT")) checkTrue(class(res) =="data.frame") checkTrue(dim(res)[2] ==3) checkIdentical(c("UNIPROT","SYMBOL","PATH"), colnames(res)) } test_select10 <- function(){ ## What about when we have to get data from Arabidopsis using various ## keytypes? cols <- c("SYMBOL","CHR") keys <- head(keys(t,"TAIR")) res <- quiet(select(t, keys, cols, keytype="TAIR")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("TAIR","SYMBOL","CHR"), colnames(res)) keys <- head(keys(t,"ENTREZID")) res <- quiet(select(t, keys, cols, keytype="ENTREZID")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("ENTREZID","SYMBOL","CHR"), colnames(res)) keys=head(keys(t,"REFSEQ")) res <- quiet(select(t, keys, cols , keytype="REFSEQ")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("REFSEQ","SYMBOL","CHR"), colnames(res)) } test_select11 <- function(){ ## how about different keytypes for yeast? keys <- head(keys(s, "REFSEQ")) cols <- c("CHR","PFAM") res <- quiet(select(s, keys, cols, keytype="REFSEQ")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("REFSEQ","CHR","PFAM"), colnames(res)) keys <- head(keys(s, "ENTREZID")) cols <- c("CHR","PATH") res <- quiet(select(s, keys, cols, keytype="ENTREZID")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("ENTREZID","CHR","PATH"), colnames(res)) keys <- head(keys(s, "ORF")) cols <- c("CHR","SGD") res <- select(s, keys, cols, keytype="ORF") checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("ORF","CHR","SGD"), colnames(res)) ## And if you flip things the other way cols <- c("SGD","CHR") res <- select(s, keys, cols, keytype="ORF") checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("ORF","SGD","CHR"), colnames(res)) ## Martins bug discoveries keys <- keys(s, keytype="GENENAME") checkTrue(length(keys) > 0) checkTrue(is.character(keys)) keys <- keys(s, keytype="CHRLOC") checkTrue(length(keys) > 0) checkTrue(is.character(keys)) res <- select(s, "YAL003W", "GENENAME") checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("ORF","SGD","GENENAME"), colnames(res)) ## This works but is slow (therefore it's tested elsewhere) ## res <- select(s, keys="YAL003W", columns(s)) ## Another test to make sure we can join up to ORF properly keys <- keys(s,"ENTREZID") res <- select(s, columns="ORF", keys=keys, keytype="ENTREZID") checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==3) checkIdentical(c("ENTREZID","ORF","SGD"), colnames(res)) } test_select12 <- function(){ ## what happens when we use GO as an ID? keys <- "1" cols <- c("GO","ENTREZID") res <- quiet(select(x, keys, cols, keytype="ENTREZID")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==4) checkIdentical(c("ENTREZID","GO","EVIDENCE","ONTOLOGY"), colnames(res)) keys <- "GO:0000018" cols <- c("GO","ENTREZID") res <- quiet(select(x, keys, cols, keytype="GO")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==4) checkIdentical(c("GO","EVIDENCE","ONTOLOGY","ENTREZID"), colnames(res)) keys <- "GO:0000023" cols <- c("GO","ENTREZID") res <- quiet(select(t, keys, cols, keytype="GO")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==4) checkIdentical(c("GO","EVIDENCE","ONTOLOGY","ENTREZID"), colnames(res)) keys <- "GO:0000023" cols <- c("ENTREZID","TAIR","GO") res <- quiet(select(t, keys, cols, keytype="GO")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==5) checkIdentical(c("GO","EVIDENCE","ONTOLOGY","ENTREZID","TAIR"), colnames(res)) } test_select13 <- function(){ ## what happens with dropping unwanted rows? sym <- "ITGA7" res <- quiet(select(x, sym, "PFAM", keytype="ALIAS")) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==2) ## make sure no NAs are in res$PFAM checkTrue(length(res$PFAM)== length(res$PFAM[!is.na(res$PFAM)])) } test_select14 <- function(){ ## what happens when there are no results AT ALL? (should be all NAs) keys <- c("1001_at","1006_at","1007_s_at") res <- select(hgu95av2.db, keys, "PATH", keytype="PROBEID") checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==2) ## make sure all of res$PATH ARE NAs ## If this part fails it is a warning that the test is no longer valid, ## which would happen if some of these IDs were to be further annotated for ## PATH (unlikely since PATH is basically dead for this repos) checkTrue(length(res$PATH)== length(res$PATH[is.na(res$PATH)])) } test_select15 <- function(){ ## Another bug that seems to happen in post-processing... ## the code that resolves duplicated values is going a bit insane... ## (IOW .replaceValues()) if(!all(.Platform$OS.type == "windows", .Platform$r_arch == "i386")){ res <- select(x, keys="100008586", columns(x)) checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==26) exp <- c("ENTREZID", "ACCNUM", "ALIAS", "ENSEMBL", "ENSEMBLPROT", "ENSEMBLTRANS", "ENZYME", "EVIDENCE", "EVIDENCEALL", "GENENAME", "GENETYPE", "GO", "GOALL", "IPI", "MAP", "OMIM", "ONTOLOGY", "ONTOLOGYALL", "PATH", "PFAM", "PMID", "PROSITE", "REFSEQ", "SYMBOL", "UCSCKG", "UNIPROT") checkIdentical(exp, colnames(res)) } } test_select16 <- function(){ ## What happens if we ask for probes back... ## (and pass in something else as a key) sk = c( 'MAPK3','TIE1' ) res <- select(hgu95av2.db, keys=sk, columns = c("PROBEID"), keytype="SYMBOL") checkTrue(dim(res)[1]>0) checkTrue(dim(res)[2]==2) checkIdentical(c('SYMBOL','PROBEID'), colnames(res)) } ## NA values are now OK for legacySelect (in order to be consistent ## with other select methods which have greater respect for incoming NA values test_select_NAsInNAsOut <- function(){ ## NAs in should result in NAs out. ## Not that we like NAs but just because if possible: we want to ## preserve the geometry of the keys coming in. k=c('1', NA, NA, '10'); res <- select(x, k, 'SYMBOL', 'ENTREZID') checkTrue(dim(res)[1]==4) checkTrue(dim(res)[2]==2) checkIdentical(c('ENTREZID','SYMBOL'), colnames(res)) checkIdentical(k, res$ENTREZID) checkTrue(any(is.na(res$SYMBOL))) } ## Some new messages from AnnotationDbi:::.generateExtraRows() ## where I am calling it internally... ## these tests just make sure the right kinds of messages are being sent... test_select_XtoX <- function(){ k=c('1','1', NA, '10'); res <- tryCatch(select(org.Hs.eg.db, k, 'SYMBOL', 'ENTREZID'), message = function(x){return(x)}) checkTrue(grepl('many:1', res$message)) res <- tryCatch(select(org.Hs.eg.db, '1', 'ALIAS', 'ENTREZID'), message = function(x){return(x)}) checkTrue(grepl('1:many', res$message)) res <- tryCatch(select(org.Hs.eg.db, k, 'ALIAS', 'ENTREZID'), message = function(x){return(x)}) checkTrue(grepl('many:many', res$message)) res <- tryCatch(select(org.Hs.eg.db, c('1','10'), 'SYMBOL', 'ENTREZID'), message = function(x){return(x)}) checkTrue(grepl('1:1', res$message)) } ## TODO: deal with any fallout from having new messages in select() ## This will also cause me to have to silence select() in many places ## to avoid annoying repeats of the test_dbconn_and_dbfile <- function(){ resc <- dbconn(org.Hs.eg.db) m <- dbGetQuery(resc, "SELECT * FROM metadata") checkTrue(dim(m)[2] ==2) checkTrue(dim(m)[1] > 10) resf <- dbfile(org.Hs.eg.db) mf <- dbGetQuery(dbConnect(SQLite(), resf), "SELECT * FROM metadata") checkTrue(all(mf == m)) } ## Fast checking: ## BiocGenerics:::testPackage(pattern="^test_select.*\\.R$")