#GO enrichment analysis
R/Bioconductorの GOstats packageをもちいてGene Ontology (GO) enrichment analysisをおこなう。
-1*log10(p-value)の棒グラフや、各アノテーションに該当した遺伝子のリストも一緒に取得する。
##準備
以下の2種類の遺伝子リストをEntrez GeneのID番号(=NCBI Gene, LocusLink)で用意する。
- EntrezGeneID_interest.txt : 自分の注目する遺伝子セット
- EntrezGeneID_background.txt : その遺伝子セットをみつけてきたときのおおもとの遺伝子全体のリスト
Gene SymbolなどからBioMartなどでEntrez Gene IDに変換すると楽かも。
##Entrez Geneのリスト読み込み
BGFILE <- "EntrezGeneID_background.txt"
INFILE <- "EntrezGeneID_interest.txt"
geneIds <- as.character(read.table(INFILE,header=FALSE)[,1])
univIds <- as.character(read.table(BGFILE,header=FALSE)[,1])
##閾値などの設定
ANNOTATION <- "org.Mm.eg.db"
ONTOLOGY <- "BP"
PVALUE<-0.01
TOP <- 30
OUTNAME <- "myinterest"
ANNOTATIONはマウスなら"org.Mm.eg.db"
だしヒトなら"org.Hs.eg.db"
ONTOLOGYは"BP"
,"MF"
,"CC"
(それぞれbiological process, molecular function, cellular component)
##ライブラリの読み込み
library("GOstats")
library("GO.db")
library("annotate")
library("org.Mm.eg.db")
##関数を作成
GOHYPERG <- function(geneIds,univIds,ANNOTATION,ONTOLOGY,PVALUE,TOP,OUTNAME) {
param <- new(
"GOHyperGParams",
geneIds=geneIds,
universeGeneIds=univIds,
annotation= ANNOTATION,
ontology=ONTOLOGY
)
GOHypG <- hyperGTest(param)
DETECTGO <- sigCategories(
GOHypG,
pvalue <- PVALUE
)
if (length(DETECTGO)==0) {
RESULT_GOHYP <- data.frame(
GOid="",
pvalue="",
oddratio="",
expected="",
obserbed="",
background="",
numberMappedGenes=paste(geneMappedCount(GOHypG),length(geneIds),sep="/"),
numberMappedBkgGenes=paste(universeMappedCount(GOHypG),length(univIds),sep="/"),
GOterm="-",
observedGenes="-"
)
} else {
RESULT_GOHYP <- data.frame(
GOid=DETECTGO,
pvalue=pvalues(GOHypG)[DETECTGO],
oddratio=oddsRatios(GOHypG)[DETECTGO],
expected=expectedCounts(GOHypG)[DETECTGO],
obserbed=geneCounts(GOHypG)[DETECTGO],
background=universeCounts(GOHypG)[DETECTGO],
numberMappedGenes=paste(geneMappedCount(GOHypG),length(geneIds),sep="/"),
numberMappedBkgGenes=paste(universeMappedCount(GOHypG),length(univIds),sep="/"),
GOterm=sapply(
DETECTGO,
function(goIDs){
sapply(
goIDs,
function(goID){Term(get(goID,GOTERM))}
)
}
),
observedGenes=sapply(
DETECTGO,
function(X){
paste(
sapply(
geneIdsByCategory(GOHypG,catids = NULL)[[X]],
function(XXX){paste(get(XXX,org.Mm.egSYMBOL),"(",XXX,")",sep="")}
),
collapse=","
)
}
)
)
BARHEIGHT <- -1 * log(RESULT_GOHYP[1:TOP,2],10)[TOP:1]
BARNAME <- RESULT_GOHYP$GOterm[TOP:1]
SAVEPDFNAME <- paste("RESULT_GOHYP_",ONTOLOGY,"_",OUTNAME,".pdf",sep="")
pdf(SAVEPDFNAME)
par(oma=c(2,2,2,2),mar=c(6,16,1,1))
barplot(
BARHEIGHT,
width=1,
space=NULL,
horiz=TRUE,
las=1,
main=paste(strsplit(SAVEFILENAME,".txt")[[1]][1]),
cex.main=.7,
sub=paste("(TOP",TOP," GOs)",sep=""),
cex.sub=.7,
names.arg=BARNAME,
cex.names=.6,
xlab="-1 * log10(p-value)",
cex.axis = 1
)
dev.off()
}
SAVEFILENAME <- paste("RESULT_GOHYP_",ONTOLOGY,"_",OUTNAME,".txt",sep="")
write.table(RESULT_GOHYP,SAVEFILENAME,sep="\t",eol="\n",quote=FALSE,row.names=FALSE)
return(OUTNAME)
} ## end of func. GOHYPERG
##実行
GOHYPERG(geneIds,univIds,ANNOTATION,ONTOLOGY,PVALUE,TOP,OUTNAME)
"BP"
,"MF"
,"CC"
全部やりたいなら
for (ONTOLOGY in c("BP","MF","CC")) {
GOHYPERG(geneIds,univIds,ANNOTATION,ONTOLOGY,PVALUE,TOP,OUTNAME)
}
#KEGGでenrichment analysis
GOのかわりにKEGGでもやれる。"BP"
,"MF"
,"CC"
のかわりに"over"
,"under"
になる
##ライブラリ読み込み
library("KEGG.db")
##関数を作成
KEGGHYPERG <- function(geneIds,univIds,ANNOTATION,OVERUNDER,PVALUE,TOP,OUTNAME) {
keggparam <- new(
"KEGGHyperGParams",
geneIds=geneIds,
universeGeneIds=univIds,
annotation= ANNOTATION,
pvalueCutoff=PVALUE,
testDirection=OVERUNDER
)
KEGGHypG <- hyperGTest(keggparam)
DETECTKEGG <- sigCategories(KEGGHypG,pvalue<-PVALUE)
if (length(DETECTKEGG)==0) {
RESULT_KEGGHYP <- data.frame(
KEGGid="",
pvalue="",
oddratio="",
expected="",
obserbed="",
background="",
numberMappedGenes=paste(geneMappedCount(KEGGHypG),length(geneIds),sep="/"),
numberMappedBkgGenes=paste(universeMappedCount(KEGGHypG),length(univIds),sep="/"),
KEGGterm="-",
observedGenes="-"
)
} else {
RESULT_KEGGHYP <- data.frame(
KEGGid=DETECTKEGG,
pvalue=pvalues(KEGGHypG)[DETECTKEGG],
oddratio=oddsRatios(KEGGHypG)[DETECTKEGG],
expected=expectedCounts(KEGGHypG)[DETECTKEGG],
obserbed=geneCounts(KEGGHypG)[DETECTKEGG],
background=universeCounts(KEGGHypG)[DETECTKEGG],
numberMappedGenes=paste(geneMappedCount(KEGGHypG),length(geneIds),sep="/"),
numberMappedBkgGenes=paste(universeMappedCount(KEGGHypG),length(univIds),sep="/"),
KEGGterm=sapply(DETECTKEGG,function(keggIDs){sapply(keggIDs,function(keggID){get(keggID,KEGGPATHID2NAME)})}),
observedGenes=sapply(
DETECTKEGG,
function(X){paste(sapply(
geneIdsByCategory(KEGGHypG, catids = NULL)[[X]],
function(XXX){paste(get(XXX,org.Mm.egSYMBOL),"(",XXX,")",sep="")}),collapse=",")}
)
)
SAVEFILENAME <- paste("RESULT_KEGGHYP_",OVERUNDER,OUTNAME,".txt",sep="")
write.table(RESULT_KEGGHYP,SAVEFILENAME,sep="\t",eol="\n",quote=FALSE,row.names=FALSE)
BARHEIGHT <- -1 * log(RESULT_KEGGHYP[1:TOP,2],10)[TOP:1]
BARNAME <- RESULT_KEGGHYP$KEGGterm[TOP:1]
SAVEPDFNAME <- paste("RESULT_KEGGHYP_",OVERUNDER,OUTNAME,".pdf",sep="")
pdf(SAVEPDFNAME)
par(oma=c(2,2,2,2),mar=c(6,16,1,1))
barplot(
BARHEIGHT,
width=1,
space=NULL,
horiz=TRUE,
las=1,
main=paste(strsplit(SAVEFILENAME,".txt")[[1]][1]),
cex.main=.7,
sub=paste("(TOP",TOP," KEGGs)",sep=""),
cex.sub=.7,
names.arg=BARNAME,
cex.names=.6,
xlab="-1 * log10(p-value)",
cex.axis = 1
)
dev.off()
}
return(OUTNAME)
} ## end of func. KEGGHYPERG
##実行
for (OVERUNDER in c("over","under")) {
KEGGHYPERG(geneIds,univIds,ANNOTATION,OVERUNDER,PVALUE,TOP,OUTNAME)
}