LoginSignup
5
6

More than 5 years have passed since last update.

R/BioconductorのGOstats packageをもちいたGene Ontology(GO)やKEGGのenrichment解析

Last updated at Posted at 2016-07-13

GO enrichment analysis

R/Bioconductorの GOstats packageをもちいてGene Ontology (GO) enrichment analysisをおこなう。
-1*log10(p-value)の棒グラフや、各アノテーションに該当した遺伝子のリストも一緒に取得する。

準備

以下の2種類の遺伝子リストをEntrez GeneのID番号(=NCBI Gene, LocusLink)で用意する。

  • EntrezGeneID_interest.txt : 自分の注目する遺伝子セット
  • EntrezGeneID_background.txt : その遺伝子セットをみつけてきたときのおおもとの遺伝子全体のリスト

Gene SymbolなどからBioMartなどでEntrez Gene IDに変換すると楽かも。

Entrez Geneのリスト読み込み

BGFILE <- "EntrezGeneID_background.txt"
INFILE <- "EntrezGeneID_interest.txt"

geneIds <- as.character(read.table(INFILE,header=FALSE)[,1])
univIds <- as.character(read.table(BGFILE,header=FALSE)[,1])

閾値などの設定

ANNOTATION <- "org.Mm.eg.db"
ONTOLOGY <- "BP"
PVALUE<-0.01
TOP <- 30
OUTNAME <- "myinterest"

ANNOTATIONはマウスなら"org.Mm.eg.db"だしヒトなら"org.Hs.eg.db"
ONTOLOGYは"BP","MF","CC"
(それぞれbiological process, molecular function, cellular component)

ライブラリの読み込み

library("GOstats")
library("GO.db")
library("annotate")
library("org.Mm.eg.db")

関数を作成

GOHYPERG <- function(geneIds,univIds,ANNOTATION,ONTOLOGY,PVALUE,TOP,OUTNAME) {
    param <- new(
      "GOHyperGParams",
      geneIds=geneIds,
      universeGeneIds=univIds,
      annotation= ANNOTATION,
      ontology=ONTOLOGY
  )
  GOHypG <- hyperGTest(param)
  DETECTGO <- sigCategories(
      GOHypG,
      pvalue <- PVALUE
  )
  if (length(DETECTGO)==0) {
      RESULT_GOHYP <- data.frame(
          GOid="",
          pvalue="",
          oddratio="",
          expected="",
          obserbed="",
          background="",
          numberMappedGenes=paste(geneMappedCount(GOHypG),length(geneIds),sep="/"),
          numberMappedBkgGenes=paste(universeMappedCount(GOHypG),length(univIds),sep="/"),
          GOterm="-",
          observedGenes="-"
      )
  } else {
      RESULT_GOHYP <- data.frame(
          GOid=DETECTGO,
          pvalue=pvalues(GOHypG)[DETECTGO],
          oddratio=oddsRatios(GOHypG)[DETECTGO],
          expected=expectedCounts(GOHypG)[DETECTGO],
          obserbed=geneCounts(GOHypG)[DETECTGO],
          background=universeCounts(GOHypG)[DETECTGO],
          numberMappedGenes=paste(geneMappedCount(GOHypG),length(geneIds),sep="/"),
          numberMappedBkgGenes=paste(universeMappedCount(GOHypG),length(univIds),sep="/"),
          GOterm=sapply(
              DETECTGO,
              function(goIDs){
                  sapply(
                      goIDs,
                      function(goID){Term(get(goID,GOTERM))}
                  )
              }
          ),
          observedGenes=sapply(
              DETECTGO,
              function(X){
                  paste(
                      sapply(
                          geneIdsByCategory(GOHypG,catids = NULL)[[X]],
                          function(XXX){paste(get(XXX,org.Mm.egSYMBOL),"(",XXX,")",sep="")}
                      ),
                      collapse=","
                  )
              }
          )
      )
      
      BARHEIGHT <- -1 * log(RESULT_GOHYP[1:TOP,2],10)[TOP:1]
      BARNAME <- RESULT_GOHYP$GOterm[TOP:1]
    SAVEPDFNAME <- paste("RESULT_GOHYP_",ONTOLOGY,"_",OUTNAME,".pdf",sep="")
    pdf(SAVEPDFNAME)
    par(oma=c(2,2,2,2),mar=c(6,16,1,1))
    barplot(
          BARHEIGHT,
          width=1,
          space=NULL,
          horiz=TRUE,
          las=1,
          main=paste(strsplit(SAVEFILENAME,".txt")[[1]][1]),
          cex.main=.7,
          sub=paste("(TOP",TOP," GOs)",sep=""),
          cex.sub=.7,
          names.arg=BARNAME,
          cex.names=.6,
          xlab="-1 * log10(p-value)",
          cex.axis = 1
      )
      dev.off()
  }
          
  SAVEFILENAME <- paste("RESULT_GOHYP_",ONTOLOGY,"_",OUTNAME,".txt",sep="")
  write.table(RESULT_GOHYP,SAVEFILENAME,sep="\t",eol="\n",quote=FALSE,row.names=FALSE)
  
  return(OUTNAME)
  
} ## end of func. GOHYPERG

実行

GOHYPERG(geneIds,univIds,ANNOTATION,ONTOLOGY,PVALUE,TOP,OUTNAME)

"BP","MF","CC"全部やりたいなら

for (ONTOLOGY in c("BP","MF","CC")) {
    GOHYPERG(geneIds,univIds,ANNOTATION,ONTOLOGY,PVALUE,TOP,OUTNAME)
}

KEGGでenrichment analysis

GOのかわりにKEGGでもやれる。"BP","MF","CC"のかわりに"over","under"になる

ライブラリ読み込み

library("KEGG.db")

関数を作成

KEGGHYPERG <- function(geneIds,univIds,ANNOTATION,OVERUNDER,PVALUE,TOP,OUTNAME) {
    keggparam <- new(
        "KEGGHyperGParams",
        geneIds=geneIds,
        universeGeneIds=univIds,
        annotation= ANNOTATION,
        pvalueCutoff=PVALUE,
        testDirection=OVERUNDER
    )
    KEGGHypG <- hyperGTest(keggparam)
    DETECTKEGG <- sigCategories(KEGGHypG,pvalue<-PVALUE)
    if (length(DETECTKEGG)==0) {
        RESULT_KEGGHYP <- data.frame(
            KEGGid="",
            pvalue="",
            oddratio="",
            expected="",
            obserbed="",
            background="",
            numberMappedGenes=paste(geneMappedCount(KEGGHypG),length(geneIds),sep="/"),
            numberMappedBkgGenes=paste(universeMappedCount(KEGGHypG),length(univIds),sep="/"),
            KEGGterm="-",
            observedGenes="-"
        )
    } else {
        RESULT_KEGGHYP <- data.frame(
            KEGGid=DETECTKEGG,
            pvalue=pvalues(KEGGHypG)[DETECTKEGG],
            oddratio=oddsRatios(KEGGHypG)[DETECTKEGG],
            expected=expectedCounts(KEGGHypG)[DETECTKEGG],
            obserbed=geneCounts(KEGGHypG)[DETECTKEGG],
            background=universeCounts(KEGGHypG)[DETECTKEGG],
            numberMappedGenes=paste(geneMappedCount(KEGGHypG),length(geneIds),sep="/"),
            numberMappedBkgGenes=paste(universeMappedCount(KEGGHypG),length(univIds),sep="/"),
            KEGGterm=sapply(DETECTKEGG,function(keggIDs){sapply(keggIDs,function(keggID){get(keggID,KEGGPATHID2NAME)})}),
            observedGenes=sapply(
                DETECTKEGG,
                function(X){paste(sapply(
                    geneIdsByCategory(KEGGHypG, catids = NULL)[[X]],
                    function(XXX){paste(get(XXX,org.Mm.egSYMBOL),"(",XXX,")",sep="")}),collapse=",")}
            )
        )
        SAVEFILENAME <- paste("RESULT_KEGGHYP_",OVERUNDER,OUTNAME,".txt",sep="")
        write.table(RESULT_KEGGHYP,SAVEFILENAME,sep="\t",eol="\n",quote=FALSE,row.names=FALSE)
        BARHEIGHT <- -1 * log(RESULT_KEGGHYP[1:TOP,2],10)[TOP:1]
        BARNAME <- RESULT_KEGGHYP$KEGGterm[TOP:1]
        SAVEPDFNAME <- paste("RESULT_KEGGHYP_",OVERUNDER,OUTNAME,".pdf",sep="")
        pdf(SAVEPDFNAME)
        par(oma=c(2,2,2,2),mar=c(6,16,1,1))
        barplot(
            BARHEIGHT,
            width=1,
            space=NULL,
            horiz=TRUE,
            las=1,
            main=paste(strsplit(SAVEFILENAME,".txt")[[1]][1]),
            cex.main=.7,
            sub=paste("(TOP",TOP," KEGGs)",sep=""),
            cex.sub=.7,
            names.arg=BARNAME,
            cex.names=.6,
            xlab="-1 * log10(p-value)",
            cex.axis = 1
        )
        dev.off()
    }
    return(OUTNAME)
} ## end of func. KEGGHYPERG

実行

for (OVERUNDER in c("over","under")) {
    KEGGHYPERG(geneIds,univIds,ANNOTATION,OVERUNDER,PVALUE,TOP,OUTNAME)
}
5
6
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
5
6