0
2

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

PCoA biplotをggplot2で

Last updated at Posted at 2021-02-26

きっかけ

腸内細菌叢の解析でPCoAやったのでメモ

#PCAと何が違うか

主成分分析との違いを簡単に言うと、主成分分析はユークリッド距離をなるべく保ちながら低次元に落とす方法ですが、主座標分析はユークリッド距離だけでなく、他の距離や類似度*2が使えるという点にあります。(https://hoxo-m.hatenablog.com/entry/20120313/p1)

分散を最大化する際に、各点と重心間のユークリッド距離を使って分散を求めるのがPCA、その他の距離や類似度から分散を求めて最大化するのがPCoAということみたい(たぶん)

流れ

veganパッケージのvegdist関数で各種距離を導出

apeパッケージのpcoa関数でPCoA

ggplot2でbiplotをかく

やる

必要パッケージ読み込み

library(tidyverse)
library(vegan)
library(ape)
library(RColorBrewer)

##データ読み込み

tbl <- read_csv("bacteria_genus.csv")

1列目がサンプル名、2列目に群名、3列目以降は細菌の存在比
column名は"sample", "group", 以降は菌名

vegdist, pcoa

jaccard <- tbl %>% 
  select(-(sample:group)) %>%
  vegdist(method="jaccard", na.rm=TRUE)

jaccard.pcoa <- pcoa(jaccard, correction = "cailliez")

plot

# x:pcoa data from pcoa function in ape
# y:original data frame consisted only of numeric
# group:group names of the samples as factor
# label_x:
# label_y:labels of loading plot
# index_y:mode of drawing arrow labels
# legend_title
# shape:shapes attached to each group
# n_y:the number of arrows

pcoa.biplot <- function(x, y=NULL, group=NULL, label_x=NULL, label_y=NULL, n_y=10, index_y = 0, legend_title=FALSE,shape=NULL){
  x2 <- as_tibble(x$vectors)
  if(!is.null(group)){
    x2 <- x2 %>% mutate(group = group)
    plot <- ggplot(x2,aes(Axis.1,Axis.2)) + geom_point(aes(shape=group),size=2.5)
    if(!is.null(shape)){
      plot <- plot + scale_shape_manual(values=shape)
    }
  }
  else{
    plot <- ggplot(x2,aes(Axis.1,Axis.2)) + geom_point()
  }
  
  # determine arrow postion by calcurating covariance of pcoa data and original data
  if(!is.null(y)){
    n <- nrow(y)
    points.stand <- scale(x2[,c(1,2)])
    S <- cov(y,points.stand)
    U <- S %*% diag((x$values$Eigenvalues[c(1,2)]/(n-1))^(-0.5))
    colnames(U) <- colnames(x2[,c(1,2)])
    U <- as_tibble(U) %>% mutate(angle = (180/pi)*atan(Axis.2/Axis.1), length=(Axis.1**2 + Axis.2**2)**0.5)
    
    if(!is.null(label_y)){
      U <- U %>% mutate(label_y=label_y) %>% top_n(n_y,length) %>% arrange(desc(length)) %>% mutate(index=c(1:n_y))
      if(index_y==1){
        plot <- plot + geom_segment(data = U, aes(x=0,y=0,xend=Axis.1,yend=Axis.2),arrow=arrow(length=unit(0.2,"cm")),alpha=0.3)
        plot <- plot + geom_text(data=U, aes(x=Axis.1+0.05/length*Axis.1 ,y=Axis.2 + 0.05/length*Axis.2,label=index),size=4,color="red")
        write_csv(U %>% select(index,label_y),"U.csv")
      }
      else if(index_y==2){
        plot <- plot + geom_segment(data = U, aes(x=0,y=0,xend=Axis.1,yend=Axis.2),arrow=arrow(length=unit(0.2,"cm")),alpha=0.3)
        plot <- plot + geom_text(data=U, aes(x=Axis.1+0.3/length*Axis.1 ,y=Axis.2 + 0.3/length*Axis.2,label=label_y,angle=angle),size=3,color="red") + xlim(-0.5,1.25) + ylim(-1.25,0.75)
      }
      else if(index_y==3){
        mycol <- colorRampPalette(brewer.pal(8,"Set1"))(n_y)
        plot <- plot + geom_segment(data = U, aes(x=0,y=0,xend=Axis.1,yend=Axis.2, color=label_y),arrow=arrow(length=unit(0.2,"cm"))) + scale_color_manual(values=mycol)
      }
      else{
        plot <- plot + geom_segment(data = U, aes(x=0,y=0,xend=Axis.1,yend=Axis.2),arrow=arrow(length=unit(0.2,"cm")))
      }
    }
    else{
      U <- U %>% top_n(n_y,length)
      plot <- plot + geom_segment(data = U, aes(x=0,y=0,xend=Axis.1,yend=Axis.2),arrow=arrow(length=unit(0.2,"cm")))
  }
  }
  
  # no legend title, plot title in center, legend in bottom right of plot, legend background color is nothing, legend frame color is black
  if(legend_title==FALSE){
    plot <- plot + theme(legend.title=element_blank(),plot.title=element_text(hjust=0.5), legend.position = c(1,0), legend.justification = c(1,0), legend.background = element_rect(fill = NA, colour = "black"))
  }
  
  plot <- plot + coord_fixed()
  return(plot)
}

グループ情報とかつくる

legendの順序とかはfactorのlevel順なので制御できるように別に作るようにした

group <- factor(tbl$group,levels=c("WT-ctrl","WT-stim","KO-ctrl","KO-stim"))
label_y <- factor(colnames(tbl)[3:ncol(tbl)])
label_y <- str_extract(label_y,pattern = "k__.*;p__;c__;o__;f__;g__$|p__.*;c__;o__;f__;g__$|c__.*;o__;f__;g__$|o__.*;f__;g__$|f__.*;g__$|g__.*$")

出力

pcoa.biplot(jaccard.pcoa,tbl[,3:ncol(tbl)],group=group,label_y=label_y,index_y=1,shape=c(1,19,0,15),n_y=10)
 + labs(title="Jaccard", x="PC1", y="PC2")

xにはpcoaの出力を入れて、yには元tibbleのsample,group列を除いた数字のみデータを入れる

index_y=0 arrowのlabelなし
index_y=1 arrowのlabelを数字で書いて対応表を出力
index_y=2 arrowのlabelをlabel_yとして出力
index_y=3 arrowの色をlabel_yに割り当てて出力

おわり

サンプルデータ作るのがめんどかったので画像ないです

0
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
2

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?