LoginSignup
5
8

More than 5 years have passed since last update.

TDA(topological data analysis)で1次元の輪、2次元の空洞の数を数える

Last updated at Posted at 2018-04-01

TDA(topological data analysis)で、一次元の輪、2次元の空洞を数える

パッケージ


library(TDA)
library(foreach)
library(ggplot2)
library(tidyverse)
library(readxl)

SDの計算

下の画像のようにROIをとってImageJで各ROIのStDvを取得
image.png
全ROIの平均値をSDとする


> read_excel("C:/Users/owner/Desktop/sd.xlsx")
 # A tibble: 55 x 3
    Area StdDev    mA
   <dbl>  <dbl> <dbl>
 1   746   25.2  50.0
 2   746   21.9  50.0
 3   746   21.8  50.0
 4   746   21.1  50.0
 5   746   24.1  50.0
 6   746   17.6 100  
 7   746   14.6 100  
 8   746   15.1 100  
 9   746   15.0 100  
10   746   17.2 100  
 # ... with 45 more rows

read_excel("C:/Users/owner/Desktop/sd.xlsx") %>% 
  select(-Area) %>% 
  group_by(mA) %>% 
  summarise(SD = mean(StdDev)) -> SD_tbl
> SD_tbl
 # A tibble: 11 x 2
      mA    SD
   <dbl> <dbl>
 1  50.0 22.8 
 2 100   15.9 
 3 150   13.2 
 4 200   11.2 
 5 250   10.1 
 6 300    9.58
 7 350    8.46
 8 400    7.99
 9 450    7.68
10 500    7.32
11 550    7.13

ImageJで画像の座標を取得

Save XY Coordinatesで(x,y,z,Value)を取得
image.png
今回は管電流が50から550mAまで50mA刻みのCT水ファントム画像を用いました。だから11枚の画像


read_delim("20170930.txt",
           "\t",escape_double = FALSE, 
           col_names = FALSE,
           col_types = cols(X4 = col_double()),
           trim_ws = TRUE) %>% 
  rename(x = V1, y = V2, z = V3, HU = V4) %>% 
  as.tbl() %>% 
  dplyr::filter(HU >= 100) %>% 
  group_by(z) %>% 
  nest() -> dat
slice_Number <- seq_along(dat$z) ## foreachで使う

> dat$data[[1]]
# A tibble: 57,275 x 3
       x     y    HU
   <int> <int> <dbl>
 1   243   123   293
 2   244   123   302
 3   245   123   297
 4   246   123   302
 5   247   123   313
 6   248   123   268
 7   249   123   296
 8   250   123   289
 9   251   123   319
10   252   123   287
# ... with 57,265 more rows

TDA

TDA::alphashapeDiag()を使って計算


shapeDiag <- foreach(i = slice_Number) %do% alphaShapeDiag(dat$data[[i]])

パーシステント図(PD)をデータフレームに変換

mutate()でDeathとBirthの差を付け加える

shapeDiag_df <- foreach(i = slice_Number) %do% {
  tibble::tibble(
    Dimension = as.factor(shapeDiag[[i]]$diagram[,1]), 
          Birth = shapeDiag[[i]]$diagram[,2],
          Death = shapeDiag[[i]]$diagram[,3]
  ) %>% 
    mutate(SuvivalInterval = Death - Birth)
}

PDの座標からDimension毎に個数(holes)、生存半径区間のMax,Min,Median,Meanの集計

summarise_df <- foreach(i = slice_Number, .combine = rbind) %do% {
  shapeDiag_df[[i]] %>% 
    group_by(Dimension) %>% 
    summarise(., 
              holes = n(),
              Max = max(SuvivalInterval),
              Min = min(SuvivalInterval),
              Median = median(SuvivalInterval),
              Mean = mean(SuvivalInterval)
    ) %>% 
    mutate(.,mAs = mAs[i])
}

生成される半径が各画像のSD以上またはSD以下で区切って集計

Birth ≦ SD


summarise_sdun_df <- foreach(i = slice_Number, .combine = rbind) %do% {
    shapeDiag_df[[i]] %>%
      dplyr::group_by(.,Dimension) %>%
      dplyr::filter(.,Birth <= SD_tbl$SD[i]) %>%
      dplyr::summarise(.,
                       holes = n(),
                       Max = max(SuvivalInterval),
                       Min = min(SuvivalInterval),
                       Median = median(SuvivalInterval),
                       Mean = mean(SuvivalInterval)
      ) %>% 
    mutate(.,mAs = mAs[i])
}

Birth ≧ SD


summarise_sdov_df <- foreach(i = slice_Number, .combine = rbind) %do% {
    shapeDiag_df[[i]] %>%
      dplyr::group_by(.,Dimension) %>%
      dplyr::filter(.,Birth >= SD_tbl$SD[i]) %>%
      dplyr::summarise(.,
                       holes = n(),
                       Max = max(SuvivalInterval),
                       Min = min(SuvivalInterval),
                       Median = median(SuvivalInterval),
                       Mean = mean(SuvivalInterval)

      ) %>% 
    mutate(.,mAs = mAs[i])
}

Dimension毎にネストさせてggplotでmAと穴or空洞の数(holes)の関係

## dimensionでネストさせたtbl_df
Interval_tbl <- summarise_df %>% 
  group_by(Dimension) %>% 
  nest()
## Birth <= SD
Interval_sdun_tbl <- summarise_sdun_df %>% 
  group_by(Dimension) %>% 
  nest()
## Birth >= SD
Interval_sdov_tbl <- summarise_sdov_df %>% 
  group_by(Dimension) %>% 
  nest()

dim <- seq_along(Interval_tbl$Dimension)

## ggplot_穴の数
gg_dimk_holes <- foreach(k = dim) %do% {
  ggplot(Interval_tbl$data[[k]],aes(x = mAs,y = holes))+
    xlim(0,550)+
    geom_point()+
    geom_smooth(se = FALSE,method = "loess") +
    ggtitle(str_c("dim", sep = k-1, "_holes"))
}


gg_dimk_holes_sdun <- foreach(k = dim) %do% {
  ggplot(Interval_sdun_tbl$data[[k]],aes(x = mAs,y = holes))+
    xlim(0,550)+
    geom_point()+
    geom_smooth(se = FALSE,method = "loess") +
    ggtitle(str_c("dim", sep = k-1, "_holes"),subtitle = "<= under_sd")
}

gg_dimk_holes_sdov <- foreach(k = seq_along(Interval_sdov_tbl$Dimension)) %do% {
  ggplot(Interval_sdov_tbl$data[[k]],aes(x = mAs,y = holes))+
    xlim(0,550)+
    geom_point()+
    geom_smooth(se = FALSE,method = "loess") +
    ggtitle(str_c("dim", sep = k, "_holes"),subtitle = "over_sd")
}

image.png
こんな感じ

各次元(Dimension)でbirth,Death が一致した行の個数(holes)を算出することでPD作成


PD_df <- foreach(i = slice_Number) %do% {
  shapeDiag_df[[i]] %>%
    group_by(Dimension,Birth,Death) %>% 
    summarise(holes = n(),
              log =  log(n()),
              loglog = log(log(n()))
    ) %>% 
    ungroup()
}

PD_df_nest <- foreach(i = slice_Number) %do% {
  shapeDiag_df[[i]] %>%
    group_by(Dimension,Birth,Death) %>% 
    summarise(holes = n(),
              log =  log(n()),
              loglog = log(log(n()))
    ) %>% 
    group_by(Dimension) %>% 
    nest()
}

重複した点があると、ggplotではPDを書くのが遅くなる。重複が7000を超える座標が多く存在するため

PDのプロット

## PD
gg_PD <- foreach(i = slice_Number) %do% {
  PD_df[[i]] %$%
    ggplot(.,aes(x = Birth, y = Death, shape = Dimension, colour = Dimension)) +
    geom_point() +
    geom_abline(slope = 1) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
          ) +
    ggtitle(str_c("mAs",sep = i*50,"_PD"))
}

## holesは重複度が線形スケール

gg_PD_holes <- foreach(i = slice_Number) %do% {
  foreach(k = dim) %do% {
    PD_df_nest[[i]]$data[[k]] %$%
      ggplot(.,aes(x = Birth, y = Death, colour = holes)) +
      geom_point(size = rel(0.2)) +
      geom_abline(slope = 1) +
      theme_bw() +
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank()
      ) +
      viridis::scale_colour_viridis() +
      ggtitle(str_c("mAs",sep = i*50,"_PD_holes"),subtitle = str_c("dim",sep = k))
  }
}

## logは重複度がlog()スケール
gg_PD_log <- foreach(i = slice_Number) %do% {
  foreach(k = dim) %do% {
    PD_df_nest[[i]]$data[[k]] %$%
      ggplot(.,aes(x = Birth, y = Death, colour = log)) +
      geom_point(size = rel(0.1)) +
      geom_abline(slope = 1) +
      theme_bw() +
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank()
      ) +
      viridis::scale_colour_viridis() +
      ggtitle(str_c("mAs",sep = i*50,"_PD_log"),subtitle = str_c("dim",sep = k))
  }
}


## loglogは重複度loglogスケール
gg_PD_loglog <- foreach(i = slice_Number) %do% {
  foreach(k = dim) %do% {
    PD_df_nest[[i]]$data[[k]] %$%
      ggplot(.,aes(x = Birth, y = Death, colour = loglog)) +
      geom_point(size = rel(0.1)) +
      geom_abline(slope = 1) +
      theme_bw() +
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank()
      ) +
      viridis::scale_colour_viridis() +
      ggtitle(str_c("mAs",sep = i*50,"_PD_loglog"),subtitle = str_c("dim",sep = k))
  }
}

PD
image.png
重複度holes
image.png
重複度log
image.png
重複度loglog
image.png

loglogスケールでは、log1 = 0 なので log(log(1)) = Inf になりInfの部分、すなわち重複していないPDの点はグレーで表示されます

5
8
2

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
5
8