search
LoginSignup
3

More than 5 years have passed since last update.

posted at

updated at

TDA mapper and diabetes data

RでTopological data analysis
mclustのdiabeesデータを試してみる。
filter functionは以下のようにしてみた

f(x)=\sum_y exp\frac{-(distance(y-x))^2}{\epsilon}  
(\epsilon=130000)
library(mclust)
library(TDAmapper)
library(igraph)

data(diabetes)
.data <- diabetes[,2:4]
.dist <- dist(.data, diag=T, upper=T)
.mat <- as.matrix(.dist)
.kernel <- function(d) sum(exp(-(d^2)/130000))
.filter <- apply(.mat, 2, .kernel)

#
map <- mapper1D(.dist, filter=.filter, num_intervals=5, percent_overlap = 50)
g <- graph.adjacency(map$adjacency, mode="undirected")

#ノード内のデータ数に応じてノードサイズを変更
V(g)$size <- 5*sqrt(sapply(map$points_in_vertex, length))

.col <- c()
N <- length(V(g))
for(i in 1:N){
  tmp <- as.numeric((table(diabetes[map$points_in_vertex[[i]], 1])))
  t <- tmp/sum(tmp)
  .col[i] <- rgb(t[1], t[2], t[3])
}
V(g)$color <- .col
plot(g)

TDA (2).jpg

> pv <- map$points_in_vertex
> labels <- diabetes[,1]
> glucose <- diabetes[,2]
> insulin <- diabetes[,3]
> sspg <- diabetes[,4]

> rbind(
+   table(labels[pv[[9]]]),
+   table(labels[pv[[1]]]),
+   table(labels[pv[[3]]])
+ )
     Chemical Normal Overt
[1,]       29     75     2
[2,]        4      0     0
[3,]        0      0    26

> summary(diabetes[map$points_in_vertex[[9]], 2:4])
    glucose          insulin           sspg      
 Min.   : 70.00   Min.   :269.0   Min.   : 73.0  
 1st Qu.: 87.25   1st Qu.:333.2   1st Qu.:131.2  
 Median : 93.00   Median :372.0   Median :167.0  
 Mean   : 94.27   Mean   :392.6   Mean   :183.2  
 3rd Qu.: 99.75   3rd Qu.:428.2   3rd Qu.:221.0  
 Max.   :140.00   Max.   :643.0   Max.   :408.0  
> summary(diabetes[map$points_in_vertex[[1]], 2:4])
    glucose         insulin           sspg      
 Min.   : 95.0   Min.   :469.0   Min.   :564.0  
 1st Qu.: 99.5   1st Qu.:505.0   1st Qu.:596.2  
 Median :102.0   Median :527.0   Median :614.5  
 Mean   :102.5   Mean   :520.2   Mean   :635.2  
 3rd Qu.:105.0   3rd Qu.:542.2   3rd Qu.:653.5  
 Max.   :111.0   Max.   :558.0   Max.   :748.0  
> summary(diabetes[map$points_in_vertex[[3]], 2:4])
    glucose         insulin            sspg       
 Min.   :146.0   Min.   : 832.0   Min.   : 10.00  
 1st Qu.:188.5   1st Qu.: 931.8   1st Qu.: 41.25  
 Median :224.5   Median :1123.0   Median : 76.50  
 Mean   :241.7   Mean   :1152.9   Mean   : 75.69  
 3rd Qu.:302.2   3rd Qu.:1370.8   3rd Qu.:102.75  
 Max.   :353.0   Max.   :1568.0   Max.   :160.00  

もう少しchemicalの分離をよくしたいが、一応overtとその他の分類はできている。

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
What you can do with signing up
3