More than 5 years have passed since last update.

エネルギーダイアグラムを描こう

Last updated at 2017-12-22Posted at 2017-12-14

この記事は創薬 Advent Calendar 2017の16日目の記事です。

追加機能および予定

2017/12/21プログラム修正（コード構成（クラス構成）修正）
2017/12/22プログラム修正（プロット着色機能追加）

溶媒有り無しのダイアグラムの書き方
エントロピーの効果あるときないとき
反応パスの自動色付け
レベルの部分も色分けする
ディレクトリ自動生成
（R内で）pythonコードでエネルギーを取り出す時のやり方
化合物の構造貼り付けたい

んー、インタラクティブな解析用の実装（副作用なし関数、副作用あり関数）はRで書いて、データ保持用の最低限の実装（データ保持クラス）はpythonで書いた方が保守（テスト）しやすい？

きっかけ

創薬アドカレが一つ空いていたので先輩(15日目の人)の強烈なプッシュにより記事を書くことになりました。

何もネタがないので昨日書いた、化学反応のエネルギーダイアグラムのプロット作成支援のプログラム(R言語)を貼ります。まだ未完成なので突っ込みどころが満載です。修正・提案あればお願いします！
気が向けばそのうちパッケージ化します。

エネルギーダイアグラム

フォルダ名をもとにどんどんエネルギーダイアグラムをつなげていきます。
これによって、化学反応解析のレポートの作成プロセスを自動化し再現性を高くします。

フォルダ名のルールはこのようになっています。

x軸順番_状態_ラベル_メモ_反応進行前の分子(とあれば系外系中からの試薬の収支)

反応進行前の分子」に直前の構造のラベルを指定することでパスをつないでいきます。
反応物（開始時）の場合は「反応進行前の分子」は反応物の名前をそのまま使います。
反応試薬に関しては「反応進行前の分子」は入力なしにします。

サンプルフォルダはこちらにあります。
このフォルダの中でRを開き、以下のプログラムを打ち込めばプロットが出ます。

ライブラリが入っていない場合はinstall.package("R6")などしてください。

chemicalreactionprofiler.R

library(R6)
library(purrr)
library(dplyr)
library(ggplot)
library(magrittr)

Profiler <- R6Class("Profiler",
                    public = list(
                      entries = NULL,
                      link_info = NA,
                      initialize = function(){
                        self$entries <- new.env()
                      },
                      compensate_energy = function(key){
                        self$entries %>%
                          lapply(function(x) (self$calc_inout(x,key) + x$energies[[key]]) %>%
                                   x$set_corrected_energy_env_(key, .))
                      },
                      get_links = function(){
                        mat <- self$entries %>%
                          lapply(function(x) x$link_info) %>%
                          do.call(rbind,.)
                        mat <- mat[!is.na(mat[,1]),]
                        mat <- mat[!is.na(mat[,2]),]
                        mat <- mat[mat[,1] != mat[,2],]
                        mat <- cbind(mat, "black")
                        rownames(mat) <- NULL
                        self$link_info <- mat
                      },
                      get_entries = function(){
                        ls(self$entries) %>%
                          sapply(function(x) self$entries[[x]])
                      },
                      add_entry = function(entry){
                        id <- entry$id
                        self$entries[[id]] <- entry
                      },
                      get_inouts_extended_for_profiler = function(entry, strings = NULL ,inouts = NULL){
                        if(is.null(strings)){
                          strings <- strsplit(entry$atoms_inout, split = "&")[[1]][1]
                          if(is.na(strings) || is_empty(strings)) return(NA)
                          inouts <- strsplit(strings, split = "\\+|\\-")[[1]]
                        }
                        if(is_empty(inouts) || is.na(inouts)) return(strings)
                        firstInout <- self$entries[[inouts[1]]]$atoms_inout
                        if(length(inouts) == 1 && (is.na(firstInout) || firstInout == inouts[1]))
                          return(Recall(entry, strings, NA))
                        if(firstInout == inouts[1] || is.na(firstInout))
                          return(Recall(entry, strings, inouts[-1]))
                        strings <- gsub(inouts[1], firstInout, strings)
                        inouts <- c(strsplit(firstInout, split = "\\+|\\-")[[1]], inouts[-1])
                        return(Recall(entry, strings, inouts))
                      },
                      eval_inouts_for_correction_energy_ = function(strings, key, list = NULL){
                        if(is.null(list)){
                          if(is.na(strings)) return(NA)
                          list <- strsplit(strings, split = "\\+|\\-")[[1]]
                          strings <- strings %>%
                            gsub("\\+", "=", .) %>%
                            gsub("\\-", "\\+", .) %>%
                            gsub("=", "\\-", .)
                          strings <- paste0("-", strings)
                        }
                        if(is_empty(list)) return(eval(parse(text = strings)))
                        if(is.na(list[1])) return(Recall(strings, key, list[-1]))
                        energy <- self$entries[[list[1]]]$energies[[key]]
                        strings <- gsub(list[1], energy, strings)
                        return(Recall(strings, key, list[-1]))
                      },
                      calc_inout = function(entry, key){
                        entry %>%
                          self$get_inouts_extended_for_profiler() %>%
                          self$eval_inouts_for_correction_energy_(key)
                      },
                      extract_energy_with_ = function(key, energy_parser){
                        self$entries %>% lapply(function(x) x$set_energy_env_(key, energy_parser(x$path)))
                      },

                      plot = function(key, labelpos = 1){
                        if(is.na(self$link_info[1])) self$get_links()
                        seq <- self$link_info[,c(1, 2)] %>% as.vector() %>% unique()
                        seq %<>% .[order(.)]
                        kv <- seq %>% sapply(function(x) self$entries[[x]]$corrected_energies[[key]])
                        gp <- ggplot()
                        gp <- gp + ylim(min(kv) - abs(labelpos), max(kv) + abs(labelpos)) + xlim(0,length(seq)*2)

                        for(i in 1:length(seq)){
                          env <- list(x_ = 2 * i - 2,
                                      y_ = kv[seq[i]],
                                      xend_ = 2 * i - 1,
                                      yend_ = kv[seq[i]])
                          eval(substitute(gp <- gp + geom_segment(aes(x = x_, y= y_, xend = xend_, yend = yend_)), env))
                        }
                        for(i in 1:length(seq)){
                          env <- list(x_ = 2 * i - 1.5,
                                      y_ = kv[seq[i]] + labelpos,
                                      label_ = self$entries[[seq[i]]]$label)
                          eval(substitute(gp <- gp + geom_text(aes(x = x_, y = y_, label = label_)), env))
                        }
                        for(i in 1:length(self$link_info[,1])){
                          env <- list(x_ = 2 * (1:length(seq))[seq == self$link_info[i,1]] - 1,
                                      y_ = kv[self$link_info[i,1]],
                                      xend_ = 2 * (1:length(seq))[seq == self$link_info[i,2]] - 2,
                                      yend_ = kv[self$link_info[i,2]],
                                      color_ = self$link_info[i,4],
                                      linetype_ = self$link_info[i,3])
                          eval(substitute(gp <- gp + geom_segment(aes(x = x_, y= y_, xend = xend_, yend = yend_), color = color_, linetype = linetype_), env))
                        }
                        title <- getwd() %>% {function(x) rev(strsplit(x,split = "/")[[1]])[1]}()
                        title <- paste(title, key)
                        gp <- gp + labs(title = title) + xlab("") + ylab("")
                        gp
                      }
                    )
)


profiler_add_entry <- function(profiler, entry){
  profiler$add_entry(entry)
}

profiler_get_entries <- function(profiler){
  lprofiler$get_entries()
}

profiler_extract_energy_with_ <- function(profiler, key, energy_parser){
  profiler$extract_energy_with_(key, energy_parser)
}


Entry <- R6Class("Entry",
                 public = list(
                   path = "",
                   file = "",
                   id = "",
                   title = "",
                   atoms_inout = "",
                   link_info = "",
                   energies = NULL,
                   corrected_energies = NULL,
                   point_group = "",
                   label = "",

                   initialize = function(path, info_parser = self$info_parser){
                     self$path <- path
                     self$file <- self$path2file(self$path)
                     self$parse_info(info_parser)
                     self$energies <- new.env()
                     self$corrected_energies <- new.env()
                   },
                   set_energy_env_ = function(key, energy){
                     self$energies[[key]] <- energy
                   },
                   set_corrected_energy_env_ = function(key, energy){
                     self$corrected_energies[[key]] <- energy
                   },
                   info_parser = function(dirpath){
                     dirpath %>%
                     {function(x) strsplit(x,"_")[[1]]}() %>%
                     {function(x) c(paste0(x[1:3],collapse = ""),x[4],x[5])}()
                   },
                   parse_info = function(parser){
                     res <- parser(self$file)

                     self$id <- res[1]
                     self$title <- res[2]
                     self$atoms_inout <- res[3]

                     inout_list <- strsplit(self$atoms_inout, split = "&")[[1]]
                     link_info <- strsplit(inout_list, split = "\\+|\\-") %>%
                       lapply(function(x) c(x[1], self$id, ifelse(length(x) == 1, "solid", "dashed"))) %>%
                       do.call(rbind, .)
                     names(link_info) <- NULL
                     self$link_info <- link_info
                     label <- strsplit(self$id, split = "EQ|TS")[[1]][2]
                     self$label <- ifelse(is.na(label), "", label)
                   },
                   parse_pointgroup = function(parser){
                     self$point_group <- parser(self$path)
                   },
                   path2file = function(path) tail(strsplit(path, split = "/")[[1]], n = 1)
                 )
)

entry_set_energy_ <- function(entry, key, value){
  entry$set_energy_env_(key, value)
}

main.R


get_energy_by <- function(dirpath){
  dirpath %>%
    paste0("/file") %>%
    read_lines %>%
    as.numeric
}#ここのget_energy_by関数は自由に置き換え可能なので、gaussianやgamessのログから直接データをとりたしたいときなど、適宜関数を書き換えてください。

a <- Profiler$new()
dir() %>% lapply(Entry$new) %>% lapply(a$add_entry)
a$get_links()

a %>% profiler_extract_energy_with_("HF", get_energy_by)
a$compensate_energy("HF")
a$plot("HF")

a$link_info %<>% rbind(c("33TS", "90EQ4", "solid", "black"))
a$link_info
a$plot("HF")

a$link_info[c(1,2,4,6,9),4] <- "red"
a$link_info[c(5,7,8,10),4] <- "blue"
a$link_info
a$plot("HF")

はい出来ました！

補足　エネルギーダイアグラムは何が嬉しいのか

化学合成において最適化すべき場所が分かります。

化学合成において、最終的に同じ物質が出来る場合でも複数の経路が考えられます。
こういうときに、エネルギーダイアグラムをつくってあげると、どちらの経路が重要な反応であるかが理解できます。
今回の例だと、図でいう赤の経路の方が青の経路よりもエネルギーの高低差（活性化エネルギー）が小さく、反応において主要な経路であるといえます。

そのため合成を最適化し収率などを高めたい場合、赤の経路の最適化を行えばよいことが分かります。
逆に青の経路は主要な経路ではないので、いくら最適化しても収率などに変化はないと考えられ、時間や資金の無駄になります。

そういった実験計画の判断基準をエネルギーダイアグラムは与えてくれ、リソースの無駄な消費を抑えることができます。

参考

エネルギーダイアグラム作成時の計算手順
http://xn--qck0d2a9as2853cudbqy0lc6cfz4a0e7e.xyz/theory/how-to-write-ezu
似たようなことをされてる方
http://pc-chem-basics.blog.jp/archives/2454069.html

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up

エネルギーダイアグラムを描こう

追加機能および予定

きっかけ

エネルギーダイアグラム

補足 エネルギーダイアグラムは何が嬉しいのか

参考

補足　エネルギーダイアグラムは何が嬉しいのか