More than 3 years have passed since last update.

複数のRグループに異なる側鎖群を指定して化合物を生成する

Last updated at 2020-05-22Posted at 2020-05-17

※この記事のコードは "Structure Generator based on R-Group (SGRG)" (URL: https://github.com/hkaneko1985/structure_generator_based_on_r_group )のコード(MITライセンス)を流用しています。

#はじめに
「Structure Generator based on R-Group (SGRG) を使って何ができますか？」(https://datachemeng.com/how_to_use_sgrg/ )では、「主骨格はそのままで、側鎖を変えた化学構造を生成する」プログラムが公開されている。大変素晴らしい。

ただ、全Rグループに、同じ側鎖群しか指定することができないようである。例えば、Rグループとして、R1とR2を定義し、それぞれに異なる側鎖群を指定することができないようだ。

そこで今回は、上で公開されているプログラムのうち, "structure_generator_based_on_r_group_all_combinations.py" を改変し、R1とR2にそれぞれ別の側鎖群を定義したsmiファイル(側鎖群がSMILESで列挙されたもの）を指定し、R1, R2それぞれ自身の側鎖群の側鎖しか生成されないようにしてみた。

#ソース
RDKitを駆使しまくっており、相当難易度の高いプログラムであったが、何とか意図の動作になるよう改変してみた。
ただ、Rグループは"R1", "R2"に限定されており、汎用性はまだ低い。

structure_generator_based_on_r_group_all_combinations_mod.py

import numpy as np
from numpy import matlib
from rdkit import Chem

bond_list = [Chem.rdchem.BondType.UNSPECIFIED, Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
             Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.QUADRUPLE, Chem.rdchem.BondType.QUINTUPLE,
             Chem.rdchem.BondType.HEXTUPLE, Chem.rdchem.BondType.ONEANDAHALF, Chem.rdchem.BondType.TWOANDAHALF,
             Chem.rdchem.BondType.THREEANDAHALF, Chem.rdchem.BondType.FOURANDAHALF, Chem.rdchem.BondType.FIVEANDAHALF,
             Chem.rdchem.BondType.AROMATIC, Chem.rdchem.BondType.IONIC, Chem.rdchem.BondType.HYDROGEN,
             Chem.rdchem.BondType.THREECENTER, Chem.rdchem.BondType.DATIVEONE, Chem.rdchem.BondType.DATIVE,
             Chem.rdchem.BondType.DATIVEL, Chem.rdchem.BondType.DATIVER, Chem.rdchem.BondType.OTHER,
             Chem.rdchem.BondType.ZERO]


# 主骨格
main_molecule = Chem.MolFromMolFile('main.sdf')
#R1 用側鎖
fragment_molecules = [molecule for molecule in Chem.SDMolSupplier('fragments1.sdf') if molecule is not None] 
#R2 用側鎖
fragment_molecules2 = [molecule for molecule in Chem.SDMolSupplier('fragments2.sdf') if molecule is not None] 

number_of_fragment_molecules = len(fragment_molecules)
number_of_fragment_molecules2 = len(fragment_molecules2)

main_adjacency_matrix = Chem.rdmolops.GetAdjacencyMatrix(main_molecule)

for bond in main_molecule.GetBonds():
    main_adjacency_matrix[bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()] = bond_list.index(bond.GetBondType())
    main_adjacency_matrix[bond.GetEndAtomIdx(), bond.GetBeginAtomIdx()] = bond_list.index(bond.GetBondType())

main_atoms = []
for atom in main_molecule.GetAtoms():
    print(atom.GetSymbol())
    main_atoms.append(atom.GetSymbol())

#ここをR1, R2に変更
#r_index_in_main_molecule_old = [index for index, atom in enumerate(main_atoms) if atom == '*']
r_index_in_main_molecule_old = [index for index, atom in enumerate(main_atoms) if atom == 'R1' or atom == 'R2']

for index, r_index in enumerate(r_index_in_main_molecule_old):
    modified_index = r_index - index
    atom = main_atoms.pop(modified_index)
    main_atoms.append(atom)
    tmp = main_adjacency_matrix[:, modified_index:modified_index + 1].copy()
    main_adjacency_matrix = np.delete(main_adjacency_matrix, modified_index, 1)
    main_adjacency_matrix = np.c_[main_adjacency_matrix, tmp]
    tmp = main_adjacency_matrix[modified_index:modified_index + 1, :].copy()
    main_adjacency_matrix = np.delete(main_adjacency_matrix, modified_index, 0)
    main_adjacency_matrix = np.r_[main_adjacency_matrix, tmp]


#ここをR1, R2に変更
#r_index_in_main_molecule_new = [index for index, atom in enumerate(main_atoms) if atom == '*']
r_index_in_main_molecule_new = [index for index, atom in enumerate(main_atoms) if atom == 'R1' or atom == 'R2']

r_bonded_atom_index_in_main_molecule = []
for number in r_index_in_main_molecule_new:
    r_bonded_atom_index_in_main_molecule.append(np.where(main_adjacency_matrix[number, :] != 0)[0][0])

r_bond_number_in_main_molecule = main_adjacency_matrix[
    r_index_in_main_molecule_new, r_bonded_atom_index_in_main_molecule]

main_adjacency_matrix = np.delete(main_adjacency_matrix, r_index_in_main_molecule_new, 0)
main_adjacency_matrix = np.delete(main_adjacency_matrix, r_index_in_main_molecule_new, 1)

for i in range(len(r_index_in_main_molecule_new)):
    #ここをR1, R2に変更
    #main_atoms.remove('*')
    try:
        main_atoms.remove('R1')
        main_atoms.remove('R2')
    except:
        pass

main_size = main_adjacency_matrix.shape[0]
all_fragment_combinations = np.arange(number_of_fragment_molecules)
all_fragment_combinations = np.reshape(all_fragment_combinations, (all_fragment_combinations.shape[0], 1))

#ここを修正
#if len(r_index_in_main_molecule_new) > 1:
#    for variable_number in range(2, len(r_index_in_main_molecule_new) + 1):
#        grid_seed_tmp = matlib.repmat(np.arange(number_of_fragment_molecules), #all_fragment_combinations.shape[0], 1)
#        all_fragment_combinations = np.c_[
#            matlib.repmat(all_fragment_combinations, #len(np.arange(number_of_fragment_molecules)), 1),
#            np.reshape(grid_seed_tmp.T, (np.prod(grid_seed_tmp.shape), 1))]

grid_seed_tmp = matlib.repmat(np.arange(number_of_fragment_molecules2), all_fragment_combinations.shape[0], 1)

all_fragment_combinations = np.c_[
    matlib.repmat(all_fragment_combinations, len(np.arange(number_of_fragment_molecules2)), 1),
    np.reshape(grid_seed_tmp.T, (np.prod(grid_seed_tmp.shape), 1))]

generated_structures = []

for generated_structure_number in range(all_fragment_combinations.shape[0]):
    generated_molecule_atoms = main_atoms[:]
    generated_adjacency_matrix = main_adjacency_matrix.copy()

    for r_number_in_molecule in range(len(r_index_in_main_molecule_new)):
        #ここを1R1, R2に対応
        #fragment_molecule = fragment_molecules[
        #    all_fragment_combinations[generated_structure_number, r_number_in_molecule]]
        if r_number_in_molecule == 0:
            fragment_molecule = fragment_molecules[
                all_fragment_combinations[generated_structure_number, r_number_in_molecule]]
        else:
            fragment_molecule = fragment_molecules2[
                all_fragment_combinations[generated_structure_number, r_number_in_molecule]]

        fragment_adjacency_matrix = Chem.rdmolops.GetAdjacencyMatrix(fragment_molecule)
        for bond in fragment_molecule.GetBonds():
            fragment_adjacency_matrix[bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()] = bond_list.index(
                bond.GetBondType())
            fragment_adjacency_matrix[bond.GetEndAtomIdx(), bond.GetBeginAtomIdx()] = bond_list.index(
                bond.GetBondType())
        fragment_atoms = []
        for atom in fragment_molecule.GetAtoms():
            fragment_atoms.append(atom.GetSymbol())

        # integrate adjacency matrix
        r_index_in_fragment_molecule = fragment_atoms.index('*')

        r_bonded_atom_index_in_fragment_molecule = \
            np.where(fragment_adjacency_matrix[r_index_in_fragment_molecule, :] != 0)[0][0]

        r_bond_number_in_fragment_molecule = fragment_adjacency_matrix[
            r_index_in_fragment_molecule, r_bonded_atom_index_in_fragment_molecule]

        if r_bonded_atom_index_in_fragment_molecule > r_index_in_fragment_molecule:
            r_bonded_atom_index_in_fragment_molecule -= 1

        fragment_atoms.remove('*')
        fragment_adjacency_matrix = np.delete(fragment_adjacency_matrix, r_index_in_fragment_molecule, 0)
        fragment_adjacency_matrix = np.delete(fragment_adjacency_matrix, r_index_in_fragment_molecule, 1)

        fragment_size = fragment_adjacency_matrix.shape[0]

        main_size = generated_adjacency_matrix.shape[0]
        generated_adjacency_matrix = np.c_[generated_adjacency_matrix, np.zeros(
            [generated_adjacency_matrix.shape[0], fragment_adjacency_matrix.shape[0]], dtype='int32')]

        generated_adjacency_matrix = np.r_[generated_adjacency_matrix, np.zeros(
            [fragment_adjacency_matrix.shape[0], generated_adjacency_matrix.shape[1]], dtype='int32')]

        generated_adjacency_matrix[
            r_bonded_atom_index_in_main_molecule[r_number_in_molecule],
            r_bonded_atom_index_in_fragment_molecule + main_size] = \
            r_bond_number_in_main_molecule[r_number_in_molecule]

        generated_adjacency_matrix[
            r_bonded_atom_index_in_fragment_molecule + main_size,
            r_bonded_atom_index_in_main_molecule[ r_number_in_molecule]] = r_bond_number_in_main_molecule[r_number_in_molecule]

        generated_adjacency_matrix[main_size:, main_size:] = fragment_adjacency_matrix

        # integrate atoms
        generated_molecule_atoms += fragment_atoms

    # generate structures 
    generated_molecule = Chem.RWMol()
    atom_index = []
    for atom_number in range(len(generated_molecule_atoms)):
        atom = Chem.Atom(generated_molecule_atoms[atom_number])
        molecular_index = generated_molecule.AddAtom(atom)
        atom_index.append(molecular_index)

    for index_x, row_vector in enumerate(generated_adjacency_matrix):
        for index_y, bond in enumerate(row_vector):
            if index_y <= index_x:
                continue
            if bond == 0:
                continue
            else:
                generated_molecule.AddBond(atom_index[index_x], atom_index[index_y], bond_list[bond])

    generated_molecule = generated_molecule.GetMol()
    generated_structures.append(Chem.MolToSmiles(generated_molecule))

    if (generated_structure_number + 1) % 1000 == 0 or (generated_structure_number + 1) == \
            all_fragment_combinations.shape[0]:
        print(generated_structure_number + 1, '/', all_fragment_combinations.shape[0])


str_ = '\n'.join(generated_structures)
with open('generated_structures.smi', 'wt') as writer:
    writer.write(str_)
writer.close()

#最後に
Rグループ毎に、異なるsmiファイルを指定できるよう汎用化してみたい。
今回は、元のソースから化合物を編集する際のRDkitの新しい使い方も勉強させていただいた。この使い方で何ができるか考えて見たい。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up