0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

複数のRグループに異なる側鎖群を指定して化合物を生成する

Last updated at Posted at 2020-05-17

※この記事のコードは "Structure Generator based on R-Group (SGRG)" (URL: https://github.com/hkaneko1985/structure_generator_based_on_r_group )のコード(MITライセンス)を流用しています。

#はじめに
「Structure Generator based on R-Group (SGRG) を使って何ができますか?」(https://datachemeng.com/how_to_use_sgrg/ )では、「主骨格はそのままで、側鎖を変えた化学構造を生成する」プログラムが公開されている。大変素晴らしい。

ただ、全Rグループに、同じ側鎖群しか指定することができないようである。例えば、Rグループとして、R1とR2を定義し、それぞれに異なる側鎖群を指定することができないようだ。

そこで今回は、上で公開されているプログラムのうち, "structure_generator_based_on_r_group_all_combinations.py" を改変し、R1とR2にそれぞれ別の側鎖群を定義したsmiファイル(側鎖群がSMILESで列挙されたもの)を指定し、R1, R2それぞれ自身の側鎖群の側鎖しか生成されないようにしてみた。

#ソース
RDKitを駆使しまくっており、相当難易度の高いプログラムであったが、何とか意図の動作になるよう改変してみた。
ただ、Rグループは"R1", "R2"に限定されており、汎用性はまだ低い。

structure_generator_based_on_r_group_all_combinations_mod.py
import numpy as np
from numpy import matlib
from rdkit import Chem

bond_list = [Chem.rdchem.BondType.UNSPECIFIED, Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
             Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.QUADRUPLE, Chem.rdchem.BondType.QUINTUPLE,
             Chem.rdchem.BondType.HEXTUPLE, Chem.rdchem.BondType.ONEANDAHALF, Chem.rdchem.BondType.TWOANDAHALF,
             Chem.rdchem.BondType.THREEANDAHALF, Chem.rdchem.BondType.FOURANDAHALF, Chem.rdchem.BondType.FIVEANDAHALF,
             Chem.rdchem.BondType.AROMATIC, Chem.rdchem.BondType.IONIC, Chem.rdchem.BondType.HYDROGEN,
             Chem.rdchem.BondType.THREECENTER, Chem.rdchem.BondType.DATIVEONE, Chem.rdchem.BondType.DATIVE,
             Chem.rdchem.BondType.DATIVEL, Chem.rdchem.BondType.DATIVER, Chem.rdchem.BondType.OTHER,
             Chem.rdchem.BondType.ZERO]


# 主骨格
main_molecule = Chem.MolFromMolFile('main.sdf')
#R1 用側鎖
fragment_molecules = [molecule for molecule in Chem.SDMolSupplier('fragments1.sdf') if molecule is not None] 
#R2 用側鎖
fragment_molecules2 = [molecule for molecule in Chem.SDMolSupplier('fragments2.sdf') if molecule is not None] 

number_of_fragment_molecules = len(fragment_molecules)
number_of_fragment_molecules2 = len(fragment_molecules2)

main_adjacency_matrix = Chem.rdmolops.GetAdjacencyMatrix(main_molecule)

for bond in main_molecule.GetBonds():
    main_adjacency_matrix[bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()] = bond_list.index(bond.GetBondType())
    main_adjacency_matrix[bond.GetEndAtomIdx(), bond.GetBeginAtomIdx()] = bond_list.index(bond.GetBondType())

main_atoms = []
for atom in main_molecule.GetAtoms():
    print(atom.GetSymbol())
    main_atoms.append(atom.GetSymbol())

#ここをR1, R2に変更
#r_index_in_main_molecule_old = [index for index, atom in enumerate(main_atoms) if atom == '*']
r_index_in_main_molecule_old = [index for index, atom in enumerate(main_atoms) if atom == 'R1' or atom == 'R2']

for index, r_index in enumerate(r_index_in_main_molecule_old):
    modified_index = r_index - index
    atom = main_atoms.pop(modified_index)
    main_atoms.append(atom)
    tmp = main_adjacency_matrix[:, modified_index:modified_index + 1].copy()
    main_adjacency_matrix = np.delete(main_adjacency_matrix, modified_index, 1)
    main_adjacency_matrix = np.c_[main_adjacency_matrix, tmp]
    tmp = main_adjacency_matrix[modified_index:modified_index + 1, :].copy()
    main_adjacency_matrix = np.delete(main_adjacency_matrix, modified_index, 0)
    main_adjacency_matrix = np.r_[main_adjacency_matrix, tmp]


#ここをR1, R2に変更
#r_index_in_main_molecule_new = [index for index, atom in enumerate(main_atoms) if atom == '*']
r_index_in_main_molecule_new = [index for index, atom in enumerate(main_atoms) if atom == 'R1' or atom == 'R2']

r_bonded_atom_index_in_main_molecule = []
for number in r_index_in_main_molecule_new:
    r_bonded_atom_index_in_main_molecule.append(np.where(main_adjacency_matrix[number, :] != 0)[0][0])

r_bond_number_in_main_molecule = main_adjacency_matrix[
    r_index_in_main_molecule_new, r_bonded_atom_index_in_main_molecule]

main_adjacency_matrix = np.delete(main_adjacency_matrix, r_index_in_main_molecule_new, 0)
main_adjacency_matrix = np.delete(main_adjacency_matrix, r_index_in_main_molecule_new, 1)

for i in range(len(r_index_in_main_molecule_new)):
    #ここをR1, R2に変更
    #main_atoms.remove('*')
    try:
        main_atoms.remove('R1')
        main_atoms.remove('R2')
    except:
        pass

main_size = main_adjacency_matrix.shape[0]
all_fragment_combinations = np.arange(number_of_fragment_molecules)
all_fragment_combinations = np.reshape(all_fragment_combinations, (all_fragment_combinations.shape[0], 1))

#ここを修正
#if len(r_index_in_main_molecule_new) > 1:
#    for variable_number in range(2, len(r_index_in_main_molecule_new) + 1):
#        grid_seed_tmp = matlib.repmat(np.arange(number_of_fragment_molecules), #all_fragment_combinations.shape[0], 1)
#        all_fragment_combinations = np.c_[
#            matlib.repmat(all_fragment_combinations, #len(np.arange(number_of_fragment_molecules)), 1),
#            np.reshape(grid_seed_tmp.T, (np.prod(grid_seed_tmp.shape), 1))]

grid_seed_tmp = matlib.repmat(np.arange(number_of_fragment_molecules2), all_fragment_combinations.shape[0], 1)

all_fragment_combinations = np.c_[
    matlib.repmat(all_fragment_combinations, len(np.arange(number_of_fragment_molecules2)), 1),
    np.reshape(grid_seed_tmp.T, (np.prod(grid_seed_tmp.shape), 1))]

generated_structures = []

for generated_structure_number in range(all_fragment_combinations.shape[0]):
    generated_molecule_atoms = main_atoms[:]
    generated_adjacency_matrix = main_adjacency_matrix.copy()

    for r_number_in_molecule in range(len(r_index_in_main_molecule_new)):
        #ここを1R1, R2に対応
        #fragment_molecule = fragment_molecules[
        #    all_fragment_combinations[generated_structure_number, r_number_in_molecule]]
        if r_number_in_molecule == 0:
            fragment_molecule = fragment_molecules[
                all_fragment_combinations[generated_structure_number, r_number_in_molecule]]
        else:
            fragment_molecule = fragment_molecules2[
                all_fragment_combinations[generated_structure_number, r_number_in_molecule]]

        fragment_adjacency_matrix = Chem.rdmolops.GetAdjacencyMatrix(fragment_molecule)
        for bond in fragment_molecule.GetBonds():
            fragment_adjacency_matrix[bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()] = bond_list.index(
                bond.GetBondType())
            fragment_adjacency_matrix[bond.GetEndAtomIdx(), bond.GetBeginAtomIdx()] = bond_list.index(
                bond.GetBondType())
        fragment_atoms = []
        for atom in fragment_molecule.GetAtoms():
            fragment_atoms.append(atom.GetSymbol())

        # integrate adjacency matrix
        r_index_in_fragment_molecule = fragment_atoms.index('*')

        r_bonded_atom_index_in_fragment_molecule = \
            np.where(fragment_adjacency_matrix[r_index_in_fragment_molecule, :] != 0)[0][0]

        r_bond_number_in_fragment_molecule = fragment_adjacency_matrix[
            r_index_in_fragment_molecule, r_bonded_atom_index_in_fragment_molecule]

        if r_bonded_atom_index_in_fragment_molecule > r_index_in_fragment_molecule:
            r_bonded_atom_index_in_fragment_molecule -= 1

        fragment_atoms.remove('*')
        fragment_adjacency_matrix = np.delete(fragment_adjacency_matrix, r_index_in_fragment_molecule, 0)
        fragment_adjacency_matrix = np.delete(fragment_adjacency_matrix, r_index_in_fragment_molecule, 1)

        fragment_size = fragment_adjacency_matrix.shape[0]

        main_size = generated_adjacency_matrix.shape[0]
        generated_adjacency_matrix = np.c_[generated_adjacency_matrix, np.zeros(
            [generated_adjacency_matrix.shape[0], fragment_adjacency_matrix.shape[0]], dtype='int32')]

        generated_adjacency_matrix = np.r_[generated_adjacency_matrix, np.zeros(
            [fragment_adjacency_matrix.shape[0], generated_adjacency_matrix.shape[1]], dtype='int32')]

        generated_adjacency_matrix[
            r_bonded_atom_index_in_main_molecule[r_number_in_molecule],
            r_bonded_atom_index_in_fragment_molecule + main_size] = \
            r_bond_number_in_main_molecule[r_number_in_molecule]

        generated_adjacency_matrix[
            r_bonded_atom_index_in_fragment_molecule + main_size,
            r_bonded_atom_index_in_main_molecule[ r_number_in_molecule]] = r_bond_number_in_main_molecule[r_number_in_molecule]

        generated_adjacency_matrix[main_size:, main_size:] = fragment_adjacency_matrix

        # integrate atoms
        generated_molecule_atoms += fragment_atoms

    # generate structures 
    generated_molecule = Chem.RWMol()
    atom_index = []
    for atom_number in range(len(generated_molecule_atoms)):
        atom = Chem.Atom(generated_molecule_atoms[atom_number])
        molecular_index = generated_molecule.AddAtom(atom)
        atom_index.append(molecular_index)

    for index_x, row_vector in enumerate(generated_adjacency_matrix):
        for index_y, bond in enumerate(row_vector):
            if index_y <= index_x:
                continue
            if bond == 0:
                continue
            else:
                generated_molecule.AddBond(atom_index[index_x], atom_index[index_y], bond_list[bond])

    generated_molecule = generated_molecule.GetMol()
    generated_structures.append(Chem.MolToSmiles(generated_molecule))

    if (generated_structure_number + 1) % 1000 == 0 or (generated_structure_number + 1) == \
            all_fragment_combinations.shape[0]:
        print(generated_structure_number + 1, '/', all_fragment_combinations.shape[0])


str_ = '\n'.join(generated_structures)
with open('generated_structures.smi', 'wt') as writer:
    writer.write(str_)
writer.close()

#最後に
Rグループ毎に、異なるsmiファイルを指定できるよう汎用化してみたい。
今回は、元のソースから化合物を編集する際のRDkitの新しい使い方も勉強させていただいた。この使い方で何ができるか考えて見たい。

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?