# coding: UTF-8
from Bio import SeqIO
import datetime
import subprocess
import sys
"""
使い方
python blastget.py blastn query.fasta db.fasta num(default:0)
で走らせると以下のコマンドを実行し、ヒットした配列を取得
blastn -query query -db db.fasta -outfmt 6
numは取得する周辺配列の長さ。
num = 100 にすればヒットした配列の前後100aaを取得。
前後が100aa以下の場合は取れるだけの長さを取る
makeblastdbは入れてないので先にやっておいてください
"""
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
argvs = sys.argv # コマンドライン引数を格納したリストの取得
argc = len(argvs) # 引数の個数
blast = argvs[1]
# num変数の設定
if argc > 4:
num = int(argvs[4])
else:
num = 0
# blastの実行
print ("blast with following command:")
print('%s -query %s -db %s -outfmt 6 > BlastResult_%s'%(blast,argvs[2],argvs[3],now))
subprocess.call('%s -query %s -db %s -outfmt 6 > BlastResult_%s'%(blast,argvs[2],argvs[3],now), shell=True)
outputFasta = open("Output_%s.fasta"%now,"w")
# 配列の取得
for line in open("BlastResult_%s"%now) :
#print line
if len(line) > 2:
ID = line.split("\t")[1]
seqstart = min(int(line.split("\t")[8]),int(line.split("\t")[9]))
seqend = max(int(line.split("\t")[8]),int(line.split("\t")[9]))
index = 0
handle = open(argvs[3], "rU")
for record in SeqIO.parse(handle, "fasta"):
if record.id == ID:
if seqstart < num and seqend > len(record.seq)-num:
outputFasta.write( ">"+record.id +"\n" )
outputFasta.write( str(record.seq ) +"\n" )
elif seqstart < num:
outputFasta.write( ">"+record.id +"\n" )
outputFasta.write( str(record.seq[0:seqend + num] ) +"\n" )
elif seqend > len(record.seq)-num:
outputFasta.write( ">"+record.id +"\n" )
outputFasta.write( str(record.seq[seqstart-num:] ) +"\n" )
else:
outputFasta.write( ">"+record.id +"\n" )
outputFasta.write( str(record.seq[seqstart-num:seqend + num]) +"\n" )
handle.close()
outputFasta.close()
More than 5 years have passed since last update.
blastでヒットした配列の前後を取る
0
Last updated at Posted at 2017-05-29
Register as a new user and use Qiita more conveniently
- You get articles that match your needs
- You can efficiently read back useful information
- You can use dark theme