例えば、AAARCCCWTTTNGGG
とかって IUPAC 配列があり、 R
, W
, N
を [ACGT]
に変換した全ての配列が欲しくなった場合。
普通に書く
my %nbc = (
M => [ qw( A C ) ],
R => [ qw( A G ) ],
W => [ qw( A T ) ],
S => [ qw( C G ) ],
Y => [ qw( C T ) ],
K => [ qw( G T ) ],
V => [ qw( A C G ) ],
H => [ qw( A C T ) ],
D => [ qw( A G T ) ],
B => [ qw( C G T ) ],
N => [ qw( A C G T ) ],
A => [ qw( A ) ],
C => [ qw( C ) ],
G => [ qw( G ) ],
T => [ qw( T ) ],
) ;
my $str = q{AAARCCCWTTTNGGG} ;
my @ret = ( q{} ) ;
for my $s ( split //, uc $str ){
@ret = map { my $r = $_ ; map{ $r . $_ } @{$nbc{$s}} } @ret
}
printf "%s\n", $_ for @ret ;
Bio::Tools::IUPAC
BioPerl の これ の例文ほぼそのまんま。
use Bio::PrimarySeq;
use Bio::Tools::IUPAC;
my $ambiseq = Bio::PrimarySeq->new(-seq => 'AAARCCCWTTTNGGG', -alphabet => 'dna');
my $iupac = Bio::Tools::IUPAC->new(-seq => $ambiseq);
while (my $uniqueseq = $iupac->next_seq()) {
print $uniqueseq->seq . "\n" ;
}