Our Sponsors



Download BioinformaticsOnline (BOL) apps in your Chrome browser.




  • BioScripts
  • Jit
  • Insert a sequence at a desired location in a multi-FASTA file with Perl

Insert a sequence at a desired location in a multi-FASTA file with Perl

  • Public
By Jit 2307 days ago
#!/usr/bin/perl
use warnings;
use strict;
use Bio::SeqIO;
use Bio::Seq;
use File::Copy;

# Insert sequences at given positions of a multi-FASTA genome file.
#
# Usage: <script> <coordinates.tsv> <genome.fa>
#
# $ARGV[0] is a tab-separated coordinate file, one insertion per line:
#
#     GenomeChrName <TAB> locationStart <TAB> AlienGene <TAB> AlienLength
#
# AlienGene is the sequence that gets inserted; AlienLength is only used to
# compute the reported end location (it defaults to the length of AlienGene
# when the column is absent).
# Keep the coordinates sorted by name + location. The coordinates should not
# overlap: the next position should be bigger than firstpos + alienLen.
#
# $ARGV[1] is the genome FASTA file. It is rewritten IN PLACE once per
# coordinate line (through the temporary file tmp.fa in the current
# directory), so every insertion is applied to the genome as already
# modified by the preceding lines.
#
# For every processed line the script prints, tab-separated, the input line,
# the insertion position and the end location (position + AlienLength).
# Blank lines are ignored; malformed lines are reported on STDERR and skipped.

die "Usage: $0 <coordinates.tsv> <genome.fa>\n" if @ARGV < 2;

open(my $fh, '<:encoding(UTF-8)', $ARGV[0])
    or die "Could not open file $ARGV[0] $!";

my $genome = $ARGV[1];    # input fasta file (genome file), replaced in place
my $out    = 'tmp.fa';    # temporary output fasta file

LINE:
while (my $line = <$fh>) {
    # Remember the coordinate-file line number now: $. follows the most
    # recently read handle and the FASTA reader below will change it.
    my $line_no = $.;

    chomp $line;
    $line =~ s/\r\z//;              # tolerate CRLF line endings
    next LINE if $line !~ /\S/;     # nothing to do for blank lines

    # chromosome, insertion position, inserted sequence, reported length
    my ($chr, $pos, $insert, $alien_len) = split /\t/, $line;

    if (!defined $insert || $pos !~ /\A\d+\z/) {
        warn "Skipping malformed line $line_no of $ARGV[0]: $line\n";
        next LINE;
    }
    $alien_len = length $insert if !defined $alien_len;

    my $seq_in  = Bio::SeqIO->new(-format => 'fasta', -file => $genome);
    my $seq_out = Bio::SeqIO->new(-format => 'fasta', -file => ">" . $out);
    my $found   = 0;

    while (my $seq = $seq_in->next_seq()) {
        # Records of other chromosomes are copied through unchanged.
        if ($seq->display_id ne $chr) {
            $seq_out->write_seq($seq);
            next;
        }

        $found = 1;
        my $residues = $seq->seq;
        $residues = '' if !defined $residues;

        # A position past the end appends the insert (as before), but now
        # with an explicit diagnostic instead of an undef substr.
        my $offset = $pos;
        if ($offset > length $residues) {
            warn "Position $pos is beyond the end of '$chr' (length "
                . length($residues)
                . "); appending at the end (line $line_no)\n";
            $offset = length $residues;
        }

        my $upstream   = substr $residues, 0, $offset;
        my $downstream = substr $residues, $offset;

        # Carry the description over so the edited record keeps its header.
        my $seq_obj = Bio::Seq->new(
            -seq        => $upstream . $insert . $downstream,
            -display_id => $seq->display_id,
            -desc       => $seq->desc,
            -alphabet   => "dna",
        );
        $seq_out->write_seq($seq_obj);
    }

    # Flush and release both files before the temporary file replaces the
    # genome; moving a file with pending buffered output can lose data.
    $seq_in->close();
    $seq_out->close();

    warn "Chromosome '$chr' not found in $genome; nothing inserted"
        . " (line $line_no)\n"
        if !$found;

    my $new_loc = $pos + $alien_len;
    print "$line\t$pos\t$new_loc\n";

    move($out, $genome)
        or die "Could not move $out to $genome: $!";
}

close $fh;