Our Sponsors



Download BioinformaticsOnline (BOL) apps in your Chrome browser.




Perl script to read multi-FASTA sequences one by one

  • Public
By Jit 2215 days ago
#!/usr/bin/env perl
use strict;
use warnings;
use Term::ANSIColor qw(colored);    # provides colored() for the duplicate-header warning

# USAGE
#   perl rohanRun.pl seq.fa
#
# Reads a multi-FASTA file and writes its records, one at a time, to the
# scratch file tmp.fa. Every record overwrites the previous one, so after each
# loop iteration tmp.fa holds exactly one sequence.

# Run the driver only when executed as a script, so this file can also be
# loaded with `require` to reuse readfasta() without touching any files.
main(@ARGV) unless caller;

# main($fasta_file)
#   Driver: parses $fasta_file with readfasta() and writes each record in turn
#   to tmp.fa (header line followed by the concatenated sequence).
#   Dies with a usage message when no input file is given, and on any failure
#   to open or close tmp.fa.
sub main {
    my ($fasta_file) = @_;
    die "USAGE: perl $0 seq.fa\n"
        unless defined($fasta_file) && length($fasta_file);

    my $outfile      = 'tmp.fa';
    my $fastaSeq_ref = readfasta($fasta_file);

    # Sorted so that the processing order is reproducible between runs.
    foreach my $key (sort keys %{$fastaSeq_ref}) {
        open(my $out, '>', $outfile)
            or die "couldn't open the file $outfile $!";
        print {$out} "$key\n$fastaSeq_ref->{$key}\n";

        # Close explicitly so the record is flushed to disk (and write errors
        # are reported) before anything else looks at tmp.fa.
        close($out) or die "couldn't close the file $outfile $!";
    }
    return;
}

# readfasta($file)
#   Parses a (multi-)FASTA file.
#
#   Parameters:
#     $file - path of the FASTA file to read.
#
#   Returns:
#     A hash reference mapping each header line (including the leading '>')
#     to its sequence, with all whitespace removed and all sequence lines of
#     the record concatenated. A header that is not followed by any sequence
#     line gets no entry.
#
#   Notes:
#     - Blank lines are skipped; LF and CRLF line endings are both accepted.
#     - When the same header occurs again, a caution is printed to STDOUT and
#       the later record's sequence replaces the earlier one.
#     - Sequence lines that appear before the first header are skipped with a
#       warning, because there is no record to attach them to.
#
#   Dies if the file cannot be opened.
sub readfasta {
    my ($file) = @_;
    my %sequence;
    my $header;
    my $temp_seq = '';

    open(my $in, '<', $file) or die "couldn't open the file $file $!";
    while (my $line = <$in>) {
        # Strip only the line terminator. chop() would also eat the last
        # residue of a final line that lacks a trailing newline.
        $line =~ s/\r?\n\z//;
        next if $line =~ /^\s*$/;    # skip empty line

        if ($line =~ /^>/) {         # header line: start a new record
            $header = $line;
            if (exists $sequence{$header}) {
                print colored("#CAUTION: SAME FASTA HAS BEEN READ MULTIPLE TIMES.\n#CAUTION: PLEASE CHECK FASTA SEQUENCE:$header\n", "red");
            }
            $temp_seq = '';          # discard the previous record's buffer
        }
        else {                       # sequence line
            if (!defined $header) {
                warn "skipping sequence data found before the first header in $file (line $.)\n";
                next;
            }
            $line =~ s/\s+//g;
            $temp_seq .= $line;
            $sequence{$header} = $temp_seq;    # update the contents
        }
    }
    close($in) or die "couldn't close the file $file $!";

    return \%sequence;
}

1;