Perl script to extract sequences by ID from a multi-FASTA file

Abhimanyu Singh — Thu, 09 Aug 2018 10:39:27 -0500

#!/usr/bin/perl -w

# Extract selected records from a multi-FASTA file.
#
# Usage:  script.pl <ids_file> <fasta_file>
#
#   ids_file    one sequence ID per line; only the first whitespace-delimited
#               word of each line is used, an optional leading '>' is ignored,
#               and blank lines are skipped.
#   fasta_file  multi-FASTA file to search.
#
# Every FASTA record whose ID (the first word of its header line) appears in
# ids_file is written to STDOUT as ">ID" followed by the sequence joined onto
# a single line.  Dies with a message if an input file cannot be opened.

use strict;
use warnings;

die "Usage: $0 <ids_file> <fasta_file>\n" if @ARGV < 2;

my ($idsfile, $seqfile) = @ARGV;
my %ids = ();

# Load the wanted IDs into a hash for constant-time lookup.
open my $ids_fh, '<', $idsfile
    or die "Cannot open IDs file '$idsfile': $!\n";
while (my $line = <$ids_fh>) {
    # \S+ also excludes a trailing "\r", so CRLF files work unchanged.
    my ($wanted) = $line =~ /\A\s*>?(\S+)/;
    next unless defined $wanted;    # blank line
    $ids{$wanted} += 1;
}
close $ids_fh;

open my $fasta_fh, '<', $seqfile
    or die "Cannot open FASTA file '$seqfile': $!\n";
{
    # Read one FASTA record per iteration; the override of the input record
    # separator is confined to this block.
    local $/ = "\n>";

    while (my $record = <$fasta_fh>) {
        chomp $record;    # strips the trailing "\n>" separator, if present

        # The ID is the first word of the header.  Only the first record
        # still carries its leading '>' (the separator consumed the others).
        my ($id) = $record =~ /\A>*(\S+)/;
        next unless defined $id && exists $ids{$id};

        my $seq = $record;
        $seq =~ s/\A[^\n]*\n?//;    # drop the header line, even without a newline
        $seq =~ s/[\r\n]//g;        # join sequence lines; also drop CR from CRLF input
        print ">$id\n$seq\n";
    }
}
close $fasta_fh;

BOL: Perl script to extract sequences by ID from a multi-FASTA file

Perl script to extract sequences by ID from a multi-FASTA file