#!/usr/bin/perl
use strict;
use warnings;
# Usage: perl <this_script> <list_of_ids_one_per_line> <fasta> <outfile>
#
# Copies every FASTA record whose header matches one of the listed IDs from
# <fasta> to <outfile>.
#
# - The ID list holds one identifier per line; a single leading '>' is
#   optional and blank lines are ignored.
# - A record's ID is its entire header line (the text between the record's
#   leading '>' and the first newline), so a list entry must equal the whole
#   header, description included.
# - A trailing carriage return is ignored when comparing IDs, so files with
#   CRLF line endings still match.
# - A file name of '-' selects STDIN (inputs) or STDOUT (output).

# Opens $path with the given $mode ('<' or '>') and returns the handle.
# '-' maps to STDIN/STDOUT, which the two-argument open used previously also
# did; everything else is opened with three-argument open so that the file
# name can never be interpreted as a mode or a pipe.
# $what is a short description used in the error message. Dies on failure.
sub _open_stream {
    my ($mode, $path, $what) = @_;
    if ($path eq '-') {
        return $mode eq '<' ? \*STDIN : \*STDOUT;
    }
    open my $fh, $mode, $path or die "Cannot open $what '$path': $!\n";
    return $fh;
}

die "Usage: perl $0 <list_of_ids_one_per_line> <fasta> <outfile>\n"
    if @ARGV < 3;

my $list  = shift @ARGV;
my $fasta = shift @ARGV;
my $out   = shift @ARGV;

# Load the wanted IDs as hash keys so each record needs a single lookup.
my %select;
my $list_fh = _open_stream('<', $list, 'ID list');
while (my $wanted = <$list_fh>) {
    $wanted =~ s/\r?\n\z//;    # drop the line ending (LF or CRLF)
    $wanted =~ s/\A>//;        # tolerate IDs copied along with the FASTA '>'
    next if $wanted eq '';     # a blank line is not an ID
    $select{$wanted} = 1;
}
close $list_fh;

# Open the input before the output so that a bad FASTA path does not leave
# behind a freshly truncated output file.
my $fasta_fh = _open_stream('<', $fasta, 'FASTA file');
my $out_fh   = _open_stream('>', $out,   'output file');

{
    # Read one whole record per iteration: records are separated by a newline
    # followed by the next record's '>'. Localised so the changed separator
    # cannot affect any other read.
    local $/ = "\n>";

    while (my $record = <$fasta_fh>) {
        # The '>' at the end of the separator belongs to the next record.
        $record =~ s/\n>\z/\n/;

        # Only the first record still starts with its own '>'; later records
        # lost theirs to the separator of the preceding read.
        $record =~ s/\A>// if $. == 1;

        # The header is everything up to the first newline.
        my ($id) = $record =~ /\A([^\n]*)/;
        $id =~ s/\r\z//;

        print {$out_fh} ">$record" if exists $select{$id};
    }
}

close $fasta_fh;
# Buffered write errors (e.g. a full disk) only surface when closing.
close $out_fh or die "Error writing output file '$out': $!\n";
Comments
If you want to extract a large number of sequences, you most likely already have the sequence identifiers in a separate file. Assuming that the file ids.file contains one sequence identifier per line, you can use this one-liner: