#!/usr/bin/perl
use Bio::DB::Fasta;
#USAGE
perl extractFASTAwithSIZE.pl finalSample_filtered.fa 0 1000 > aaaaaa.fa
my $fastaFile = shift;
my $querySizeST = shift;
my $querySizeED = shift;
my $db = Bio::DB::Fasta->new( $fastaFile );
my @ids = $db->get_all_primary_ids;
foreach my $id (@ids) {
my $sequence = $db->seq($id, $querySizeST => $querySizeED);
if (!defined( $sequence )) {
die "Sequence $seq not found. \n"
}
print ">$id\n", "$sequence\n";
}
__END__
use Bio::DB::Fasta;
# Create database from a directory of Fasta files
my $db = Bio::DB::Fasta->new('/path/to/fasta/files/');
my @ids = $db->get_all_primary_ids;
# Simple access
my $seqstr = $db->seq('CHROMOSOME_I', 4_000_000 => 4_100_000);
my $revseq = $db->seq('CHROMOSOME_I', 4_100_000 => 4_000_000);
my $length = $db->length('CHROMOSOME_I');
my $header = $db->header('CHROMOSOME_I');
my $alphabet = $db->alphabet('CHROMOSOME_I');
# Access to sequence objects. See Bio::PrimarySeqI.
my $seq = $db->get_Seq_by_id('CHROMOSOME_I');
my $seqstr = $seq->seq;
my $subseq = $seq->subseq(4_000_000 => 4_100_000);
my $trunc = $seq->trunc(4_000_000 => 4_100_000);
my $length = $seq->length;
# Loop through sequence objects
my $stream = $db->get_PrimarySeq_stream;
while (my $seq = $stream->next_seq) {
# Bio::PrimarySeqI stuff
}
# Filehandle access
my $fh = Bio::DB::Fasta->newFh('/path/to/fasta/files/');
while (my $seq = <$fh>) {
# Bio::PrimarySeqI stuff
}
# Tied hash access
tie %sequences,'Bio::DB::Fasta','/path/to/fasta/files/';
print $sequences{'CHROMOSOME_I:1,20000'};