Our Sponsors



Download the BioinformaticsOnline (BOL) apps in your Chrome browser.




Extract sequence from UCSC

  • Public
By Jit 3103 days ago
#!/usr/bin/env perl
use strict;
use warnings;
use LWP::Simple;
use XML::XPath;
use XML::XPath::XMLParser;

# Use DAS of UCSC to fetch specific sequence by its given chromosome position
# and print it to STDOUT as a single FASTA record.
# From here: https://www.biostars.org/p/6156/
#
# Usage: perl extract_seq_from_ucsc.pl <chr> <position> <size>
#   chr      - chromosome name without the leading "chr" (e.g. 14)
#   position - coordinate at the centre of the requested window
#   size     - window width; the request spans size/2 bases (rounded down)
#              on either side of position
#
# Dies with a message on missing/invalid arguments, on a failed download,
# and when the response contains no sequence.

# Genome assembly to query; it is used both in the request URL and in the
# FASTA header, so change it here only.
my $assembly = 'papAnu2';

my $chr  = shift;
my $pos  = shift;
my $size = shift;

my $usage = "Example: perl extract_seq_from_ucsc.pl 14 482780 1000\n";

if (! $size) {
    die "ERROR: You must pass three arguments: chr. num., position, and size.\n$usage";
}

chomp $size;

# Both values go into integer arithmetic and then into the URL, so reject
# anything that is not a plain non-negative integer up front.
for my $number ($pos, $size) {
    if ($number !~ /\A\d+\z/) {
        die "ERROR: position and size must be non-negative integers.\n$usage";
    }
}

# Truncate the half-width so that an odd size cannot yield fractional
# coordinates (e.g. 482279.5) in the request.
my $half_width = int($size / 2);
my $start      = $pos - $half_width;
my $end        = $pos + $half_width;

# A window that reaches past the beginning of the chromosome would give a
# zero or negative start coordinate; clamp it to the first base.
if ($start < 1) {
    $start = 1;
}

# Figure out URL for the DAS server. Example:
# http://genome.ucsc.edu/cgi-bin/das/calJac3/dna?segment=chr14:482280,483280
my $URL_gene = "http://genome.ucsc.edu/cgi-bin/das/$assembly/dna?segment=chr";
$URL_gene .= $chr . ":" . $start . "," . $end;

# LWP::Simple::get returns undef on any failure; stop here rather than
# handing undef to the XML parser.
my $xml = get($URL_gene);
if (! defined $xml) {
    die "ERROR: Could not retrieve $URL_gene\n";
}

my $xp = XML::XPath->new(xml => $xml);

# find all sequences; the eval turns a parse failure (for example an HTML
# error page instead of XML) into a readable message.
my $nodeset = eval { $xp->find('/DASDNA/SEQUENCE/DNA/text()') };
if (! defined $nodeset) {
    die "ERROR: Could not parse the response from $URL_gene: $@\n";
}

# there should be only one node, anyway:
my $printed = 0;
foreach my $node ($nodeset->get_nodelist) {
    my $seq = $node->getValue;
    $seq =~ s/\s//g;    # remove white spaces
    print ">" . $assembly . "_chr" . $chr . ":" . $start . "-" . $end . "\n";
    print $seq, "\n";
    $printed++;
}

# An empty result previously produced no output and a zero exit status.
if (! $printed) {
    die "ERROR: No sequence returned for chr$chr:$start-$end ($URL_gene)\n";
}