<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Owner]]></title>
	<link>https://bioinformaticsonline.com/snippets/owner/lege?offset=50</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/owner/lege?offset=50" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/42892/parse-the-ncbi-taxonomy-database-with-perl</guid>
	<pubDate>Wed, 17 Feb 2021 21:17:15 -0600</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/42892/parse-the-ncbi-taxonomy-database-with-perl</link>
	<title><![CDATA[Parse the NCBI taxonomy database with Perl !]]></title>
	<description><![CDATA[<code>use Bio::DB::Taxonomy;
use strict;
use warnings;

# Label used only in the messages printed below; the lookup itself is by id.
my $sps=&quot;human&quot;;
# Open the NCBI taxonomy dump (nodes.dmp / names.dmp under taxdump/) through
# the flatfile source of Bio::DB::Taxonomy.
my $dbh = Bio::DB::Taxonomy-&gt;new(-source   =&gt; &#039;flatfile&#039;,
                                 -directory=&gt; &quot;taxdump&quot;,
                                 -nodesfile=&gt; &quot;taxdump/nodes.dmp&quot;,
                                 -namesfile=&gt; &quot;taxdump/names.dmp&quot;);
#to check
#parseTax(&#039;homo_sapiens&#039;, &#039;metazoa&#039;);
#exit;
# Alternative: look the taxon up by name instead of by numeric id.
#my $nam = $dbh-&gt;get_taxon(-name =&gt; $sps);
# Declared with `my` so that the script compiles under `use strict`.
my $nam = $dbh-&gt;get_taxon(-taxonid =&gt; 9606);
if(!$nam) { print &quot;Try again with correct scientific name\n&quot;; exit;}
print &quot;Eureka we found $sps, id is &quot;, $nam-&gt;id, &quot;\n&quot;; # 9606

#$abc = $names{$speciesId};

# Descendant taxa as returned by each_Descendent(); kept as a usage example,
# the result is not used further in this script.
my @taxa = $dbh-&gt;each_Descendent($nam);

#foreach (@taxa) {print $_-&gt;scientific_name ; print &quot;--&quot;;}

# Same idea with get_all_Descendents(); again only a usage example.
my @taxa2 = $dbh-&gt;get_all_Descendents($nam);

#foreach (@taxa2) {print $_-&gt;scientific_name; print &quot;**&quot;;}

#my $ancestor_taxon = $dbh-&gt;ancestor($nam); print &quot;$ancestor_taxon-&gt;id ****&quot;;

#my $tree = $dbh-&gt;get_tree(&#039;human&#039;); print $tree-&gt;name;
#print $nam-&gt;ancestor;
my $ancestor_taxon = $dbh-&gt;ancestor($nam);
#print $ancestor_taxon;

# Bio::Tree::Tree provides the lineage helpers used below.
use Bio::Tree::Tree;
my $tree_functions = Bio::Tree::Tree-&gt;new();
my @lineage = $tree_functions-&gt;get_lineage_nodes($nam);
my $lineage = $tree_functions-&gt;get_lineage_string($nam);

#print $lineage;
# Print the scientific name of every lineage node, one per line.
foreach (@lineage) {print $_-&gt;scientific_name; print &quot;\n&quot;;}

# Ask whether taxon 1974465 has a lineage node named &quot;metazoa&quot;.
my $decision =recheck_class (1974465, &quot;metazoa&quot;);
print $decision;

#Returns 1 if $classNode equals (case-insensitively) the scientific name of
#any lineage node of the taxon with id $lineageId, otherwise 0.
#A misspelled $classNode never matches, so the result is then always 0.
#A taxon id that the database cannot resolve gives a warning and 0.
#Uses the file-level taxonomy handle $dbh.
sub recheck_class {
my ($lineageId,$classNode)=@_;
use Bio::Tree::Tree;
my $taxon = $dbh-&gt;get_taxon(-taxonid =&gt; $lineageId);
if (!$taxon) {
    # Without this guard an undefined taxon would be handed to
    # get_lineage_nodes() below.
    warn &quot;recheck_class: no taxon found for id $lineageId\n&quot;;
    return 0;
}
# Lower-case the wanted name once instead of once per lineage node.
my $wanted = lc $classNode;
my @ancestors = Bio::Tree::Tree-&gt;new()-&gt;get_lineage_nodes($taxon);
foreach my $node (@ancestors) {
    my $sci_name = $node-&gt;scientific_name;
    return 1 if defined $sci_name and lc($sci_name) eq $wanted;
}
return 0;
}</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>
<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/39962/perl-script-to-run-in-parellel</guid>
	<pubDate>Sun, 22 Sep 2019 22:08:20 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/39962/perl-script-to-run-in-parellel</link>
	<title><![CDATA[Perl script to run in parallel !]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl

use strict;
use warnings;
use Parallel::ForkManager;
use Bio::SeqIO;

# Load every record of the FASTA file named on the command line; the result
# is a hash keyed by sequence id (see parse_genome_files).
my ($sequence_data_ref) = parse_genome_files($ARGV[0]);
my %genome=%{$sequence_data_ref};

# Fixed order, so that every forked worker indexes the same list.
my @chr_sets = sort keys %genome;

my $n_processes = 4;
my $pm = Parallel::ForkManager-&gt;new( $n_processes );
for my $i ( 1 .. $n_processes ) {
    # start() forks: the parent gets a true value and goes on to launch the
    # next worker, the child falls through and does the work below.
    $pm-&gt;start and next;

    # Worker $i handles records $i-1, $i-1+$n_processes, ... so that every
    # record is processed by exactly one worker. (The previous test
    # `$count % $i == 0` gave worker 1 all records, worker 2 every second
    # record, and so on, so the workers overlapped.)
    for ( my $idx = $i - 1; $idx &lt;= $#chr_sets; $idx += $n_processes ) {
        my $record = $genome{ $chr_sets[$idx] };
        if ( !output_exists($record-&gt;{name}) ) {
            start_new_XFOIL_instance($record-&gt;{name}, $record-&gt;{nuc_seq});
        }
    }

    $pm-&gt;finish;
}
$pm-&gt;wait_all_children;

sub output_exists {
    # Reports whether the result file for the given name prefix is already
    # present as a plain file.
    my ($chr_set) = @_;
    my $out_file = &quot;$chr_set.out&quot;;
    return ( -f $out_file );
}

sub start_new_XFOIL_instance {
    # Announces the run, passes the output file name and the sequence on to
    # touch(), then announces completion.
    my $chr_set = $_[0];
    my $chr_seq = $_[1];
    my $out_file = &quot;$chr_set.out&quot;;
    print &quot;starting XFOIL instance with parameters $chr_set!\n&quot;;
    touch( $out_file, $chr_seq );
    print &quot;finished run with parameters $chr_set!\n&quot;;
}

sub touch {
    # Creates (or truncates) the output file $fn, then runs AUGUSTUS on the
    # sequence $seq and lets AUGUSTUS write its result to $fn.
    #   $fn  - name of the output file
    #   $seq - nucleotide sequence as a plain string
    # Dies when $fn or the temporary FASTA file cannot be written. A failing
    # AUGUSTUS run only produces a warning, so the caller keeps going.
    use File::Temp qw(tempfile);
    my ($fn, $seq) = @_;

    # Create the empty output file up front and close it again before
    # AUGUSTUS writes to it, so that two handles never share the file.
    open my $out_fh, &#039;&gt;&#039;, $fn or die $!;
    close $out_fh or die $!;

    # AUGUSTUS takes the name of a FASTA file as its query argument, not the
    # sequence itself, so the sequence goes through a temporary file.
    (my $seq_id = $fn) =~ s/\.out$//;
    my ($fa_fh, $fa_name) = tempfile(&#039;augustus_XXXXXX&#039;, SUFFIX =&gt; &#039;.fa&#039;, TMPDIR =&gt; 1);
    print {$fa_fh} &quot;&gt;$seq_id\n$seq\n&quot; or die $!;
    close $fa_fh or die $!;

    # List form of system() bypasses the shell, so file names containing
    # spaces or shell metacharacters are passed through unchanged.
    my @cmd = (
        &#039;augustus&#039;,
        &#039;--species=caenorhabditis&#039;,
        &quot;--outfile=$fn&quot;,
        &#039;--AUGUSTUS_CONFIG_PATH=/home/urbe/Tools/Alienomics_v1.1/augustus.2.5.5/config&#039;,
        $fa_name,
    );
    system(@cmd) == 0
        or warn &quot;augustus failed for $fn (exit status $?)\n&quot;;

    unlink $fa_name;
    return 1;
}

sub parse_genome_files {
    # Reads every record of the FASTA file $file, copies each record to
    # genomeRES.fa and returns a reference to a hash keyed by display id.
    # Each entry holds the fields status, problem_desc, nuc_seq, len,
    # gc (G+C share of the sequence in percent) and name.
    my $file=shift;
    my (%sequence_data);
    my $file_content = Bio::SeqIO-&gt;new(-format =&gt; &#039;fasta&#039;, -file =&gt; &quot;$file&quot;);
    my $out_content = Bio::SeqIO-&gt;newFh(-format =&gt; &#039;fasta&#039;, -file =&gt; &quot;&gt;genomeRES.fa&quot;);
    while (my $gene_info = $file_content-&gt;next_seq()) {
      my $sequence = $gene_info-&gt;seq();
      my $accession_number = $gene_info-&gt;display_id;
      my $len = $gene_info-&gt;length;
      # tr with an empty replacement list only counts the listed characters;
      # the list is a plain character set, so no `|` separator belongs in it.
      my $GCcount = $sequence =~ tr/GCgc//;
      # An empty record has length 0; report 0% instead of dividing by zero.
      my $GCcontent = $len ? ($GCcount / $len) * 100 : 0;
      $sequence_data{$accession_number}{status} = &quot;OK&quot;; #everybody starts fine
      $sequence_data{$accession_number}{problem_desc} = &quot;-&quot;; #everybody starts fine
      if ($sequence_data{$accession_number}{status} eq &quot;OK&quot;) { # Add check points here &lt;&lt;&lt;&lt;&lt;&lt;
        $sequence_data{$accession_number}{nuc_seq} = $sequence;
        $sequence_data{$accession_number}{len} = $len;
        $sequence_data{$accession_number}{gc} = $GCcontent;
        $sequence_data{$accession_number}{name} = $accession_number;
        print $out_content $gene_info;
      }
    }
  return (\%sequence_data);
}</code>]]></description>
	<dc:creator>LEGE</dc:creator>
</item>

</channel>
</rss>