<?xml version='1.0'?><rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" xmlns:atom="http://www.w3.org/2005/Atom" >
<channel>
	<title><![CDATA[BOL: Find and replace ambiguous characters in fasta file with Perl and Bioperl]]></title>
	<link>https://bioinformaticsonline.com/snippets/view/27313/find-and-replace-ambiguous-characters-in-fasta-file-with-perl-and-bioperl?</link>
	<atom:link href="https://bioinformaticsonline.com/snippets/view/27313/find-and-replace-ambiguous-characters-in-fasta-file-with-perl-and-bioperl?" rel="self" type="application/rss+xml" />
	<description><![CDATA[]]></description>
	
	<item>
	<guid isPermaLink="true">https://bioinformaticsonline.com/snippets/view/27313/find-and-replace-ambiguous-characters-in-fasta-file-with-perl-and-bioperl</guid>
	<pubDate>Fri, 13 May 2016 03:20:09 -0500</pubDate>
	<link>https://bioinformaticsonline.com/snippets/view/27313/find-and-replace-ambiguous-characters-in-fasta-file-with-perl-and-bioperl</link>
	<title><![CDATA[Find and replace ambiguous characters in fasta file with Perl and Bioperl]]></title>
	<description><![CDATA[<code>#!/usr/bin/perl -w

# Help text printed when -h is given or when option parsing fails.
# It describes the two supported switches and the STDIN fallback.
my $usage=&quot;\nUsage: $0 [-h] [-m char] [fastaFileName1 ...]\n&quot;.
    &quot;  -h: help\n&quot;.
    &quot;  -m: missing character\n&quot;.
    &quot;Print out the name of sequences with characters other than ATGC-.\n&quot;.
    &quot;If -m is specified, the ambiguous characters are replaced with the\n&quot;.
    &quot;specified character.  e.g. -m &#039;?&#039; will place ? to the ambiguous characters.\n&quot; .
    &quot;If multiple files are given, sequences in all files are merged.  If no \n&quot;.
    &quot;argument is given, it will take STDIN as the input\n&quot;;

our($opt_h, $opt_m);

use Bio::SeqIO;

use Getopt::Std;
# Parse the command line; an invalid option or -h prints the usage text and dies.
getopts(&#039;hm:&#039;) || die &quot;$usage\n&quot;;
die &quot;$usage\n&quot; if (defined($opt_h));

my $format = &quot;fasta&quot;;
my @seqArr = ();

# With no file arguments, fall back to standard input (represented by a dash).
@ARGV = (&#039;-&#039;) unless @ARGV;

# Read every sequence of every input into memory, in command-line order.
# The loop tests defined() so that a file literally named 0 does not end it.
while (defined(my $file = shift @ARGV)) {
    # Pass STDIN as a filehandle instead of depending on how the -file
    # argument treats a lone dash.
    my @source = ($file eq &#039;-&#039;) ? (-fh =&gt; \*STDIN) : (-file =&gt; $file);
    my $seqio_obj = Bio::SeqIO-&gt;new(@source, -format =&gt; $format);
    while (my $seq = $seqio_obj-&gt;next_seq()) {
        push(@seqArr, $seq);
    }
}

#@seqArr = sort { $a-&gt;id() cmp $b-&gt;id() } @seqArr;

# Report each sequence that contains characters other than ATGC- on STDERR
# as id, tab, offending characters. When -m was given, also rewrite the
# sequence in place with those characters replaced.
foreach my $s (@seqArr) {
    my $thisSeq = $s-&gt;seq();
    my $ambig = AmbiguousChar($thisSeq);
    if ($ambig ne &quot;&quot;) {
        print STDERR $s-&gt;id(), &quot;\t$ambig\n&quot;;
        if (defined($opt_m)) {
            $thisSeq = ReplaceAmbiguousChar($thisSeq, $opt_m);
            $s-&gt;seq($thisSeq);
        }
    }
}

# Sequences are written to STDOUT only in replacement mode (-m).
if (defined($opt_m)) {
    # -fh is the Bio::SeqIO argument for an already open filehandle.
    my $seqOut = Bio::SeqIO-&gt;new(-fh =&gt; \*STDOUT, -format =&gt; $format);
    foreach my $s (@seqArr) {
        $seqOut-&gt;write_seq($s);
    }
}
exit;


# Return the characters of a sequence string that are not A, T, G, C or
# the gap symbol (-), in their original order, with all whitespace removed.
# Matching is case-sensitive, so lowercase letters are returned as well.
# An undefined argument yields an empty string, which lets the caller
# compare the result against an empty string without uninitialized-value
# warnings under -w.
sub AmbiguousChar {
    my $string = shift;
    return q() unless defined $string;

    # A single pass removes the unambiguous symbols and any whitespace.
    $string =~ s/[ATGC\s-]+//g;
    return $string;
}

# Return a copy of the sequence string in which every character other than
# A, T, G, C or the gap symbol (-) has been replaced by the given
# replacement string. Matching is case-sensitive.
sub ReplaceAmbiguousChar {
    my $residues    = shift;
    my $placeholder = shift;

    # Handle each run of ambiguous characters at once: a run of n characters
    # becomes n copies of the placeholder, the same as replacing one by one.
    $residues =~ s/([^ATGC-]+)/$placeholder x length($1)/ge;
    return $residues;
}</code>]]></description>
	<dc:creator>Radha Agarkar</dc:creator>
</item>

</channel>
</rss>