#!/usr/bin/perl
use warnings;
use strict;
use Bio::SeqIO;
use Bio::Seq;
use File::Copy;
# $ARGV[0] must be in the following format, with the coordinates kept sorted by name + location:
# GenomechrName locationStart AlienGene AlienLength
# The coordinates must not overlap: the next position should be bigger than firstpos + alienLen.
# Insert each sequence listed in the coordinate file ($ARGV[0]) into the
# genome FASTA file ($ARGV[1]). Input lines are tab-separated:
#   chromosome name, insertion position, inserted sequence, inserted length.
# For every input line the whole genome is streamed into a scratch file with
# the insertion applied, and the scratch file then replaces the genome file,
# so each line sees the result of all previous insertions.
# For every processed line the script prints the original line followed by
# the insertion position and position + inserted length.
die "Usage: $0 <coordinates.tsv> <genome.fa>\n" unless @ARGV >= 2;
open(my $fh, '<:encoding(UTF-8)', $ARGV[0])
    or die "Could not open file $ARGV[0] $!";
my $genome = $ARGV[1]; # input fasta file (genome file); rewritten in place
my $out = 'tmp.fa';    # scratch output fasta file, moved over $genome
LINE: while (my $line = <$fh>) {
    chomp $line;
    $line =~ s/\r\z//;           # tolerate CRLF line endings
    next LINE if $line !~ /\S/;  # a blank line must not trigger a genome rewrite

    # chromosome, insertion position, inserted sequence, inserted length
    my ($chr, $pos, $seqI, $alienLen) = split /\t/, $line;
    unless (defined $alienLen && $pos =~ /\A\d+\z/ && $alienLen =~ /\A\d+\z/) {
        warn "Skipping malformed line $.: $line\n";
        next LINE;
    }

    my $seq_in  = Bio::SeqIO->new(-format => 'fasta', -file => $genome);
    my $seq_out = Bio::SeqIO->new(-format => 'fasta', -file => ">" . $out);
    my $found = 0;
    while (my $seq = $seq_in->next_seq()) {
        if ($seq->primary_id eq $chr) {
            $found = 1;
            my $residues = $seq->seq;
            $residues = '' unless defined $residues;

            # A position past the end appends the insert (what the two-substr
            # form produced), but say so instead of emitting undef warnings.
            my $insert_at = $pos;
            if ($insert_at > length $residues) {
                warn "Position $pos is beyond the end of $chr (length "
                    . length($residues) . "); appending at the end\n";
                $insert_at = length $residues;
            }

            # Four-argument substr with a zero-length target splices the
            # insert in at $insert_at without removing any residues.
            substr($residues, $insert_at, 0, $seqI);

            my $seq_obj = Bio::Seq->new(
                -seq        => $residues,
                -display_id => $seq->primary_id,
                -desc       => $seq->desc,   # keep the FASTA header description
                -alphabet   => "dna",
            );
            $seq_out->write_seq($seq_obj);
        }
        else {
            $seq_out->write_seq($seq);
        }
    }

    # Both handles must be closed before the scratch file replaces the genome:
    # buffered output has to be on disk, and the source must not be held open.
    $seq_in->close();
    $seq_out->close();

    warn "Chromosome $chr not found in $genome; nothing inserted\n"
        unless $found;

    my $newLoc = $pos + $alienLen;
    print "$line\t$pos\t$newLoc\n";
    move($out, $genome)
        or die "Could not move $out to $genome: $!";
}
close($fh);