#!/bin/bash
# Bacterial Comparative Genomics Pipeline Script
# This script automates key steps in bacterial comparative genomics using popular bioinformatics tools.
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail), so a
# broken step cannot silently feed bad data to the next one.
set -euo pipefail
# Define paths
# All locations are relative to the directory the script is started from.
WORKDIR="./bacterial_genomics_pipeline"
INPUT_FASTA_DIR="./input_genomes"
OUTPUT_DIR="./output"
# Per-stage result directories, all nested under OUTPUT_DIR.
CORE_PAN_DIR="$OUTPUT_DIR/core_pan_analysis"
PHYLOGENY_DIR="$OUTPUT_DIR/phylogeny"
ALIGNMENT_DIR="$OUTPUT_DIR/genome_alignment"
RESISTANCE_DIR="$OUTPUT_DIR/antibiotic_resistance"
SYNTENY_DIR="$OUTPUT_DIR/synteny_analysis"
# Create directories if they do not exist.
# Each path is quoted so a value containing spaces or glob characters is
# passed to mkdir as a single argument; "--" ends option parsing.
mkdir -p -- \
  "$WORKDIR" \
  "$OUTPUT_DIR" \
  "$CORE_PAN_DIR" \
  "$PHYLOGENY_DIR" \
  "$ALIGNMENT_DIR" \
  "$RESISTANCE_DIR" \
  "$SYNTENY_DIR"
# Tools required
# Executable names for each pipeline stage; they are resolved through PATH
# when the corresponding stage runs.
PROKKA="prokka"
ROARY="roary"
MAUVE="progressiveMauve"
IQTREE="iqtree"
ABRICATE="abricate"
# Spelled with the same capitalisation the synteny step uses to invoke the
# program; command lookup is case-sensitive on most filesystems.
MCSCANX="MCScanX"
# Step 1: Genome Annotation using Prokka
#######################################
# Runs Prokka once for every *.fasta file directly inside INPUT_FASTA_DIR.
# Each genome is annotated into OUTPUT_DIR/annotation_<name>, where <name>
# is the file name without its .fasta suffix; <name> is also the --prefix.
# Globals:   INPUT_FASTA_DIR, OUTPUT_DIR, PROKKA (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   1 if no FASTA file is found; otherwise the status of the last
#            Prokka run (a failing run aborts the script under set -e)
#######################################
annotate_genomes() {
  local fasta genome output_path
  local -a fastas=()

  # printf interprets "\n"; bash's echo would print it literally.
  printf '\n=== Annotating Genomes with Prokka ===\n'

  for fasta in "$INPUT_FASTA_DIR"/*.fasta; do
    # An unmatched glob is left as the literal pattern; do not hand it on.
    [[ -e "$fasta" ]] || continue
    fastas+=("$fasta")
  done

  if (( ${#fastas[@]} == 0 )); then
    printf 'Error: no .fasta files found in %s\n' "$INPUT_FASTA_DIR" >&2
    return 1
  fi

  for fasta in "${fastas[@]}"; do
    genome=$(basename -- "$fasta" .fasta)
    output_path="$OUTPUT_DIR/annotation_$genome"
    printf 'Annotating %s...\n' "$genome"
    "$PROKKA" --outdir "$output_path" --prefix "$genome" "$fasta"
  done
}
# Step 2: Core and Pan-genome Analysis using Roary
#######################################
# Collects every *.gff file found anywhere under OUTPUT_DIR and passes them
# to a single Roary run (-e -n -v, 8 threads) whose output directory is
# CORE_PAN_DIR/pan_genome_analysis.
# Globals:   OUTPUT_DIR, CORE_PAN_DIR, ROARY (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   1 if no GFF file is found; otherwise Roary's exit status
#######################################
core_pan_analysis() {
  local roary_output="$CORE_PAN_DIR/pan_genome_analysis"
  local gff
  local -a gff_files=()

  # printf interprets "\n"; bash's echo would print it literally.
  printf '\n=== Performing Core and Pan-genome Analysis with Roary ===\n'

  # NUL-delimited read keeps paths containing whitespace intact.
  while IFS= read -r -d '' gff; do
    gff_files+=("$gff")
  done < <(find "$OUTPUT_DIR" -name '*.gff' -print0)

  if (( ${#gff_files[@]} == 0 )); then
    printf 'Error: no .gff files found under %s\n' "$OUTPUT_DIR" >&2
    return 1
  fi

  # Roary's usage text gives -f as the output directory and -o as the
  # clusters output *filename*, so the directory goes to -f.
  # The directory is deliberately not pre-created: Roary creates it, and
  # switches to a timestamp-suffixed name when the target already exists.
  "$ROARY" -e -n -v -p 8 -f "$roary_output" "${gff_files[@]}"
}
# Step 3: Whole Genome Alignment using Mauve
#######################################
# Aligns every *.fasta file found under INPUT_FASTA_DIR in one
# progressiveMauve run, writing ALIGNMENT_DIR/aligned_genomes.xmfa.
# Globals:   INPUT_FASTA_DIR, ALIGNMENT_DIR, MAUVE (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   1 if no FASTA file is found; otherwise the aligner's status
#######################################
align_genomes() {
  local alignment_output="$ALIGNMENT_DIR/aligned_genomes.xmfa"
  local genome
  local -a genomes=()

  # printf interprets "\n"; bash's echo would print it literally.
  printf '\n=== Aligning Genomes with Mauve ===\n'

  # NUL-delimited read keeps paths containing whitespace intact.
  while IFS= read -r -d '' genome; do
    genomes+=("$genome")
  done < <(find "$INPUT_FASTA_DIR" -name '*.fasta' -print0)

  if (( ${#genomes[@]} == 0 )); then
    printf 'Error: no .fasta files found in %s\n' "$INPUT_FASTA_DIR" >&2
    return 1
  fi

  printf 'Running Mauve on input genomes...\n'
  "$MAUVE" --output="$alignment_output" "${genomes[@]}"
  printf 'Alignment saved to %s\n' "$alignment_output"
}
# Step 4: Phylogenetic Tree Construction using IQ-TREE
#######################################
# Runs IQ-TREE with the GTR+G model on ALIGNMENT_DIR/aligned_genomes.xmfa,
# using PHYLOGENY_DIR/phylogeny_tree as the output prefix (-pre), and reports
# <prefix>.treefile as the resulting tree.
# Globals:   ALIGNMENT_DIR, PHYLOGENY_DIR, IQTREE (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   1 if the alignment file is missing or empty; otherwise
#            IQ-TREE's exit status
#######################################
construct_phylogeny() {
  local alignment="$ALIGNMENT_DIR/aligned_genomes.xmfa"
  local phylo_output="$PHYLOGENY_DIR/phylogeny_tree"
  local iqtree_output="$phylo_output.treefile"

  # printf interprets "\n"; bash's echo would print it literally.
  printf '\n=== Constructing Phylogenetic Tree with IQ-TREE ===\n'

  if [[ ! -s "$alignment" ]]; then
    printf 'Error: alignment %s is missing or empty\n' "$alignment" >&2
    return 1
  fi

  # NOTE(review): IQ-TREE's documented alignment formats are PHYLIP, FASTA,
  # NEXUS, CLUSTAL and MSF; this input is XMFA. Confirm whether a conversion
  # step is needed before this call.
  printf 'Running IQ-TREE on aligned genomes...\n'
  "$IQTREE" -s "$alignment" -m GTR+G -nt AUTO -pre "$phylo_output"
  printf 'Phylogenetic tree saved to %s\n' "$iqtree_output"
}
# Step 5: Antibiotic Resistance Gene Identification using ABRicate
#######################################
# Runs ABRicate on every *.fasta file directly inside INPUT_FASTA_DIR and
# saves each run's stdout to RESISTANCE_DIR/<name>_resistance.txt, where
# <name> is the file name without its .fasta suffix.
# Globals:   INPUT_FASTA_DIR, RESISTANCE_DIR, ABRICATE (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   1 if no FASTA file is found; otherwise the status of the last
#            ABRicate run (a failing run aborts the script under set -e)
#######################################
identify_resistance_genes() {
  local fasta genome output_path
  local -a fastas=()

  # printf interprets "\n"; bash's echo would print it literally.
  printf '\n=== Identifying Antibiotic Resistance Genes with ABRicate ===\n'

  for fasta in "$INPUT_FASTA_DIR"/*.fasta; do
    # An unmatched glob is left as the literal pattern; do not hand it on.
    [[ -e "$fasta" ]] || continue
    fastas+=("$fasta")
  done

  if (( ${#fastas[@]} == 0 )); then
    printf 'Error: no .fasta files found in %s\n' "$INPUT_FASTA_DIR" >&2
    return 1
  fi

  for fasta in "${fastas[@]}"; do
    genome=$(basename -- "$fasta" .fasta)
    output_path="$RESISTANCE_DIR/${genome}_resistance.txt"
    printf 'Analyzing %s for resistance genes...\n' "$genome"
    # Invoked through the ABRICATE setting, like the other configured tools.
    "$ABRICATE" "$fasta" > "$output_path"
  done
}
# Step 6: Synteny Analysis using MCScanX
#######################################
# Creates SYNTENY_DIR/synteny_results, runs MCScanX with OUTPUT_DIR as its
# only argument, and stores the program's stdout in results.txt there.
# Globals:   OUTPUT_DIR, SYNTENY_DIR (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   MCScanX's exit status (a failure aborts the script under set -e)
#######################################
synteny_analysis() {
  local synteny_output="$SYNTENY_DIR/synteny_results"

  # printf interprets "\n"; bash's echo would print it literally.
  printf '\n=== Performing Synteny Analysis with MCScanX ===\n'

  mkdir -p -- "$synteny_output"
  printf 'Running MCScanX on annotated genomes...\n'
  # NOTE(review): MCScanX's documented argument is a file prefix that has
  # matching <prefix>.blast and <prefix>.gff inputs; a directory is passed
  # here. Confirm the intended input before relying on results.txt.
  MCScanX "$OUTPUT_DIR" > "$synteny_output/results.txt"
  printf 'Synteny analysis results saved to %s\n' "$synteny_output"
}
# Main workflow
# Runs the six stages in order. Order matters for two of them:
# core_pan_analysis collects the *.gff files present under OUTPUT_DIR, and
# construct_phylogeny reads the alignment file that align_genomes writes.
# With errexit (set -e) active, a stage that returns non-zero stops the run
# before the completion banner is printed.
annotate_genomes
core_pan_analysis
align_genomes
construct_phylogeny
identify_resistance_genes
synteny_analysis
# printf interprets "\n"; bash's echo would print it literally.
printf '\n=== Bacterial Comparative Genomics Pipeline Complete ===\n'
printf 'Results saved in %s\n' "$OUTPUT_DIR"