X BOL wishing you a very Happy New Year

Alternative content

Our Sponsors



Download the BioinformaticsOnline (BOL) apps in your Chrome browser.




Bacterial Comparative Genomics Pipeline Bash Script

  • Public
By LEGE 7 days ago
#!/bin/bash
#
# Bacterial Comparative Genomics Pipeline Script
#
# Automates key steps in bacterial comparative genomics by calling, in order:
#   1. Prokka            - genome annotation
#   2. Roary             - core and pan-genome analysis
#   3. progressiveMauve  - whole genome alignment
#   4. IQ-TREE           - phylogenetic tree construction
#   5. ABRicate          - antibiotic resistance gene identification
#   6. MCScanX           - synteny analysis
#
# Input : *.fasta files in $INPUT_FASTA_DIR (default ./input_genomes)
# Output: everything is written below $OUTPUT_DIR (default ./output)
#
# The executable names and the input/output directories can be overridden
# from the environment, e.g.  IQTREE=iqtree2 OUTPUT_DIR=/data/run1 ./pipeline.sh

# Stop on the first failing command, on use of an unset variable, and when any
# stage of a shell pipeline fails.
set -euo pipefail

# Define paths
WORKDIR="./bacterial_genomics_pipeline"
INPUT_FASTA_DIR="${INPUT_FASTA_DIR:-./input_genomes}"
OUTPUT_DIR="${OUTPUT_DIR:-./output}"
CORE_PAN_DIR="$OUTPUT_DIR/core_pan_analysis"
PHYLOGENY_DIR="$OUTPUT_DIR/phylogeny"
ALIGNMENT_DIR="$OUTPUT_DIR/genome_alignment"
RESISTANCE_DIR="$OUTPUT_DIR/antibiotic_resistance"
SYNTENY_DIR="$OUTPUT_DIR/synteny_analysis"

# Tools required (each may be overridden from the environment)
PROKKA="${PROKKA:-prokka}"
ROARY="${ROARY:-roary}"
MAUVE="${MAUVE:-progressiveMauve}"
IQTREE="${IQTREE:-iqtree}"
ABRICATE="${ABRICATE:-abricate}"
# The default matches the command this script has always invoked for the
# synteny step ("MCScanX").
MCSCANX="${MCSCANX:-MCScanX}"

readonly WORKDIR INPUT_FASTA_DIR OUTPUT_DIR CORE_PAN_DIR PHYLOGENY_DIR \
  ALIGNMENT_DIR RESISTANCE_DIR SYNTENY_DIR \
  PROKKA ROARY MAUVE IQTREE ABRICATE MCSCANX

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
# Returns:   never; exits with status 1
#######################################
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Print a section header preceded by a blank line.
# printf is used because bash's builtin echo does not expand "\n" by default.
# Arguments: $1 - header text
#######################################
banner() {
  printf '\n=== %s ===\n' "$1"
}

#######################################
# Verify that every external tool is available before any work starts, so a
# missing program is reported immediately rather than midway through a run.
# Globals:   PROKKA ROARY MAUVE IQTREE ABRICATE MCSCANX (read)
# Returns:   0 if all tools are found; otherwise exits via die
#######################################
check_dependencies() {
  local tool
  local -a missing=()
  for tool in "$PROKKA" "$ROARY" "$MAUVE" "$IQTREE" "$ABRICATE" "$MCSCANX"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      missing+=("$tool")
    fi
  done
  if (( ${#missing[@]} > 0 )); then
    die "required tools not found on PATH: ${missing[*]}"
  fi
}

#######################################
# Ensure at least one input genome exists. Without this check an unmatched
# glob would hand the literal pattern "*.fasta" to the tools.
# Globals:   INPUT_FASTA_DIR (read)
# Returns:   0 if a *.fasta file exists; otherwise exits via die
#######################################
require_input_genomes() {
  local fasta
  for fasta in "$INPUT_FASTA_DIR"/*.fasta; do
    if [[ -f "$fasta" ]]; then
      return 0
    fi
  done
  die "no *.fasta files found in $INPUT_FASTA_DIR"
}

#######################################
# Step 1: Genome Annotation using Prokka.
# Runs Prokka once per *.fasta file directly inside INPUT_FASTA_DIR and writes
# each result to $OUTPUT_DIR/annotation_<name>.
# Globals:   INPUT_FASTA_DIR OUTPUT_DIR PROKKA (read)
#######################################
annotate_genomes() {
  banner "Annotating Genomes with Prokka"
  local fasta sample output_path
  for fasta in "$INPUT_FASTA_DIR"/*.fasta; do
    [[ -f "$fasta" ]] || continue   # unmatched glob yields the literal pattern
    sample=${fasta##*/}             # strip directory
    sample=${sample%.fasta}         # strip extension
    output_path="$OUTPUT_DIR/annotation_$sample"
    echo "Annotating $sample..."
    "$PROKKA" --outdir "$output_path" --prefix "$sample" "$fasta"
  done
}

#######################################
# Step 2: Core and Pan-genome Analysis using Roary.
# Passes every *.gff file found anywhere below OUTPUT_DIR to Roary.
# Globals:   OUTPUT_DIR CORE_PAN_DIR ROARY (read)
#######################################
core_pan_analysis() {
  banner "Performing Core and Pan-genome Analysis with Roary"
  local roary_output="$CORE_PAN_DIR/pan_genome_analysis"
  local gff
  local -a gff_files=()

  # NUL-delimited so paths containing whitespace stay intact; sorted so the
  # argument order is reproducible between runs.
  while IFS= read -r -d '' gff; do
    gff_files+=("$gff")
  done < <(find "$OUTPUT_DIR" -name '*.gff' -print0 | LC_ALL=C sort -z)

  (( ${#gff_files[@]} > 0 )) || die "no .gff annotation files found under $OUTPUT_DIR"

  mkdir -p "$roary_output"
  # NOTE(review): Roary's usage text appears to list -f as the output
  # directory and -o as the clusters file name; -o is kept here exactly as
  # before - confirm against the Roary documentation.
  "$ROARY" -e -n -v -p 8 -o "$roary_output" "${gff_files[@]}"
}

#######################################
# Step 3: Whole Genome Alignment using Mauve.
# Aligns every *.fasta file found (recursively) below INPUT_FASTA_DIR and
# writes the result to $ALIGNMENT_DIR/aligned_genomes.xmfa.
# Globals:   INPUT_FASTA_DIR ALIGNMENT_DIR MAUVE (read)
#######################################
align_genomes() {
  banner "Aligning Genomes with Mauve"
  local alignment_output="$ALIGNMENT_DIR/aligned_genomes.xmfa"
  local genome
  local -a genomes=()

  while IFS= read -r -d '' genome; do
    genomes+=("$genome")
  done < <(find "$INPUT_FASTA_DIR" -name '*.fasta' -print0 | LC_ALL=C sort -z)

  (( ${#genomes[@]} > 0 )) || die "no *.fasta files found under $INPUT_FASTA_DIR"

  echo "Running Mauve on input genomes..."
  "$MAUVE" --output="$alignment_output" "${genomes[@]}"
  echo "Alignment saved to $alignment_output"
}

#######################################
# Step 4: Phylogenetic Tree Construction using IQ-TREE.
# Runs IQ-TREE (model GTR+G) on the alignment written by align_genomes, using
# $PHYLOGENY_DIR/phylogeny_tree as the output prefix.
# Globals:   ALIGNMENT_DIR PHYLOGENY_DIR IQTREE (read)
#######################################
construct_phylogeny() {
  banner "Constructing Phylogenetic Tree with IQ-TREE"
  local alignment="$ALIGNMENT_DIR/aligned_genomes.xmfa"
  local phylo_output="$PHYLOGENY_DIR/phylogeny_tree"
  local iqtree_output="$phylo_output.treefile"

  [[ -s "$alignment" ]] || die "alignment file missing or empty: $alignment"

  echo "Running IQ-TREE on aligned genomes..."
  # NOTE(review): XMFA may not be an alignment format IQ-TREE reads; the input
  # is kept as before - confirm, or convert the alignment first.
  "$IQTREE" -s "$alignment" -m GTR+G -nt AUTO -pre "$phylo_output"
  echo "Phylogenetic tree saved to $iqtree_output"
}

#######################################
# Step 5: Antibiotic Resistance Gene Identification using ABRicate.
# Runs ABRicate once per *.fasta file directly inside INPUT_FASTA_DIR and
# saves its stdout to $RESISTANCE_DIR/<name>_resistance.txt.
# Globals:   INPUT_FASTA_DIR RESISTANCE_DIR ABRICATE (read)
#######################################
identify_resistance_genes() {
  banner "Identifying Antibiotic Resistance Genes with ABRicate"
  local fasta sample output_path
  for fasta in "$INPUT_FASTA_DIR"/*.fasta; do
    [[ -f "$fasta" ]] || continue   # unmatched glob yields the literal pattern
    sample=${fasta##*/}
    sample=${sample%.fasta}
    output_path="$RESISTANCE_DIR/${sample}_resistance.txt"
    echo "Analyzing $sample for resistance genes..."
    "$ABRICATE" "$fasta" > "$output_path"
  done
}

#######################################
# Step 6: Synteny Analysis using MCScanX.
# Runs MCScanX with OUTPUT_DIR as its argument and saves its stdout to
# $SYNTENY_DIR/synteny_results/results.txt.
# Globals:   OUTPUT_DIR SYNTENY_DIR MCSCANX (read)
#######################################
synteny_analysis() {
  banner "Performing Synteny Analysis with MCScanX"
  local synteny_output="$SYNTENY_DIR/synteny_results"
  mkdir -p "$synteny_output"
  echo "Running MCScanX on annotated genomes..."
  # NOTE(review): MCScanX presumably expects a path prefix shared by a .blast
  # and a .gff file rather than a directory - confirm before relying on this.
  "$MCSCANX" "$OUTPUT_DIR" > "$synteny_output/results.txt"
  echo "Synteny analysis results saved to $synteny_output"
}

#######################################
# Main workflow: validate the environment, create the output tree, then run
# the six pipeline steps in order.
#######################################
main() {
  check_dependencies
  require_input_genomes

  # Create directories if they do not exist
  mkdir -p "$WORKDIR" "$OUTPUT_DIR" "$CORE_PAN_DIR" "$PHYLOGENY_DIR" \
    "$ALIGNMENT_DIR" "$RESISTANCE_DIR" "$SYNTENY_DIR"

  annotate_genomes
  core_pan_analysis
  align_genomes
  construct_phylogeny
  identify_resistance_genes
  synteny_analysis

  banner "Bacterial Comparative Genomics Pipeline Complete"
  echo "Results saved in $OUTPUT_DIR"
}

main "$@"