X BOL wishes you a very Happy New Year

Alternative content

Our Sponsors



Download the BioinformaticsOnline (BOL) apps in your Chrome browser.




  • BioScripts
  • LEGE
  • Python script to split a DNA sequence into words of varying lengths

Python script to split a DNA sequence into words of varying lengths

  • Public
By LEGE 9 days ago
# Script to split a DNA sequence into words of varying lengths

# Nucleotides accepted by the validator; input is upper-cased before lookup,
# so lower-case sequences are accepted as well.
_VALID_NUCLEOTIDES = frozenset("ATCG")


def split_dna_into_words(dna_sequence: str, min_length: int, max_length: int) -> dict:
    """Split a DNA sequence into overlapping words of several lengths.

    For every length from ``min_length`` to ``max_length`` (inclusive), a
    window of that length is slid over the sequence one position at a time
    and each window is collected as a word. Words keep the letter case of
    the input.

    Parameters:
        dna_sequence (str): The DNA sequence to split (e.g., "ATGCGTAC").
        min_length (int): The minimum length of each word.
        max_length (int): The maximum length of each word.

    Returns:
        dict: Keys are word lengths; values are lists of the words of that
        length, in order of their start position. A length greater than the
        sequence length maps to an empty list.

    Raises:
        ValueError: If the sequence is empty, if either length is not
            positive, if ``min_length`` exceeds ``max_length``, or if the
            sequence contains a character other than A, T, C or G
            (case-insensitive).
    """
    if not dna_sequence:
        raise ValueError("The DNA sequence cannot be empty.")
    if min_length <= 0 or max_length <= 0:
        raise ValueError("Word lengths must be positive integers.")
    if min_length > max_length:
        raise ValueError("Minimum length cannot be greater than maximum length.")

    # Set membership (rather than substring search in "ATCG") guarantees that
    # only single, exact nucleotide symbols are accepted.
    for nucleotide in dna_sequence:
        if nucleotide.upper() not in _VALID_NUCLEOTIDES:
            raise ValueError(f"Invalid character '{nucleotide}' found in DNA sequence.")

    sequence_length = len(dna_sequence)
    # range() is empty when the word is longer than the sequence, which
    # yields an empty list for that length instead of an error.
    return {
        length: [
            dna_sequence[start:start + length]
            for start in range(sequence_length - length + 1)
        ]
        for length in range(min_length, max_length + 1)
    }


# Example usage
def main():
    """Split a sample sequence into words of length 3 to 99 and print them."""
    dna_sequence = "ATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTA"
    min_length = 3
    max_length = 99

    try:
        words_by_length = split_dna_into_words(dna_sequence, min_length, max_length)
        for length, words in words_by_length.items():
            print(f"Words of length {length}:", words)
    except ValueError as e:
        print("Error:", e)


if __name__ == "__main__":
    main()