# Alternative content
# Script to split a DNA sequence into words of varying lengths
def split_dna_into_words(dna_sequence: str, min_length: int, max_length: int) -> dict:
    """
    Split a DNA sequence into all overlapping words of each requested length.

    For every length from min_length to max_length (inclusive), a window of
    that length is slid across the sequence one position at a time and each
    window is collected in order of its start position.

    Parameters:
        dna_sequence (str): The DNA sequence to split (e.g., "ATGCGTAC").
            Validation is case-insensitive; words keep the input's casing.
        min_length (int): The minimum length of each word.
        max_length (int): The maximum length of each word.

    Returns:
        dict: A dictionary where keys are word lengths and values are lists
            of DNA words of that length. A length greater than the sequence
            length maps to an empty list.

    Raises:
        ValueError: If the sequence is empty, a length is not positive,
            min_length exceeds max_length, or the sequence contains a
            character other than A, T, C or G.
    """
    if not dna_sequence:
        raise ValueError("The DNA sequence cannot be empty.")
    if min_length <= 0 or max_length <= 0:
        raise ValueError("Word lengths must be positive integers.")
    if min_length > max_length:
        raise ValueError("Minimum length cannot be greater than maximum length.")

    # Ensure the DNA sequence contains valid nucleotides. A set gives true
    # membership semantics (a substring test against "ATCG" would also accept
    # multi-character items such as "AT").
    valid_nucleotides = frozenset("ATCG")
    for nucleotide in dna_sequence:
        if nucleotide.upper() not in valid_nucleotides:
            raise ValueError(f"Invalid character '{nucleotide}' found in DNA sequence.")

    # Generate words of varying lengths
    sequence_length = len(dna_sequence)
    words_by_length = {}
    for length in range(min_length, max_length + 1):
        # The last valid start index is sequence_length - length; when the
        # word is longer than the sequence the range is empty.
        words_by_length[length] = [
            dna_sequence[start:start + length]
            for start in range(sequence_length - length + 1)
        ]
    return words_by_length
# Example usage
def main():
    """
    Run a demonstration of split_dna_into_words on a sample sequence.

    Prints one line per word length from 3 to 99, listing every word of that
    length. If the input is rejected, prints the error message instead.
    """
    dna_sequence = "ATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTAATGCGTACGCTA"
    min_length = 3
    max_length = 99
    # Keep the try body to the single call that can raise the handled error,
    # so a failure while printing is never misreported as an input error.
    try:
        words_by_length = split_dna_into_words(dna_sequence, min_length, max_length)
    except ValueError as e:
        print("Error:", e)
        return
    for length, words in words_by_length.items():
        print(f"Words of length {length}:", words)
# Run the demonstration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()