A Rust implementation of the DNA to Protein conversion program given in Python 3 at: https://www.geeksforgeeks.org/dna-protein-python-3/
This program is a demonstration of file-handling and use of a HashMap.
There are two functions: `read_seq`, which reads text from a given filename and returns it as a string with newline characters removed; and `translate`, which uses a HashMap to convert character triples into single characters.
For `read_seq`, we use `read_to_string` to read the contents of the named file into a string, before removing the newline (`\n`) and carriage-return (`\r`) characters.
For `translate`, we build a HashMap from an array of tuples, with `collect` doing the necessary work of constructing the map.
Download code: dna-protein-source.zip
// This is a copy of the DNA translation program from // https://www.geeksforgeeks.org/dna-protein-python-3/ // Except that dna_sequence file is edited to only use the required text // instead of including a range limit in code. use std::collections::HashMap; use std::{env, fs}; // Requires a filename in argv to convert. fn main() { let mut args = env::args(); args.next(); // ignore program name if let Some(filename) = args.next() { let dna_seq = read_seq(&filename); let protein_seq = translate(&dna_seq); if let Some(protein_filename) = args.next() { // if a second filename is given, check it as target sequence let target_seq = read_seq(&protein_filename); println!("Translated sequence compared to target: {}", protein_seq == target_seq); } else { println!("Translated sequence: {}", protein_seq); } } else { println!("Provide filename of DNA sequence to translate"); } } // Reads text from given filename, and returns a string without newlines. fn read_seq(filename: &str) -> String { let contents = fs::read_to_string(filename) .expect("Something went wrong reading the file"); contents.replace('\n',"").replace('\r',"") } // Translates a given DNA sequence into a string of protein names. // Given sequence should be divisible into triplets. 
/// Translates a DNA sequence into a string of one-character codes.
///
/// `seq` is read in consecutive, non-overlapping windows of three bytes,
/// starting at offset 0. Each window is looked up in the codon table and, when
/// found, its code is appended to the result. The three codons mapped to `'_'`
/// (`TAA`, `TAG`, `TGA`) are the stop codons of the standard genetic code.
///
/// The function never panics on malformed input:
/// * trailing bytes that do not fill a whole window are ignored;
/// * windows that are not in the table (lower-case letters, unknown symbols,
///   ...) are skipped;
/// * windows whose edges fall inside a multi-byte UTF-8 character are skipped.
fn translate(seq: &str) -> String {
    let table: HashMap<&str, char> = [
        ("ATA", 'I'), ("ATC", 'I'), ("ATT", 'I'), ("ATG", 'M'),
        ("ACA", 'T'), ("ACC", 'T'), ("ACG", 'T'), ("ACT", 'T'),
        ("AAC", 'N'), ("AAT", 'N'), ("AAA", 'K'), ("AAG", 'K'),
        ("AGC", 'S'), ("AGT", 'S'), ("AGA", 'R'), ("AGG", 'R'),
        ("CTA", 'L'), ("CTC", 'L'), ("CTG", 'L'), ("CTT", 'L'),
        ("CCA", 'P'), ("CCC", 'P'), ("CCG", 'P'), ("CCT", 'P'),
        ("CAC", 'H'), ("CAT", 'H'), ("CAA", 'Q'), ("CAG", 'Q'),
        ("CGA", 'R'), ("CGC", 'R'), ("CGG", 'R'), ("CGT", 'R'),
        ("GTA", 'V'), ("GTC", 'V'), ("GTG", 'V'), ("GTT", 'V'),
        ("GCA", 'A'), ("GCC", 'A'), ("GCG", 'A'), ("GCT", 'A'),
        ("GAC", 'D'), ("GAT", 'D'), ("GAA", 'E'), ("GAG", 'E'),
        ("GGA", 'G'), ("GGC", 'G'), ("GGG", 'G'), ("GGT", 'G'),
        ("TCA", 'S'), ("TCC", 'S'), ("TCG", 'S'), ("TCT", 'S'),
        ("TTC", 'F'), ("TTT", 'F'), ("TTA", 'L'), ("TTG", 'L'),
        ("TAC", 'Y'), ("TAT", 'Y'), ("TAA", '_'), ("TAG", '_'),
        ("TGC", 'C'), ("TGT", 'C'), ("TGA", '_'), ("TGG", 'W'),
    ]
    .iter()
    .copied()
    .collect();

    // `end` is the exclusive end offset of each full window, so a trailing
    // partial codon is never visited.
    (3..=seq.len())
        .step_by(3)
        // `str::get` yields `None` instead of panicking when the byte range
        // cuts through a multi-byte character; such windows cannot be codons.
        .filter_map(|end| seq.get(end - 3..end))
        .filter_map(|codon| table.get(codon).copied())
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_translate() {
        assert_eq!("", translate(""));
        assert_eq!("", translate("TA"));
        assert_eq!("I", translate("ATA"));
        assert_eq!("IL", translate("ATATTG"));
        assert_eq!("IL", translate("ATATTGT"));
    }

    #[test]
    fn test_translate_non_ascii_does_not_panic() {
        // The first window ends in the middle of the two-byte 'é'.
        assert_eq!("", translate("ATé"));
        // "éA" is a well-formed but unknown window; "ATA" still translates.
        assert_eq!("I", translate("éAATA"));
    }
}