From 850092871e8c4733c588c1e8a288cf3a328bc820 Mon Sep 17 00:00:00 2001 From: Nenad Date: Thu, 12 Sep 2024 17:59:09 +0200 Subject: [PATCH] Sync exercise with canonical data --- .../protein-translation/.meta/example.cairo | 153 +++++++----------- .../protein-translation/src/lib.cairo | 22 +-- .../tests/protein_translation.cairo | 116 ++++--------- 3 files changed, 96 insertions(+), 195 deletions(-) diff --git a/exercises/practice/protein-translation/.meta/example.cairo b/exercises/practice/protein-translation/.meta/example.cairo index 12412d0c..68b11651 100644 --- a/exercises/practice/protein-translation/.meta/example.cairo +++ b/exercises/practice/protein-translation/.meta/example.cairo @@ -1,104 +1,75 @@ use core::dict::{Felt252Dict, Felt252DictEntryTrait}; -#[derive(Destruct)] -struct CodonsInfo { - actual_codons: Felt252Dict>, -} - -enum TranslateResult { - Invalid, - Stopped, - Ok -} - -pub fn parse(pairs: Array<(felt252, ByteArray)>) -> CodonsInfo { - let mut actual_codons: Felt252Dict> = Default::default(); - for (codon, name) in pairs - .span() { - actual_codons.insert(codon.clone(), NullableTrait::new(name.clone())); - }; - CodonsInfo { actual_codons, } -} +pub fn proteins(strand: ByteArray) -> Array { + let mut result: Array = array![]; + let mut codons_map = codons_map(); -#[generate_trait] -pub impl CodonsInfoImpl of CodonsInfoTrait { - fn name_for(ref self: CodonsInfo, codon: felt252) -> ByteArray { - let (entry, _name) = self.actual_codons.entry(codon); - let name = _name.deref_or(""); - let res = name.clone(); - self.actual_codons = entry.finalize(NullableTrait::new(name)); - res - } - - fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Array { - let mut result: Array = array![]; - - let mut codon_index = 0; - let translate_result = loop { - if codon_index == strand.len() { - break TranslateResult::Ok; - } + let mut stopped = false; + let mut codon_index = 0; + while let Option::Some(codon) = codon_chunk(@strand, codon_index) { + let name = name_for_codon(ref codons_map, codon); + if name == "" { + break; + } else if name == "STOP" { + stopped = true; + break; + } else { + result.append(name); + codon_index += 3; + } + }; - if let Option::Some(codon) = strand.codon_chunk(codon_index) { - let name = self.name_for(codon); - if name == "" { - break TranslateResult::Invalid; - } else if name == "stop codon" { - break TranslateResult::Stopped; - } + assert(codon_index >= strand.len() || stopped, 'Invalid codon'); - result.append(name); - codon_index += 3; - } else { - break TranslateResult::Invalid; - } - }; + result +} - match translate_result { - TranslateResult::Invalid => core::panic_with_felt252('Invalid codon'), - _ => result - } - } +fn codons_map() -> Felt252Dict> { + let mut codons_map: Felt252Dict> = Default::default(); + codons_map.insert('AUG', NullableTrait::new("Methionine")); + codons_map.insert('UUU', NullableTrait::new("Phenylalanine")); + codons_map.insert('UUC', NullableTrait::new("Phenylalanine")); + codons_map.insert('UUA', NullableTrait::new("Leucine")); + codons_map.insert('UUG', NullableTrait::new("Leucine")); + codons_map.insert('UCU', NullableTrait::new("Serine")); + codons_map.insert('UCC', NullableTrait::new("Serine")); + codons_map.insert('UCA', NullableTrait::new("Serine")); + codons_map.insert('UCG', NullableTrait::new("Serine")); + codons_map.insert('UAU', NullableTrait::new("Tyrosine")); + codons_map.insert('UAC', NullableTrait::new("Tyrosine")); + codons_map.insert('UGU', NullableTrait::new("Cysteine")); + codons_map.insert('UGC', NullableTrait::new("Cysteine")); + codons_map.insert('UGG', NullableTrait::new("Tryptophan")); + codons_map.insert('UAA', NullableTrait::new("STOP")); + codons_map.insert('UAG', NullableTrait::new("STOP")); + codons_map.insert('UGA', NullableTrait::new("STOP")); + codons_map } const TWO_POW_8: u32 = 0x100; const TWO_POW_16: u32 = 0x10000; -/// Extracts a codon from a given ByteArray from index `from`. -/// Needs to extract 3 ByteArray characters and convert them to the appropriate -/// felt252 value. It does this by taking the characters' byte value and moving -/// their bits to the left depending on their position in the codon. -/// -/// Example: -/// 1. Method call: "AUG".codon_chunk(0) -/// 2. Chars and their byte (hex) values: -/// - "A" = 0x41 -/// - "U" = 0x55 -/// - "G" = 0x47 -/// 3. "A" is the leftmost character, so we "move" it 2 bytes to the left by -/// multiplying it by 2^16 (hex value: 0x10000) -/// 4. "U" is the middle character, so we "move" it 1 byte to the left by -/// multiplying it by 2^8 (hex value: 0x100) -/// 5. "G" is the rightmost character, so we leave it in place -/// 6. Codon = "A" * 2^16 + "U" * 2^8 + "G" -/// = 0x41 * 0x10000 + 0x55 * 0x100 * 0x47 -/// = 0x415547 -/// 7. (41)(55)(47) are hex values for (A)(U)(G) -/// -/// Returns: -/// - Option::Some(codon) -> if the extraction was successful -/// - Option::None -> if the ByteArray was too short from the given index -#[generate_trait] -impl CodonChunk of CodonChunkTrait { - fn codon_chunk(self: @ByteArray, from: usize) -> Option { - if let Option::Some(char) = self.at(from + 2) { - let codon = char.into() - + self[from - + 1].into() * TWO_POW_8 - + self[from].into() * TWO_POW_16; - Option::Some(codon.into()) - } else { - Option::None - } +fn byte_to_felt252(codon: ByteArray) -> felt252 { + (codon[0].into() * TWO_POW_16 + codon[1].into() * TWO_POW_8 + codon[2].into()).into() +} + +fn name_for_codon(ref self: Felt252Dict>, codon: ByteArray) -> ByteArray { + let codon = byte_to_felt252(codon); + let (entry, _name) = self.entry(codon); + let name = _name.deref_or(""); + let res = name.clone(); + self = entry.finalize(NullableTrait::new(name)); + res +} + +fn codon_chunk(self: @ByteArray, from: u32) -> Option { + if let Option::Some(char) = self.at(from + 2) { + let mut codon = ""; + codon.append_byte(self[from]); + codon.append_byte(self[from + 1]); + codon.append_byte(char); + Option::Some(codon) + } else { + Option::None } } diff --git a/exercises/practice/protein-translation/src/lib.cairo b/exercises/practice/protein-translation/src/lib.cairo index a8ae7981..808714a8 100644 --- a/exercises/practice/protein-translation/src/lib.cairo +++ b/exercises/practice/protein-translation/src/lib.cairo @@ -1,21 +1,3 @@ -#[derive(Destruct)] -struct CodonsInfo {} - -pub fn parse(pairs: Array<(felt252, ByteArray)>) -> CodonsInfo { - // constructs a new CodonsInfo struct - panic!("implement `parse`") -} - -#[generate_trait] -pub impl CodonsInfoImpl of CodonsInfoTrait { - fn name_for(ref self: CodonsInfo, codon: felt252) -> ByteArray { - // return name for {codon} - panic!("implement `name_for`") - } - - fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { - // return the array of codon names that correspond to the given strand of RNA (represented - // as a string of codons) - panic!("implement `of_rna`") - } +pub fn proteins(strand: ByteArray) -> Array { + panic!("implement 'proteins'") } diff --git a/exercises/practice/protein-translation/tests/protein_translation.cairo b/exercises/practice/protein-translation/tests/protein_translation.cairo index b030f2a9..0cad56ad 100644 --- a/exercises/practice/protein-translation/tests/protein_translation.cairo +++ b/exercises/practice/protein-translation/tests/protein_translation.cairo @@ -1,241 +1,189 @@ -use protein_translation::{parse, CodonsInfoTrait}; +use protein_translation::proteins; #[test] fn empty_rna_sequence_results_in_no_proteins() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna(""), array![]); + assert_eq!(proteins(""), array![]); } #[test] #[ignore] fn methionine_rna_sequence() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("AUG"), array!["Methionine"]); + assert_eq!(proteins("AUG"), array!["Methionine"]); } #[test] #[ignore] fn phenylalanine_rna_sequence_1() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUU"), array!["Phenylalanine"]); + assert_eq!(proteins("UUU"), array!["Phenylalanine"]); } #[test] #[ignore] fn phenylalanine_rna_sequence_2() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUC"), array!["Phenylalanine"]); + assert_eq!(proteins("UUC"), array!["Phenylalanine"]); } #[test] #[ignore] fn leucine_rna_sequence_1() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUA"), array!["Leucine"]); + assert_eq!(proteins("UUA"), array!["Leucine"]); } #[test] #[ignore] fn leucine_rna_sequence_2() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUG"), array!["Leucine"]); + assert_eq!(proteins("UUG"), array!["Leucine"]); } #[test] #[ignore] fn serine_rna_sequence_1() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UCU"), array!["Serine"]); + assert_eq!(proteins("UCU"), array!["Serine"]); } #[test] #[ignore] fn serine_rna_sequence_2() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UCC"), array!["Serine"]); + assert_eq!(proteins("UCC"), array!["Serine"]); } #[test] #[ignore] fn serine_rna_sequence_3() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UCA"), array!["Serine"]); + assert_eq!(proteins("UCA"), array!["Serine"]); } #[test] #[ignore] fn serine_rna_sequence_4() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UCG"), array!["Serine"]); + assert_eq!(proteins("UCG"), array!["Serine"]); } #[test] #[ignore] fn tyrosine_rna_sequence_1() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UAU"), array!["Tyrosine"]); + assert_eq!(proteins("UAU"), array!["Tyrosine"]); } #[test] #[ignore] fn tyrosine_rna_sequence_2() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UAC"), array!["Tyrosine"]); + assert_eq!(proteins("UAC"), array!["Tyrosine"]); } #[test] #[ignore] fn cysteine_rna_sequence_1() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGU"), array!["Cysteine"]); + assert_eq!(proteins("UGU"), array!["Cysteine"]); } #[test] #[ignore] fn cysteine_rna_sequence_2() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGC"), array!["Cysteine"]); + assert_eq!(proteins("UGC"), array!["Cysteine"]); } #[test] #[ignore] fn tryptophan_rna_sequence() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGG"), array!["Tryptophan"]); + assert_eq!(proteins("UGG"), array!["Tryptophan"]); } #[test] #[ignore] fn stop_codon_rna_sequence_1() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UAA"), array![]); + assert_eq!(proteins("UAA"), array![]); } #[test] #[ignore] fn stop_codon_rna_sequence_2() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UAG"), array![]); + assert_eq!(proteins("UAG"), array![]); } #[test] #[ignore] fn stop_codon_rna_sequence_3() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGA"), array![]); + assert_eq!(proteins("UGA"), array![]); } #[test] #[ignore] fn sequence_of_two_protein_codons_translates_into_proteins() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUUUUU"), array!["Phenylalanine", "Phenylalanine"]); + assert_eq!(proteins("UUUUUU"), array!["Phenylalanine", "Phenylalanine"]); } #[test] #[ignore] fn sequence_of_two_different_protein_codons_translates_into_proteins() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUAUUG"), array!["Leucine", "Leucine"]); + assert_eq!(proteins("UUAUUG"), array!["Leucine", "Leucine"]); } #[test] #[ignore] fn translate_rna_strand_into_correct_protein_list() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("AUGUUUUGG"), array!["Methionine", "Phenylalanine", "Tryptophan"]); + assert_eq!(proteins("AUGUUUUGG"), array!["Methionine", "Phenylalanine", "Tryptophan"]); } #[test] #[ignore] fn translation_stops_if_stop_codon_at_beginning_of_sequence() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UAGUGG"), array![]); + assert_eq!(proteins("UAGUGG"), array![]); } #[test] #[ignore] fn translation_stops_if_stop_codon_at_end_of_two_codon_sequence() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGGUAG"), array!["Tryptophan"]); + assert_eq!(proteins("UGGUAG"), array!["Tryptophan"]); } #[test] #[ignore] fn translation_stops_if_stop_codon_at_end_of_three_codon_sequence() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("AUGUUUUAA"), array!["Methionine", "Phenylalanine"]); + assert_eq!(proteins("AUGUUUUAA"), array!["Methionine", "Phenylalanine"]); } #[test] #[ignore] fn translation_stops_if_stop_codon_in_middle_of_three_codon_sequence() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGGUAGUGG"), array!["Tryptophan"]); + assert_eq!(proteins("UGGUAGUGG"), array!["Tryptophan"]); } #[test] #[ignore] fn translation_stops_if_stop_codon_in_middle_of_six_codon_sequence() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGGUGUUAUUAAUGGUUU"), array!["Tryptophan", "Cysteine", "Tyrosine"]); + assert_eq!(proteins("UGGUGUUAUUAAUGGUUU"), array!["Tryptophan", "Cysteine", "Tyrosine"]); } #[test] #[ignore] fn sequence_of_two_non_stop_codons_does_not_translate_to_a_stop_codon() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("AUGAUG"), array!["Methionine", "Methionine"]); + assert_eq!(proteins("AUGAUG"), array!["Methionine", "Methionine"]); } #[test] #[ignore] #[should_panic(expected: ('Invalid codon',))] fn non_existing_codon_cant_translate() { - let mut info = parse(make_pairs()); - info.of_rna("AAA"); + proteins("AAA"); } #[test] #[ignore] #[should_panic(expected: ('Invalid codon',))] fn unknown_amino_acids_not_part_of_a_codon_cant_translate() { - let mut info = parse(make_pairs()); - info.of_rna("XYZ"); + proteins("XYZ"); } #[test] #[ignore] #[should_panic(expected: ('Invalid codon',))] fn incomplete_rna_sequence_cant_translate() { - let mut info = parse(make_pairs()); - info.of_rna("AUGU"); + proteins("AUGU"); } #[test] #[ignore] fn incomplete_rna_sequence_cantranslate_if_valid_until_a_stop_codon() { - let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUCUUCUAAUGGU"), array!["Phenylalanine", "Phenylalanine"]); -} - -// The input data constructor. Returns a list of codon, name pairs. -fn make_pairs() -> Array<(felt252, ByteArray)> { - let grouped: Array<(ByteArray, Array)> = array![ - ("Methionine", array!['AUG']), - ("Phenylalanine", array!['UUU', 'UUC']), - ("Leucine", array!['UUA', 'UUG']), - ("Serine", array!['UCU', 'UCC', 'UCA', 'UCG']), - ("Tyrosine", array!['UAU', 'UAC']), - ("Cysteine", array!['UGU', 'UGC']), - ("Tryptophan", array!['UGG']), - ("stop codon", array!['UAA', 'UAG', 'UGA']), - ]; - let mut pairs = ArrayTrait::<(felt252, ByteArray)>::new(); - for (name, codons) in grouped { - for codon in codons { - pairs.append((codon, name.clone())); - }; - }; - pairs + assert_eq!(proteins("UUCUUCUAAUGGU"), array!["Phenylalanine", "Phenylalanine"]); }