From e7d29bb19e216dba4ba1c1cd5f74de5aa9a6788c Mon Sep 17 00:00:00 2001 From: shiridikumar Date: Wed, 13 Nov 2024 12:30:50 +0530 Subject: [PATCH] updated random DNA/RNA sequence generator --- PAMI/extras/syntheticDataGenerator/create | 0 .../createSyntheticNucleotideSequence.py | 94 +++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 PAMI/extras/syntheticDataGenerator/create create mode 100644 PAMI/extras/syntheticDataGenerator/createSyntheticNucleotideSequence.py diff --git a/PAMI/extras/syntheticDataGenerator/create b/PAMI/extras/syntheticDataGenerator/create new file mode 100644 index 00000000..e69de29b diff --git a/PAMI/extras/syntheticDataGenerator/createSyntheticNucleotideSequence.py b/PAMI/extras/syntheticDataGenerator/createSyntheticNucleotideSequence.py new file mode 100644 index 00000000..b8c8cd50 --- /dev/null +++ b/PAMI/extras/syntheticDataGenerator/createSyntheticNucleotideSequence.py @@ -0,0 +1,94 @@ +import random +import time +import psutil +import os + +class NucleotideSequenceGenerator: + """ + :Description: NucleotideSequenceGenerator generates a random DNA or RNA sequence with specified GC content and length. + :Attributes: + sequence_length: int + Length of the generated sequence. + gc_content: float + Desired GC content as a decimal (e.g., 0.5 for 50%). + is_rna: bool + True for RNA sequence, False for DNA sequence. + sequence: str + Generated DNA or RNA sequence. + memoryUSS : float + Stores the total amount of USS memory consumed by the program. + memoryRSS : float + Stores the total amount of RSS memory consumed by the program. + startTime : float + Records the start time of the sequence generation process. + endTime : float + Records the completion time of the sequence generation process. + :Methods: + create: + Generates the random DNA or RNA sequence. + save: + Saves the generated sequence to a user-specified file. + getSequence: + Returns the generated sequence. + getMemoryUSS: + Retrieves the total amount of USS memory consumed by the process. + getMemoryRSS: + Retrieves the total amount of RSS memory consumed by the process. + getRuntime: + Retrieves the total runtime taken by the sequence generation process. + """ + + def __init__(self, sequence_length, gc_content, is_rna=False): + self.sequence_length = sequence_length + self.gc_content = gc_content + self.is_rna = is_rna + self.sequence = "" + self._startTime = float() + self._endTime = float() + self._memoryUSS = float() + self._memoryRSS = float() + + def create(self): + self._startTime = time.time() + + # Define nucleotide choices based on DNA or RNA + nucleotides = "GCAU" if self.is_rna else "GCAT" + + # Calculate number of G/C and A/T (or A/U) bases + gc_count = int(self.sequence_length * self.gc_content) + at_count = self.sequence_length - gc_count + + # Build the sequence with the correct proportion of GC and AT + sequence_list = ( + random.choices("GC", k=gc_count) + + random.choices("AU" if self.is_rna else "AT", k=at_count) + ) + + # Shuffle to randomize order + random.shuffle(sequence_list) + + # Convert list to string + self.sequence = "".join(sequence_list) + + self._endTime = time.time() + + def save(self, filename): + with open(filename, 'w') as file: + file.write(self.sequence) + + def getSequence(self): + return self.sequence + + def getMemoryUSS(self) -> float: + process = psutil.Process(os.getpid()) + self._memoryUSS = process.memory_full_info().uss + return self._memoryUSS + + def getMemoryRSS(self) -> float: + process = psutil.Process(os.getpid()) + self._memoryRSS = process.memory_info().rss + return self._memoryRSS + + def getRuntime(self) -> float: + return self._endTime - self._startTime +