Performance testing #990

Open · wants to merge 17 commits into `main`
6 changes: 5 additions & 1 deletion .flake8
@@ -2,10 +2,14 @@
max-line-length = 120
max_complexity = 10
ignore = E203, W503

select = E9,F63,F7,F82
statistics = True
count = True
show-source = True
exclude = .github,
.pytest_cache,
cdisc_rules_engine/resources,
tests/PerformanceTest.py,
venv,
build,
dist
3 changes: 1 addition & 2 deletions .github/workflows/automated-ci.yml
@@ -69,8 +69,7 @@ jobs:

- name: Run flake8
run: |
flake8 ${{needs.get_changed_files.outputs.py}} --count --select=E9,F63,F7,F82 --show-source --statistics
flake8 ${{needs.get_changed_files.outputs.py}} --ignore E203,W503 --count --statistics
flake8 ${{needs.get_changed_files.outputs.py}} --statistics

- name: Run black
run: |
19 changes: 19 additions & 0 deletions README.md
@@ -86,6 +86,25 @@ From the root of the project run the following command (this will run both the u

`python -m pytest tests`

### **Performance Testing**

This repository includes a performance testing script, `tests/PerformanceTest.py`. It measures the execution time of rules against datasets by running each rule for a configurable number of iterations.

#### Running the Performance Test

To execute the performance test, navigate to the root directory of the project and run the following command:

```sh
python tests/PerformanceTest.py -d <DATASET_DIRECTORY> -lr <RULES_DIRECTORY> -total_calls <NUMBER_OF_CALLS> -od <OUTPUT_DIRECTORY>
```

#### Performance Test Command-Line Flags
```
-d TEXT The directory containing the dataset files in `.json` or `.xpt` format.
-lr TEXT The directory containing rule files.
-total_calls INTEGER The number of times each rule should be executed for performance analysis.
-od TEXT The directory where the output report (`rule_execution_report.xlsx`) will be saved. By default, the report is saved in the current working directory.
```
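The flag table above maps naturally onto the stdlib `argparse` module. A minimal sketch of how such flags could be declared (the `dest` names and defaults here are assumptions for illustration, not the script's actual source):

```python
import argparse


def build_parser() -> argparse.ArgumentParser:
    # Flags mirror the table above; dest names and defaults are assumptions.
    parser = argparse.ArgumentParser(description="Rule performance test")
    parser.add_argument("-d", dest="dataset_dir", help="Directory of .json/.xpt dataset files")
    parser.add_argument("-lr", dest="rules_dir", help="Directory of rule files")
    parser.add_argument("-total_calls", type=int, default=1, help="Executions per rule")
    parser.add_argument("-od", dest="output_dir", default=".", help="Report output directory")
    return parser


# Example invocation with hypothetical directories:
args = build_parser().parse_args(["-d", "data", "-lr", "rules", "-total_calls", "5"])
```

Note that `argparse` resolves single-dash long options like `-total_calls` the same way as double-dash ones, so the invocation shown in the README parses as expected.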

### **Running a validation**

#### From the command line
3 changes: 3 additions & 0 deletions cdisc_rules_engine/check_operators/dataframe_operators.py
@@ -28,14 +28,17 @@
from cdisc_rules_engine.services import logger
from functools import wraps
import traceback
import time


def log_operator_execution(func):
@wraps(func)
def wrapper(self, other_value, *args, **kwargs):
try:
logger.info(f"Starting check operator: {func.__name__}")
logger.log(f"\nOPRT{time.time()}-operator {func.__name__} starts")
result = func(self, other_value, *args, **kwargs)
logger.log(f"\nOPRT{time.time()}-operator {func.__name__} ends")
logger.info(f"Completed check operator: {func.__name__}")
return result
except Exception as e:
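The decorator in this hunk follows a standard timing-log pattern. A self-contained sketch of the same idea, using the stdlib `logging` module in place of the engine's logger service (an approximation for illustration, not the repository's code):

```python
import functools
import logging
import time

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("operators")


def log_operator_execution(func):
    """Log timestamped start/end markers around a check-operator call."""
    @functools.wraps(func)
    def wrapper(self, other_value, *args, **kwargs):
        log.info("OPRT%s-operator %s starts", time.time(), func.__name__)
        try:
            # Forward all arguments so operators with extra parameters still work.
            return func(self, other_value, *args, **kwargs)
        finally:
            log.info("OPRT%s-operator %s ends", time.time(), func.__name__)
    return wrapper


class Ops:
    @log_operator_execution
    def equal_to(self, other_value):
        return other_value == 42
```

`functools.wraps` keeps the wrapped operator's `__name__` intact, which is what lets the log lines report the real operator name.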
6 changes: 6 additions & 0 deletions cdisc_rules_engine/rules_engine.py
@@ -47,6 +47,7 @@
)
from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata
import traceback
import time


class RulesEngine:
@@ -351,10 +352,14 @@ def execute_rule(
# Adding copy for now to avoid updating cached dataset
dataset = deepcopy(dataset)
# preprocess dataset

logger.log(f"\nST{time.time()}-Dataset Preprocessing Starts")
dataset_preprocessor = DatasetPreprocessor(
dataset, dataset_metadata, dataset_path, self.data_service, self.cache
)
dataset = dataset_preprocessor.preprocess(rule_copy, datasets)
logger.log(f"\nST{time.time()}-Dataset Preprocessing Ends")
logger.log(f"\nOPRNT{time.time()}-Operation Starts")
dataset = self.rule_processor.perform_rule_operations(
rule_copy,
dataset,
@@ -367,6 +372,7 @@
external_dictionaries=self.external_dictionaries,
ct_packages=ct_packages,
)
logger.log(f"\nOPRNT{time.time()}-Operation Ends")
relationship_data = {}
if (
dataset_metadata is not None
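The paired start/end log calls around preprocessing and rule operations could also be expressed as a context manager, which guarantees the "Ends" line is emitted even when a phase raises. A sketch of that alternative (not what this PR implements; names are illustrative):

```python
import logging
import time
from contextlib import contextmanager

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("rules_engine")


@contextmanager
def log_phase(tag: str, name: str):
    """Emit timestamped start/end markers for a named engine phase."""
    log.info("%s%s-%s Starts", tag, time.time(), name)
    try:
        yield
    finally:
        log.info("%s%s-%s Ends", tag, time.time(), name)


# Usage mirroring the hunk above; the body stands in for preprocess():
with log_phase("ST", "Dataset Preprocessing"):
    processed = [x * 2 for x in range(3)]
```

The `finally` block is the advantage over manual start/end calls: a phase that throws still logs its end marker, so elapsed-time parsing never sees an unmatched start.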