Merge pull request #38 from kircherlab/develop

Develop
kircherlab · Nov 20, 2023 · 3904cf7 · 3904cf7
2 parents 4d5bb06 + ac99ba7
commit 3904cf7
Show file tree

Hide file tree

Showing 5 changed files with 12 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@ cfDNA UniFlow is a unified, standardized, and ready-to-use workflow for processi
 </div>
 
 <figure>
- <img loading="lazy" src="supplement/cfDNA_unifyed_preprocessing.drawio.png">
+ <img loading="lazy" src="supplement/cfDNA_unified_preprocessing_overview.png">
  <figcaption>
  <div align="justify">
   <strong>Figure S1: Overview of cfDNA UniFlow.</strong> Functionalities are color coded by task. Red boxes represent rules for the automatic download of public resources. Grey boxes are optional steps. Blue boxes contain the core functionality of cfDNA UniFlow. Green boxes are optional, but highly recommended steps and yellow boxes summarize the Quality Control and reporting steps.

diff --git a/supplement/cfDNA_unified_preprocessing_overview.png b/supplement/cfDNA_unified_preprocessing_overview.png
diff --git a/supplement/cfDNA_unifyed_preprocessing.drawio.png b/supplement/cfDNA_unifyed_preprocessing.drawio.png
diff --git a/workflow/rules/QualityControl.smk b/workflow/rules/QualityControl.smk
@@ -1,4 +1,3 @@
-
 rule fastqc:
     input:
         "results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
@@ -8,6 +7,8 @@ rule fastqc:
     log:
         "results/logs/{ID}/fastqc/{SAMPLE}_all.{GENOME}.log",
     threads: 8
+    resources:
+        mem_mb = lambda wildcards,threads: 1024*threads
     wrapper:
         "v2.2.1/bio/fastqc"
 

diff --git a/workflow/scripts/plot_overlay_GCcorrection.py b/workflow/scripts/plot_overlay_GCcorrection.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 
 import click
-import matplotlib
 import numpy as np
 import pandas as pd
 import seaborn as sns
@@ -151,25 +150,17 @@ def process_sample(
         pd.DataFrame: Processed sample.
     """
 
-    # print(locals())
-    if window % 2 == 0:
-        fstart = int(window / 2 + 1)
-        fstop = int(-window / 2)
-    elif window % 2 == 1:
-        fstart = int(window / 2 - 0.5 + 1)
-        fstop = int(-window / 2 + 0.5)
-
     if edge_norm:
         flank_start, flank_end = get_window_slice(len(sample.columns), flank * 2)
 
         helper = abs(sample.iloc[:, flank_start:flank_end].mean(axis=1))
         if (helper == 0).any():
             zero_mask = helper == 0
-            print(
+            logger.info(
                 f"Regions with zero mean in Normalization slice [{flank_start}, {flank_end}] encountered. Removing respective regions."
             )
             for zero_region in zero_mask[zero_mask].index:
-                print(f"Removing region: {zero_region}")
+                logger.info(f"Removing region: {zero_region}")
             helper.drop(helper[zero_mask].index, inplace=True)
             sample.drop(sample[zero_mask].index, inplace=True)
         sample = sample.div(helper, axis=0)
@@ -205,8 +196,6 @@ def process_sample(
     sample["position"] = calculate_flanking_regions(len(sample))
     sample = sample.set_index("position")
 
-    # sample = sample.iloc[fstop:fstart, :]
-
     return sample
 
 
@@ -455,10 +444,11 @@ def main(
     else:
         name_func = make_name_regex_func(name_regex)
 
+    logger.info("Loading uncorrected samples.")
     uncorrected_df = pd.DataFrame()
     for sample in uncorrected_samples:
         name = name_func(sample)
-        print(f"loading uncorrected sample: {name}")
+        logger.info(f"Loading uncorrected sample: {name}")
         tmpdf = load_table(sample)
         tmpdf = process_sample(
             sample=tmpdf,
@@ -474,10 +464,11 @@ def main(
         uncorrected_df[name] = tmpdf
         del tmpdf
 
+    logger.info("Loading corrected samples.")
     corrected_df = pd.DataFrame()
     for sample in corrected_samples:
         name = name_func(sample)
-        print(f"loading corrected sample: {name}")
+        logger.info(f"loading corrected sample: {name}")
         tmpdf = load_table(sample)
         tmpdf = process_sample(
             sample=tmpdf,
@@ -492,6 +483,7 @@ def main(
         corrected_df[name] = tmpdf
         del tmpdf
 
+    logger.info("Adjusting display window.")
     if display_window:
         uncorr_min = uncorrected_df.index.min()
         uncorr_max = uncorrected_df.index.max()
@@ -508,6 +500,7 @@ def main(
 
     sns.set_palette("hls", len(corrected_df.columns))
 
+    logger.info("Plotting figure.")
     Fig = plot_correction_overlay(
         uncorrected=uncorrected_df,
         corrected=corrected_df,
@@ -517,6 +510,7 @@ def main(
         lower_limit=lower_limit,
         upper_limit=upper_limit,
     )
+    logger.info("Saving figure.")
     Fig.savefig(output, bbox_inches="tight")