Skip to content

Commit

Permalink
Merge pull request #38 from kircherlab/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
sroener authored Nov 20, 2023
2 parents 4d5bb06 + ac99ba7 commit 3904cf7
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cfDNA UniFlow is a unified, standardized, and ready-to-use workflow for processi
</div>

<figure>
<img loading="lazy" src="supplement/cfDNA_unifyed_preprocessing.drawio.png">
<img loading="lazy" src="supplement/cfDNA_unified_preprocessing_overview.png">
<figcaption>
<div align="justify">
<strong>Figure S1: Overview of cfDNA UniFlow.</strong> Functionalities are color coded by task. Red boxes represent rules for the automatic download of public resources. Grey boxes are optional steps. Blue boxes contain the core functionality of cfDNA UniFlow. Green boxes are optional, but highly recommended steps and yellow boxes summarize the Quality Control and reporting steps.
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed supplement/cfDNA_unifyed_preprocessing.drawio.png
Binary file not shown.
3 changes: 2 additions & 1 deletion workflow/rules/QualityControl.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

rule fastqc:
input:
"results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
Expand All @@ -8,6 +7,8 @@ rule fastqc:
log:
"results/logs/{ID}/fastqc/{SAMPLE}_all.{GENOME}.log",
threads: 8
resources:
mem_mb = lambda wildcards,threads: 1024*threads
wrapper:
"v2.2.1/bio/fastqc"

Expand Down
24 changes: 9 additions & 15 deletions workflow/scripts/plot_overlay_GCcorrection.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python

import click
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
Expand Down Expand Up @@ -151,25 +150,17 @@ def process_sample(
pd.DataFrame: Processed sample.
"""

# print(locals())
if window % 2 == 0:
fstart = int(window / 2 + 1)
fstop = int(-window / 2)
elif window % 2 == 1:
fstart = int(window / 2 - 0.5 + 1)
fstop = int(-window / 2 + 0.5)

if edge_norm:
flank_start, flank_end = get_window_slice(len(sample.columns), flank * 2)

helper = abs(sample.iloc[:, flank_start:flank_end].mean(axis=1))
if (helper == 0).any():
zero_mask = helper == 0
print(
logger.info(
f"Regions with zero mean in Normalization slice [{flank_start}, {flank_end}] encountered. Removing respective regions."
)
for zero_region in zero_mask[zero_mask].index:
print(f"Removing region: {zero_region}")
logger.info(f"Removing region: {zero_region}")
helper.drop(helper[zero_mask].index, inplace=True)
sample.drop(sample[zero_mask].index, inplace=True)
sample = sample.div(helper, axis=0)
Expand Down Expand Up @@ -205,8 +196,6 @@ def process_sample(
sample["position"] = calculate_flanking_regions(len(sample))
sample = sample.set_index("position")

# sample = sample.iloc[fstop:fstart, :]

return sample


Expand Down Expand Up @@ -455,10 +444,11 @@ def main(
else:
name_func = make_name_regex_func(name_regex)

logger.info("Loading uncorrected samples.")
uncorrected_df = pd.DataFrame()
for sample in uncorrected_samples:
name = name_func(sample)
print(f"loading uncorrected sample: {name}")
logger.info(f"Loading uncorrected sample: {name}")
tmpdf = load_table(sample)
tmpdf = process_sample(
sample=tmpdf,
Expand All @@ -474,10 +464,11 @@ def main(
uncorrected_df[name] = tmpdf
del tmpdf

logger.info("Loading corrected samples.")
corrected_df = pd.DataFrame()
for sample in corrected_samples:
name = name_func(sample)
print(f"loading corrected sample: {name}")
logger.info(f"loading corrected sample: {name}")
tmpdf = load_table(sample)
tmpdf = process_sample(
sample=tmpdf,
Expand All @@ -492,6 +483,7 @@ def main(
corrected_df[name] = tmpdf
del tmpdf

logger.info("Adjusting display window.")
if display_window:
uncorr_min = uncorrected_df.index.min()
uncorr_max = uncorrected_df.index.max()
Expand All @@ -508,6 +500,7 @@ def main(

sns.set_palette("hls", len(corrected_df.columns))

logger.info("Plotting figure.")
Fig = plot_correction_overlay(
uncorrected=uncorrected_df,
corrected=corrected_df,
Expand All @@ -517,6 +510,7 @@ def main(
lower_limit=lower_limit,
upper_limit=upper_limit,
)
logger.info("Saving figure.")
Fig.savefig(output, bbox_inches="tight")


Expand Down

0 comments on commit 3904cf7

Please sign in to comment.