Skip to content

Commit

Permalink
fix: Change regex to string literal
Browse files (browse the repository at this point in the history)
  • Loading branch information
KHajji committed Nov 17, 2022
1 parent cf71355 commit c156c64
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 75 deletions.
102 changes: 42 additions & 60 deletions juno_library/juno_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
from datetime import datetime
from pandas import read_csv
import pathlib
from pathlib import Path
import re
from snakemake import snakemake
import subprocess
from uuid import uuid4
import yaml
from dataclasses import dataclass


class PipelineStartup:
Expand Down Expand Up @@ -156,7 +158,7 @@ def __enlist_fastq_samples(self):
# because they get confused with the identifiers of forward and reverse
# reads.
pattern = re.compile(
"(.*?)(?:_S\d+_|_S\d+.|_|\.)(?:_L555_)?(?:p)?R?(1|2)(?:_.*\.|\..*\.|\.)f(ast)?q(\.gz)?"
r"(.*?)(?:_S\d+_|_S\d+.|_|\.)(?:_L555_)?(?:p)?R?(1|2)(?:_.*\.|\..*\.|\.)f(ast)?q(\.gz)?"
)
samples = {}
for file_ in self.__subdirs_["fastq"].iterdir():
Expand Down Expand Up @@ -279,75 +281,55 @@ def get_metadata_from_csv_file(
self.juno_metadata = juno_metadata.to_dict(orient="index")


@dataclass
class RunSnakemake:
"""
Class with necessary input to actually run Snakemake. It is basically a
wrapper for the snakemake function (of the snakemake package) but with some
customization that is used in all our Juno pipelines
"""

def __init__(
self,
pipeline_name,
pipeline_version,
output_dir,
workdir,
exclusion_file=None,
sample_sheet=pathlib.Path("config/sample_sheet.yaml"),
user_parameters=pathlib.Path("config/user_parameters.yaml"),
fixed_parameters=pathlib.Path("config/pipeline_parameters.yaml"),
snakefile="Snakefile",
cores=300,
local=False,
queue="bio",
unlock=False,
rerunincomplete=True,
dryrun=False,
useconda=True,
conda_prefix=None,
usesingularity=True,
singularityargs="",
singularity_prefix=None,
restarttimes=0,
latency_wait=60,
time_limit=60,
name_snakemake_report="snakemake_report.html",
**kwargs,
):
pipeline_name: str
pipeline_version: str
output_dir: Path
workdir: Path
exclusion_file: None | Path
sample_sheet: Path = pathlib.Path("config/sample_sheet.yaml")
user_parameters: Path = pathlib.Path("config/user_parameters.yaml")
fixed_parameters: Path = pathlib.Path("config/pipeline_parameters.yaml")
snakefile: str = "Snakefile"
cores: int = 300
local: bool = False
queue: str = "bio"
unlock: bool = False
rerunincomplete: bool = True
dryrun: bool = False
useconda: bool = True
conda_prefix: None | str = None
usesingularity = True
singularityargs = ""
singularity_prefix = None
restarttimes: int = 0
latency_wait: int = 60
time_limit: int = 60
name_snakemake_report: str = "snakemake_report.html"
conda_frontend: str = "mamba"

def __post_init__(self, **kwargs):
"""Constructor"""
self.pipeline_name = pipeline_name
self.pipeline_version = pipeline_version
self.output_dir = pathlib.Path(output_dir)
self.workdir = pathlib.Path(workdir)
self.sample_sheet = sample_sheet
self.user_parameters = user_parameters
self.fixed_parameters = fixed_parameters
self.snakefile = snakefile
self.path_to_audit = self.output_dir.joinpath("audit_trail")
self.snakemake_report = str(self.path_to_audit.joinpath(name_snakemake_report))
self.cores = cores
self.local = local
self.queue = queue
self.unlock = unlock
self.dryrun = dryrun
self.rerunincomplete = rerunincomplete
self.useconda = useconda
self.conda_frontend = "mamba"
self.conda_prefix = conda_prefix
self.usesingularity = usesingularity
self.singularityargs = singularityargs
self.singularity_prefix = singularity_prefix
self.restarttimes = restarttimes
self.latency = latency_wait
self.time_limit = time_limit
self.kwargs = kwargs

if exclusion_file is None:
print("exclude is none")
self.exclusion_file = None
self.path_to_audit = self.output_dir.joinpath("audit_trail")
self.output_dir = Path(self.output_dir)
self.workdir = Path(self.workdir)
self.fixed_parameters = Path(self.fixed_parameters)
self.snakemake_report = str(
self.path_to_audit.joinpath(self.name_snakemake_report)
)
if self.exclusion_file is None:
print("There is no exclude file")
else:
print("found exclude file")
self.exclusion_file = pathlib.Path(exclusion_file)
self.exclusion_file = pathlib.Path(self.exclusion_file)
print("exclude file = ", self.exclusion_file)

def get_run_info(self):
Expand Down Expand Up @@ -517,7 +499,7 @@ def run_snakemake(self):
printshellcmds=True,
force_incomplete=self.rerunincomplete,
restart_times=self.restarttimes,
latency_wait=self.latency,
latency_wait=self.latency_wait,
unlock=self.unlock,
dryrun=self.dryrun,
**self.kwargs,
Expand Down
31 changes: 16 additions & 15 deletions tests/library_tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import os
import pathlib
from pathlib import Path
from sys import path
import subprocess
import unittest
Expand Down Expand Up @@ -191,7 +192,7 @@ def setUpClass(self):
if fake_file == "exclusion_file.exclude":
make_non_empty_file(fake_file, content="sample1")

def tearDownClass(slef):
def tearDownClass(self):
"""Removing fake directories/files"""

fake_dirs = [
Expand Down Expand Up @@ -497,9 +498,9 @@ def test_fake_dryrun_setup(self):
fake_run = juno_library.RunSnakemake(
pipeline_name="fake_pipeline",
pipeline_version="0.1",
output_dir="fake_output_dir",
workdir=main_script_path,
exclusion_file="exclusion_file.exclude",
output_dir=Path("fake_output_dir"),
workdir=Path(main_script_path),
exclusion_file=Path("exclusion_file.exclude"),
sample_sheet=pathlib.Path("sample_sheet.yaml"),
user_parameters=pathlib.Path("user_parameters.yaml"),
fixed_parameters=pathlib.Path("fixed_parameters.yaml"),
Expand All @@ -522,9 +523,9 @@ def test_fake_run_setup(self):
fake_run = juno_library.RunSnakemake(
pipeline_name="fake_pipeline",
pipeline_version="0.1",
output_dir="fake_output_dir",
workdir=main_script_path,
exclusion_file="exclusion_file.exclude",
output_dir=Path("fake_output_dir"),
workdir=Path(main_script_path),
exclusion_file=Path("exclusion_file.exclude"),
sample_sheet=pathlib.Path("sample_sheet.yaml"),
user_parameters=pathlib.Path("user_parameters.yaml"),
fixed_parameters=pathlib.Path("fixed_parameters.yaml"),
Expand Down Expand Up @@ -580,9 +581,9 @@ def test_pipeline(self):
fake_run = juno_library.RunSnakemake(
pipeline_name="fake_pipeline",
pipeline_version="0.1",
output_dir="fake_output_dir",
workdir=main_script_path,
exclusion_file="exclusion_file.exclude",
output_dir=Path("fake_output_dir"),
workdir=Path(main_script_path),
exclusion_file=Path("exclusion_file.exclude"),
sample_sheet=pathlib.Path("sample_sheet.yaml"),
user_parameters=pathlib.Path("user_parameters.yaml"),
fixed_parameters=pathlib.Path("fixed_parameters.yaml"),
Expand Down Expand Up @@ -611,11 +612,11 @@ def test_pipeline_in_hpcRIVM(self):
pipeline_name="fake_pipeline",
pipeline_version="0.1",
output_dir=output_dir,
workdir=main_script_path,
exclusion_file="exclusion_file.exclude",
sample_sheet=pathlib.Path("sample_sheet.yaml"),
user_parameters=pathlib.Path("user_parameters.yaml"),
fixed_parameters=pathlib.Path("fixed_parameters.yaml"),
workdir=Path(main_script_path),
exclusion_file=Path("exclusion_file.exclude"),
sample_sheet=Path("sample_sheet.yaml"),
user_parameters=Path("user_parameters.yaml"),
fixed_parameters=Path("fixed_parameters.yaml"),
snakefile="tests/Snakefile",
name_snakemake_report="fake_snakemake_report.html",
local=False,
Expand Down

0 comments on commit c156c64

Please sign in to comment.