Skip to content

Commit

Permalink
add ADT, TCR and BCR for Integrated or Grouped analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
mAGLAVE committed May 14, 2021
1 parent 5e0d97b commit cf77427
Show file tree
Hide file tree
Showing 16 changed files with 1,753 additions and 141 deletions.
320 changes: 209 additions & 111 deletions Snakefile

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions rules/Grp_Adding_ADT.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
##########################################################################
This rule adds ADT information to the gene expression analysis of grouped single-cell RNA-seq data.
##########################################################################
"""

wildcard_constraints:
grp_add_adt_output = "|".join(GRP_ADD_ADT_OUTPUT)

"""
This function allows to determine the input .rda ge file and kallisto adt folder.
"""
def grp_add_adt_input(wildcards):
    """Return the rule inputs: the GE .rda file followed by the kallisto ADT folder(s).

    The ADT folders are read from dic_GRP_ADD_ADT_INFO as a comma-separated
    string; duplicated entries are dropped while the first-seen order is kept.
    The .rda file is always placed first in the returned list.
    """
    info = dic_GRP_ADD_ADT_INFO[wildcards.grp_add_adt_output]
    adt_folders = dict.fromkeys(info['GRP_ADD_ADT_INPUT_DIR_ADT'].split(","))
    # No debug output here: input functions are evaluated for every job, and
    # the integrated counterpart of this function does not print either.
    return [info['GRP_ADD_ADT_INPUT_RDA'], *adt_folders]

"""
This function allows to determine the singularity binding parameters.
"""
def grp_add_adt_params_sing(wildcards):
    """Build the singularity bind options (" -B host:container ...") for the rule.

    Bound host directories, in order: the pipeline folder, the folder holding
    the GE .rda file (also used as output folder), and the parent directory of
    every kallisto ADT folder. Each one is mounted under /WORKDIR/ in the
    container. Directories are de-duplicated after os.path.dirname(), so inputs
    sharing a parent directory produce a single -B option.
    """
    info = dic_GRP_ADD_ADT_INFO[wildcards.grp_add_adt_output]
    host_dirs = [PIPELINE_FOLDER, os.path.dirname(info['GRP_ADD_ADT_INPUT_RDA'])]
    host_dirs.extend(
        os.path.dirname(kallisto_folder)
        for kallisto_folder in info['GRP_ADD_ADT_INPUT_DIR_ADT'].split(",")
    )
    # dict.fromkeys() removes repeated directories while keeping their order.
    return "".join(
        " -B " + host_dir + ":" + os.path.normpath("/WORKDIR/" + host_dir)
        for host_dir in dict.fromkeys(host_dirs)
    )

"""
This function allows to determine the input alignment folder for params section.
"""
def grp_add_adt_params_input_folder(wildcards):
    """Return the container-side kallisto ADT folders as a comma-separated string.

    Each unique host folder (first-seen order) is prefixed with /WORKDIR/ and
    normalised; os.path.normpath() drops the trailing "/" appended here.
    """
    raw_folders = dic_GRP_ADD_ADT_INFO[wildcards.grp_add_adt_output]['GRP_ADD_ADT_INPUT_DIR_ADT']
    unique_folders = dict.fromkeys(raw_folders.split(","))
    return ",".join(
        os.path.normpath("/WORKDIR/" + host_folder + "/")
        for host_folder in unique_folders
    )

"""
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot).
"""
def grp_add_adt_params_output_folder(wildcards):
    """Return the container-side output folder, always ending with "/".

    The folder is the directory part of the grp_add_adt_output wildcard,
    re-rooted under /WORKDIR/.
    """
    host_dir = os.path.dirname(wildcards.grp_add_adt_output)
    container_dir = os.path.normpath("/WORKDIR/" + host_dir)
    return container_dir + "/"

"""
This function allows to determine the sample.name.adt for params.
"""
def grp_add_adt_params_sample_name_adt(wildcards):
    """Return the GRP_ADD_ADT_SAMPLE_NAME_ADT entry stored for this output prefix."""
    info = dic_GRP_ADD_ADT_INFO[wildcards.grp_add_adt_output]
    return info['GRP_ADD_ADT_SAMPLE_NAME_ADT']


"""
This rule launches the R script to add adt information to expression gene analysis.
"""
rule grp_add_adt_ge:
input:
grp_add_adt_file = grp_add_adt_input
output:
grp_add_adt_rda_file = "{grp_add_adt_output}" + "_ADT.rda"
params:
sing_bind = grp_add_adt_params_sing,
pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
kallisto_folder = grp_add_adt_params_input_folder,
output_folder = grp_add_adt_params_output_folder,
sample_name_adt = grp_add_adt_params_sample_name_adt
threads:
1
resources:
mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
time_min = lambda wildcards, attempt: min(attempt * 120, 200)
shell:
"""
singularity exec --no-home {params.sing_bind} \
{SINGULARITY_ENV} \
Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_ADT.R \
--samples.name.adt {params.sample_name_adt} \
--input.rda.ge {params.input_rda} \
--output.dir {params.output_folder} \
--input.dirs.adt {params.kallisto_folder} \
--author.name {GRP_ADD_ADT_AUTHOR_NAME} \
--author.mail {GRP_ADD_ADT_AUTHOR_MAIL} \
--nthreads {threads} \
--pipeline.path {params.pipeline_folder} \
--gene.names {GRP_ADD_ADT_GENE_NAMES} \
--ADT.min.cutoff {GRP_ADD_ADT_MIN_CUTOFF} \
--ADT.max.cutoff {GRP_ADD_ADT_MAX_CUTOFF}
"""
71 changes: 71 additions & 0 deletions rules/Grp_Adding_BCR.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
##########################################################################
This rule adds BCR information to the gene expression analysis of grouped single-cell RNA-seq data.
##########################################################################
"""
wildcard_constraints:
grp_add_bcr_output = "|".join(GRP_ADD_BCR_OUTPUT)

"""
This function allows to determine the input .rda file and csv file from cellranger vdj.
"""
def grp_add_bcr_input(wildcards):
    """Return the rule inputs: the .rda file followed by the BCR csv file(s).

    The csv files come from a comma-separated string in dic_GRP_ADD_BCR_INFO;
    duplicated entries are dropped while the first-seen order is kept.
    """
    info = dic_GRP_ADD_BCR_INFO[wildcards.grp_add_bcr_output]
    unique_csv = dict.fromkeys(info['GRP_ADD_BCR_INPUT_CSV_BCR'].split(","))
    return [info['GRP_ADD_BCR_INPUT_RDA'], *unique_csv]

"""
This function allows to determine the singularity binding parameters.
"""
def grp_add_bcr_params_sing(wildcards):
    """Build the singularity bind options (" -B host:container ...") for the rule.

    Bound host directories, in order: the pipeline folder, the folder holding
    the .rda file (also used as output folder), and the directory of every BCR
    csv file. Each one is mounted under /WORKDIR/ in the container. Directories
    are de-duplicated after os.path.dirname(), so csv files located in the same
    folder produce a single -B option.
    """
    info = dic_GRP_ADD_BCR_INFO[wildcards.grp_add_bcr_output]
    host_dirs = [PIPELINE_FOLDER, os.path.dirname(info['GRP_ADD_BCR_INPUT_RDA'])]
    host_dirs.extend(
        os.path.dirname(bcrfile)
        for bcrfile in info['GRP_ADD_BCR_INPUT_CSV_BCR'].split(",")
    )
    # dict.fromkeys() removes repeated directories while keeping their order.
    return "".join(
        " -B " + host_dir + ":" + os.path.normpath("/WORKDIR/" + host_dir)
        for host_dir in dict.fromkeys(host_dirs)
    )

"""
This function allows to determine the bcr files folders for params.
"""
def grp_add_bcr_params_bcr_files(wildcards):
    """Return the container-side BCR csv paths as a comma-separated string.

    Each unique host path (first-seen order) is prefixed with /WORKDIR/ and
    normalised with os.path.normpath().
    """
    raw_files = dic_GRP_ADD_BCR_INFO[wildcards.grp_add_bcr_output]['GRP_ADD_BCR_INPUT_CSV_BCR']
    unique_files = dict.fromkeys(raw_files.split(","))
    return ",".join(
        os.path.normpath("/WORKDIR/" + host_file)
        for host_file in unique_files
    )

"""
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot).
"""
def grp_add_bcr_params_output_folder(wildcards):
    """Return the container-side output folder, always ending with "/".

    The folder is the directory part of the grp_add_bcr_output wildcard,
    re-rooted under /WORKDIR/.
    """
    host_dir = os.path.dirname(wildcards.grp_add_bcr_output)
    container_dir = os.path.normpath("/WORKDIR/" + host_dir)
    return container_dir + "/"

"""
This rule launches the R script to add BCR information to the gene expression analysis.
"""
rule grp_add_bcr_ge:
input:
grp_add_bcr_file = grp_add_bcr_input
output:
grp_add_bcr_rda_file = "{grp_add_bcr_output}" + "_BCR.rda"
params:
sing_bind = grp_add_bcr_params_sing,
pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
input_csv = grp_add_bcr_params_bcr_files,
output_folder = grp_add_bcr_params_output_folder
threads:
1
resources:
mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
time_min = lambda wildcards, attempt: min(attempt * 120, 200)
shell:
"""
singularity exec --no-home {params.sing_bind} \
{SINGULARITY_ENV_TCR_BCR} \
Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_BCR.R \
--input.rda {params.input_rda} \
--output.dir {params.output_folder} \
--vdj.input.files.bcr {params.input_csv} \
--author.name {GRP_ADD_BCR_AUTHOR_NAME} \
--author.mail {GRP_ADD_BCR_AUTHOR_MAIL} \
--pipeline.path {params.pipeline_folder}
"""
71 changes: 71 additions & 0 deletions rules/Grp_Adding_TCR.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
##########################################################################
This rule adds TCR information to the gene expression analysis of grouped single-cell RNA-seq data.
##########################################################################
"""
wildcard_constraints:
grp_add_tcr_output = "|".join(GRP_ADD_TCR_OUTPUT)

"""
This function allows to determine the input .rda file and csv file from cellranger vdj.
"""
def grp_add_tcr_input(wildcards):
    """Return the rule inputs: the .rda file followed by the TCR csv file(s).

    The csv files come from a comma-separated string in dic_GRP_ADD_TCR_INFO;
    duplicated entries are dropped while the first-seen order is kept.
    """
    info = dic_GRP_ADD_TCR_INFO[wildcards.grp_add_tcr_output]
    unique_csv = dict.fromkeys(info['GRP_ADD_TCR_INPUT_CSV_TCR'].split(","))
    return [info['GRP_ADD_TCR_INPUT_RDA'], *unique_csv]

"""
This function allows to determine the singularity binding parameters.
"""
def grp_add_tcr_params_sing(wildcards):
    """Build the singularity bind options (" -B host:container ...") for the rule.

    Bound host directories, in order: the pipeline folder, the folder holding
    the .rda file (also used as output folder), and the directory of every TCR
    csv file. Each one is mounted under /WORKDIR/ in the container. Directories
    are de-duplicated after os.path.dirname(), so csv files located in the same
    folder produce a single -B option.
    """
    info = dic_GRP_ADD_TCR_INFO[wildcards.grp_add_tcr_output]
    host_dirs = [PIPELINE_FOLDER, os.path.dirname(info['GRP_ADD_TCR_INPUT_RDA'])]
    host_dirs.extend(
        os.path.dirname(tcrfile)
        for tcrfile in info['GRP_ADD_TCR_INPUT_CSV_TCR'].split(",")
    )
    # dict.fromkeys() removes repeated directories while keeping their order.
    return "".join(
        " -B " + host_dir + ":" + os.path.normpath("/WORKDIR/" + host_dir)
        for host_dir in dict.fromkeys(host_dirs)
    )

"""
This function allows to determine the tcr files folders for params.
"""
def grp_add_tcr_params_tcr_files(wildcards):
    """Return the container-side TCR csv paths as a comma-separated string.

    Each unique host path (first-seen order) is prefixed with /WORKDIR/ and
    normalised with os.path.normpath().
    """
    raw_files = dic_GRP_ADD_TCR_INFO[wildcards.grp_add_tcr_output]['GRP_ADD_TCR_INPUT_CSV_TCR']
    unique_files = dict.fromkeys(raw_files.split(","))
    return ",".join(
        os.path.normpath("/WORKDIR/" + host_file)
        for host_file in unique_files
    )

"""
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot).
"""
def grp_add_tcr_params_output_folder(wildcards):
    """Return the container-side output folder, always ending with "/".

    The folder is the directory part of the grp_add_tcr_output wildcard,
    re-rooted under /WORKDIR/.
    """
    host_dir = os.path.dirname(wildcards.grp_add_tcr_output)
    container_dir = os.path.normpath("/WORKDIR/" + host_dir)
    return container_dir + "/"

"""
This rule launches the R script to add TCR information to the gene expression analysis.
"""
rule grp_add_tcr_ge:
input:
grp_add_tcr_file = grp_add_tcr_input
output:
grp_add_tcr_rda_file = "{grp_add_tcr_output}" + "_TCR.rda"
params:
sing_bind = grp_add_tcr_params_sing,
pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
input_csv = grp_add_tcr_params_tcr_files,
output_folder = grp_add_tcr_params_output_folder
threads:
1
resources:
mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
time_min = lambda wildcards, attempt: min(attempt * 120, 200)
shell:
"""
singularity exec --no-home {params.sing_bind} \
{SINGULARITY_ENV_TCR_BCR} \
Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_TCR.R \
--input.rda {params.input_rda} \
--output.dir {params.output_folder} \
--vdj.input.files.tcr {params.input_csv} \
--author.name {GRP_ADD_TCR_AUTHOR_NAME} \
--author.mail {GRP_ADD_TCR_AUTHOR_MAIL} \
--pipeline.path {params.pipeline_folder}
"""
84 changes: 84 additions & 0 deletions rules/Int_Adding_ADT.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
##########################################################################
This rule adds ADT information to the gene expression analysis of integrated single-cell RNA-seq data.
##########################################################################
"""
wildcard_constraints:
int_add_adt_output = "|".join(INT_ADD_ADT_OUTPUT)

"""
This function allows to determine the input .rda ge file and kallisto adt folder.
"""
def int_add_adt_input(wildcards):
    """Return the rule inputs: the GE .rda file followed by the kallisto ADT folder(s).

    The ADT folders come from a comma-separated string in dic_INT_ADD_ADT_INFO;
    duplicated entries are dropped while the first-seen order is kept.
    """
    info = dic_INT_ADD_ADT_INFO[wildcards.int_add_adt_output]
    unique_folders = dict.fromkeys(info['INT_ADD_ADT_INPUT_DIR_ADT'].split(","))
    return [info['INT_ADD_ADT_INPUT_RDA'], *unique_folders]

"""
This function allows to determine the singularity binding parameters.
"""
def int_add_adt_params_sing(wildcards):
    """Build the singularity bind options (" -B host:container ...") for the rule.

    Bound host directories, in order: the pipeline folder, the folder holding
    the GE .rda file (also used as output folder), and the parent directory of
    every kallisto ADT folder. Each one is mounted under /WORKDIR/ in the
    container. Directories are de-duplicated after os.path.dirname(), so inputs
    sharing a parent directory produce a single -B option.
    """
    info = dic_INT_ADD_ADT_INFO[wildcards.int_add_adt_output]
    host_dirs = [PIPELINE_FOLDER, os.path.dirname(info['INT_ADD_ADT_INPUT_RDA'])]
    host_dirs.extend(
        os.path.dirname(kallisto_folder)
        for kallisto_folder in info['INT_ADD_ADT_INPUT_DIR_ADT'].split(",")
    )
    # dict.fromkeys() removes repeated directories while keeping their order.
    return "".join(
        " -B " + host_dir + ":" + os.path.normpath("/WORKDIR/" + host_dir)
        for host_dir in dict.fromkeys(host_dirs)
    )

"""
This function allows to determine the input alignment folder for params section.
"""
def int_add_adt_params_input_folder(wildcards):
    """Return the container-side kallisto ADT folders as a comma-separated string.

    Each unique host folder (first-seen order) is prefixed with /WORKDIR/ and
    normalised; os.path.normpath() drops the trailing "/" appended here.
    """
    raw_folders = dic_INT_ADD_ADT_INFO[wildcards.int_add_adt_output]['INT_ADD_ADT_INPUT_DIR_ADT']
    unique_folders = dict.fromkeys(raw_folders.split(","))
    return ",".join(
        os.path.normpath("/WORKDIR/" + host_folder + "/")
        for host_folder in unique_folders
    )

"""
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot).
"""
def int_add_adt_params_output_folder(wildcards):
    """Return the container-side output folder, always ending with "/".

    The folder is the directory part of the int_add_adt_output wildcard,
    re-rooted under /WORKDIR/.
    """
    host_dir = os.path.dirname(wildcards.int_add_adt_output)
    container_dir = os.path.normpath("/WORKDIR/" + host_dir)
    return container_dir + "/"

"""
This function allows to determine the sample.name.adt for params.
"""
def int_add_adt_params_sample_name_adt(wildcards):
    """Return the INT_ADD_ADT_SAMPLE_NAME_ADT entry stored for this output prefix."""
    info = dic_INT_ADD_ADT_INFO[wildcards.int_add_adt_output]
    return info['INT_ADD_ADT_SAMPLE_NAME_ADT']


"""
This rule launches the R script to add adt information to expression gene analysis.
"""
rule int_add_adt_ge:
input:
int_add_adt_file = int_add_adt_input
output:
int_add_adt_rda_file = "{int_add_adt_output}" + "_ADT.rda"
params:
sing_bind = int_add_adt_params_sing,
pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
kallisto_folder = int_add_adt_params_input_folder,
output_folder = int_add_adt_params_output_folder,
sample_name_adt = int_add_adt_params_sample_name_adt
threads:
1
resources:
mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
time_min = lambda wildcards, attempt: min(attempt * 120, 200)
shell:
"""
singularity exec --no-home {params.sing_bind} \
{SINGULARITY_ENV} \
Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_ADT.R \
--samples.name.adt {params.sample_name_adt} \
--input.rda.ge {params.input_rda} \
--output.dir {params.output_folder} \
--input.dirs.adt {params.kallisto_folder} \
--author.name {INT_ADD_ADT_AUTHOR_NAME} \
--author.mail {INT_ADD_ADT_AUTHOR_MAIL} \
--nthreads {threads} \
--pipeline.path {params.pipeline_folder} \
--gene.names {INT_ADD_ADT_GENE_NAMES} \
--ADT.min.cutoff {INT_ADD_ADT_MIN_CUTOFF} \
--ADT.max.cutoff {INT_ADD_ADT_MAX_CUTOFF}
"""
71 changes: 71 additions & 0 deletions rules/Int_Adding_BCR.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
##########################################################################
This rule adds BCR information to the gene expression analysis of integrated single-cell RNA-seq data.
##########################################################################
"""
wildcard_constraints:
int_add_bcr_output = "|".join(INT_ADD_BCR_OUTPUT)

"""
This function allows to determine the input .rda file and csv file from cellranger vdj.
"""
def int_add_bcr_input(wildcards):
    """Return the rule inputs: the .rda file followed by the BCR csv file(s).

    The csv files come from a comma-separated string in dic_INT_ADD_BCR_INFO;
    duplicated entries are dropped while the first-seen order is kept.
    """
    info = dic_INT_ADD_BCR_INFO[wildcards.int_add_bcr_output]
    unique_csv = dict.fromkeys(info['INT_ADD_BCR_INPUT_CSV_BCR'].split(","))
    return [info['INT_ADD_BCR_INPUT_RDA'], *unique_csv]

"""
This function allows to determine the singularity binding parameters.
"""
def int_add_bcr_params_sing(wildcards):
    """Build the singularity bind options (" -B host:container ...") for the rule.

    Bound host directories, in order: the pipeline folder, the folder holding
    the .rda file (also used as output folder), and the directory of every BCR
    csv file. Each one is mounted under /WORKDIR/ in the container. Directories
    are de-duplicated after os.path.dirname(), so csv files located in the same
    folder produce a single -B option.
    """
    info = dic_INT_ADD_BCR_INFO[wildcards.int_add_bcr_output]
    host_dirs = [PIPELINE_FOLDER, os.path.dirname(info['INT_ADD_BCR_INPUT_RDA'])]
    host_dirs.extend(
        os.path.dirname(bcrfile)
        for bcrfile in info['INT_ADD_BCR_INPUT_CSV_BCR'].split(",")
    )
    # dict.fromkeys() removes repeated directories while keeping their order.
    return "".join(
        " -B " + host_dir + ":" + os.path.normpath("/WORKDIR/" + host_dir)
        for host_dir in dict.fromkeys(host_dirs)
    )

"""
This function allows to determine the bcr files folders for params.
"""
def int_add_bcr_params_bcr_files(wildcards):
    """Return the container-side BCR csv paths as a comma-separated string.

    Each unique host path (first-seen order) is prefixed with /WORKDIR/ and
    normalised with os.path.normpath().
    """
    raw_files = dic_INT_ADD_BCR_INFO[wildcards.int_add_bcr_output]['INT_ADD_BCR_INPUT_CSV_BCR']
    unique_files = dict.fromkeys(raw_files.split(","))
    return ",".join(
        os.path.normpath("/WORKDIR/" + host_file)
        for host_file in unique_files
    )

"""
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot).
"""
def int_add_bcr_params_output_folder(wildcards):
    """Return the container-side output folder, always ending with "/".

    The folder is the directory part of the int_add_bcr_output wildcard,
    re-rooted under /WORKDIR/.
    """
    host_dir = os.path.dirname(wildcards.int_add_bcr_output)
    container_dir = os.path.normpath("/WORKDIR/" + host_dir)
    return container_dir + "/"

"""
This rule launches the R script to add BCR information to the gene expression analysis.
"""
rule int_add_bcr_ge:
input:
int_add_bcr_file = int_add_bcr_input
output:
int_add_bcr_rda_file = "{int_add_bcr_output}" + "_BCR.rda"
params:
sing_bind = int_add_bcr_params_sing,
pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
input_csv = int_add_bcr_params_bcr_files,
output_folder = int_add_bcr_params_output_folder
threads:
1
resources:
mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
time_min = lambda wildcards, attempt: min(attempt * 120, 200)
shell:
"""
singularity exec --no-home {params.sing_bind} \
{SINGULARITY_ENV_TCR_BCR} \
Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_BCR.R \
--input.rda {params.input_rda} \
--output.dir {params.output_folder} \
--vdj.input.files.bcr {params.input_csv} \
--author.name {INT_ADD_BCR_AUTHOR_NAME} \
--author.mail {INT_ADD_BCR_AUTHOR_MAIL} \
--pipeline.path {params.pipeline_folder}
"""
Loading

0 comments on commit cf77427

Please sign in to comment.