Skip to content

Commit

Permalink
Merge pull request #17 from Puriney/master
Browse files Browse the repository at this point in the history
0.5.1
  • Loading branch information
Puriney authored Apr 6, 2018
2 parents 51c02b7 + 4a9d008 commit 8601214
Show file tree
Hide file tree
Showing 12 changed files with 925 additions and 409 deletions.
31 changes: 22 additions & 9 deletions celseq2/celseq2.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
* -p
* -r
* -T
* -w 1800 # 30min
* --keep-going
* --restart-times 1
* -w 300 # 5min
# * --keep-going (no longer True)
* --restart-times 2
Select optional parameters from snakemake.snakemake():
* --ri v.s. --ii
Expand All @@ -27,6 +27,7 @@
* -j
* --cluster
* -n
* --notemp (--nt)
Refs:
- https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html
Expand Down Expand Up @@ -64,6 +65,11 @@ def get_argument_parser():
action="store_true", default=False,
help="Read has to be mapped to the opposite strand as the feature")

parser.add_argument(
"--celseq2-to-st", "--st",
action="store_true", default=False,
help="Rotate the UMI-count matrix to a shape of spots by genes.")

parser.add_argument(
"--cores", "--jobs", "-j",
action="store",
Expand Down Expand Up @@ -98,7 +104,11 @@ def get_argument_parser():
"--ignore-incomplete", "--ii",
action="store_true",
help="Do not check for incomplete output files.")

parser.add_argument(
"--keep-temp", dest='keep_temp',
action="store_true",
help="Keep the intermediate files after run.")
parser.set_defaults(keep_temp=False)
parser.add_argument(
"--version", "-v",
action="version",
Expand All @@ -120,15 +130,17 @@ def main():
configfile=args.config_file,
config={'output_dir': args.output_dir,
'experiment_table': args.experiment_table,
'stranded': stranded},
'stranded': stranded,
'run_celseq2_to_st': args.celseq2_to_st,
'keep_intermediate': args.keep_temp},

printshellcmds=True,
printreason=True,
timestamp=True,
latency_wait=1800,
latency_wait=300,
jobname="celseq2_job.{rulename}.{jobid}.sh",
keepgoing=True,
restart_times=1,
keepgoing=False,
restart_times=2,

dryrun=args.dryrun,
lock=not args.nolock,
Expand All @@ -139,7 +151,8 @@ def main():
nodes=args.cores,

force_incomplete=args.rerun_incomplete,
ignore_incomplete=args.ignore_incomplete)
ignore_incomplete=args.ignore_incomplete,
notemp=args.keep_temp)

sys.exit(0 if success else 1)

Expand Down
6 changes: 4 additions & 2 deletions celseq2/cook_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def main_new_experiment_table():

def get_workflow_file_string(mode='multiple'):
if mode == 'multiple':
fname = 'celseq2.snakemake'
# fname = 'celseq2.snakemake'
fname = 'celseq2_beta.snakemake'
else:
fname = 'celseq2_single_lib.snakemake'
fstring = resource_string('celseq2', 'workflow/{}'.format(fname))
Expand All @@ -86,7 +87,8 @@ def get_workflow_file_string(mode='multiple'):

def get_workflow_file_fpath(mode='multiple'):
if mode == 'multiple':
fname = 'celseq2.snakemake'
# fname = 'celseq2.snakemake'
fname = 'celseq2_beta.snakemake'
else:
fname = 'celseq2_single_lib.snakemake'
fpath = resource_filename('celseq2', 'workflow/{}'.format(fname))
Expand Down
113 changes: 113 additions & 0 deletions celseq2/demultiplex_sam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/usr/bin/env python3

import pysam

import argparse

from celseq2.helper import print_logger
from celseq2.helper import join_path
from celseq2.demultiplex import bc_dict_id2seq, str2int


def _cell_seq (name, length=6):
# BC-TCTGAG_UMI-CGTTAC => TCTGAG
try:
out = name.split('_')[0][3:3 + length]
except Exception as e:
raise(e)
return(out)


def demultiplex_sam (samfile, outdir, bc_length):
if not samfile:
return
samobj = pysam.AlignmentFile(samfile, 'rb')

dict_samout = {}

for aln in samobj:
bc = _cell_seq(aln.query_name, length=bc_length)
fh = dict_samout.get(bc, None)
if not fh:
outsam = join_path(outdir, bc + '.sam')
fh = pysam.AlignmentFile(outsam, 'w', template=samobj)
dict_samout[bc] = fh

fh.write(aln)

for _, fh in dict_samout.items():
fh.close()


def demultiplex_sam_with_claim (samfile, outdir, bc_length, claimed_bc):
if not samfile:
return
if not claimed_bc:
return

samobj = pysam.AlignmentFile(samfile, 'rb')

dict_samout = {}
for bc in claimed_bc:
fh = pysam.AlignmentFile(
join_path(outdir, bc + '.sam'),
'w', template=samobj)
dict_samout[bc] = fh

for aln in samobj:
bc = _cell_seq(aln.query_name, length=bc_length)
fh = dict_samout.get(bc, None)
if not fh:
continue
fh.write(aln)

for _, fh in dict_samout.items():
fh.close()


def main():
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('--sbam', type=str, metavar='FILENAME',
help='File path to SAM/BAM file')
parser.add_argument('--savetodir', type=str, metavar='DIRNAME',
help='Directory path to save the demultiplexed SAMs.',
default='.')
parser.add_argument('--bc-length', type=int, metavar='N',
help='Length of cell barcode.', default=6)
parser.add_argument('--claim', action='store_true', dest='claim')
parser.set_defaults(claim=False)
parser.add_argument('--bc-index', type=str, metavar='FILENAME',
help='File path to barcode dictionary.')
parser.add_argument('--bc-seq-column', type=int, metavar='N',
default=0,
help=('Column of cell barcode dictionary file '
'which tells the actual sequences.'))
parser.add_argument('--bc-index-used', type=str, metavar='string',
default='1-96',
help='Index of used barcode IDs (default=1-96)')

args = parser.parse_args()

print_logger('Demultiplexing SAM/BAM starts {} ...'.format(args.sbam))
if args.claim:
all_bc_dict = bc_dict_id2seq(args.bc_index, args.bc_seq_column)
bc_index_used = str2int(args.bc_index_used)
bc_seq_used = [all_bc_dict.get(x, None) for x in bc_index_used]
demultiplex_sam_with_claim(
samfile=args.sbam,
outdir=args.savetodir,
bc_length=args.bc_length,
claimed_bc=bc_seq_used)
else:
demultiplex_sam(
samfile=args.sbam,
outdir=args.savetodir,
bc_length=args.bc_length)

print_logger('Demultiplexing SAM/BAM ends. See: {}'.format(args.savetodir))


if __name__ == "__main__":
main()


17 changes: 11 additions & 6 deletions celseq2/support/st_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,23 @@ def celseq2stpipeline(celseq2_fpath, spatial_map, out,

genes = map(lambda x: x.replace(' ', '_'), expr_valid.index.values)
colnames = expr_valid.columns.values
fhout.write('{}\t{}\n'.format('', '\t'.join(genes))) # header
# fhout.write('{}\t{}\n'.format('', '\t'.join(genes))) # header
fhout.write('{}\t{}\t{}\n'.format('Row', 'Col', '\t'.join(genes))) # header

for colname in colnames:
tmp = colname.replace('.', '-')
spot_seq = tmp.split('-')[-1]
tmp = colname.replace('.', '-') # BC-1-ATGC or ATGC
spot_seq = tmp.split('-')[-1] # ATGC or ATGC
spot_expr = expr_valid[colname].values
spot_xy = dict_spatial_seq2xy.get(spot_seq, None)
if not spot_xy:
continue
spot_xy = 'x'.join(map(str, spot_xy))
fhout.write('{}\t{}\n'.format(
spot_xy,
# spot_xy = 'x'.join(map(str, spot_xy))
# fhout.write('{}\t{}\n'.format(
# spot_xy,
# '\t'.join(map(str, spot_expr))))
spot_r, spot_c = spot_xy
fhout.write('{}\t{}\t{}\n'.format(
spot_r, spot_c,
'\t'.join(map(str, spot_expr))))

fhout.close()
Expand Down
35 changes: 0 additions & 35 deletions celseq2/template/config_single_lib.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion celseq2/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.4.8'
__version__ = '0.5.1'
Loading

0 comments on commit 8601214

Please sign in to comment.