Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
… into dev
  • Loading branch information
ktmeaton committed Apr 26, 2021
2 parents cd24348 + cd6f133 commit cfbc469
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 12 deletions.
2 changes: 1 addition & 1 deletion workflow/rules/alignment.smk
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ rule eager:
--max_memory {resources.mem_mb}.MB \
--max_time {resources.time_min}m \
{config[eager_other]} \
-resume;"
-resume; > {output.log_txt}"
"{scripts_dir}/eager_cleanup.sh {results_dir} {wildcards.reads_origin} {wildcards.sample}; "

# -----------------------------------------------------------------------------#
Expand Down
33 changes: 22 additions & 11 deletions workflow/scripts/filter_sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
num_samples = len(alignment_in[:, 1])
logging.info("Number of samples: " + str(num_samples))
# Counter to store number of singleton sites
constant_sites = 0
biallelic_singleton_sites = 0
multiallelic_singleton_sites = 0
pseudo_singleton_sites = 0
Expand Down Expand Up @@ -169,27 +170,30 @@
# store the variant type (default singleton)
site_type = "singleton"

# Remove singleton columns sites
# Determine Site Type
# Constant site format [0,0,0,x]
if count_list.count(0) == 3:
constant_sites += 1
# Biallelic format [0,0,1,x]
if count_list.count(0) == 2 and count_list.count(1) == 1:
elif count_list.count(0) == 2 and count_list.count(1) >= 1:
biallelic_singleton_sites += 1
if not keep_singleton:
continue
# Multi allelic format [0,1,1,x]
elif count_list.count(0) == 1 and count_list.count(1) == 2:
elif count_list.count(0) == 1 and count_list.count(1) >= 2:
multiallelic_singleton_sites += 1
if not keep_singleton:
continue
# Pseudo allelic format [0,1,x,x] or [1,1,x,x]
# Pseudo allelic format [0,1,x,x] or [1,1,x,x] or [1,x,x,x]
elif count_list.count(1) > 0:
pseudo_singleton_sites += 1
if not keep_singleton:
continue
else:
parsimony_informative_sites += 1
site_type = "parsimony"

# Decide if this
# Decide whether to skip over this site
if site_type == "constant":
continue
if site_type == "singleton" and not keep_singleton:
continue

# Decide whether this site has enough informative nucleotides
num_data = count_g + count_t + count_a + count_c
site_prop = num_data / num_samples
# Check if the amount of missing data passes user parameter
Expand Down Expand Up @@ -226,6 +230,13 @@
total_singleton_sites = (
biallelic_singleton_sites + multiallelic_singleton_sites + pseudo_singleton_sites
)
logging.info(
"Constant sites: "
+ str(constant_sites)
+ " ("
+ str(int(constant_sites / alignment_in_len * 100))
+ "%)"
)
logging.info(
"Parsimony informative sites: "
+ str(parsimony_informative_sites)
Expand Down

0 comments on commit cfbc469

Please sign in to comment.