Skip to content

Commit

Permalink
catch constant sites in filter script
Browse files Browse the repository at this point in the history
  • Loading branch information
ktmeaton committed Apr 24, 2021
1 parent 035c929 commit cd6f133
Showing 1 changed file with 22 additions and 11 deletions.
33 changes: 22 additions & 11 deletions workflow/scripts/filter_sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
num_samples = len(alignment_in[:, 1])
logging.info("Number of samples: " + str(num_samples))
# Counter to store number of singleton sites
constant_sites = 0
biallelic_singleton_sites = 0
multiallelic_singleton_sites = 0
pseudo_singleton_sites = 0
Expand Down Expand Up @@ -169,27 +170,30 @@
# store the variant type (default singleton)
site_type = "singleton"

# Remove singleton columns sites
# Determine Site Type
# Constant site format [0,0,0,x]
if count_list.count(0) == 3:
constant_sites += 1
# Biallelic format [0,0,1,x]
if count_list.count(0) == 2 and count_list.count(1) == 1:
elif count_list.count(0) == 2 and count_list.count(1) >= 1:
biallelic_singleton_sites += 1
if not keep_singleton:
continue
# Multi allelic format [0,1,1,x]
elif count_list.count(0) == 1 and count_list.count(1) == 2:
elif count_list.count(0) == 1 and count_list.count(1) >= 2:
multiallelic_singleton_sites += 1
if not keep_singleton:
continue
# Pseudo allelic format [0,1,x,x] or [1,1,x,x]
# Pseudo allelic format [0,1,x,x] or [1,1,x,x] or [1,x,x,x]
elif count_list.count(1) > 0:
pseudo_singleton_sites += 1
if not keep_singleton:
continue
else:
parsimony_informative_sites += 1
site_type = "parsimony"

# Decide if this
# Decide whether to skip over this site
if site_type == "constant":
continue
if site_type == "singleton" and not keep_singleton:
continue

# Decide whether this site has enough informative nucleotides
num_data = count_g + count_t + count_a + count_c
site_prop = num_data / num_samples
# Check if the amount of missing data passes user parameter
Expand Down Expand Up @@ -226,6 +230,13 @@
total_singleton_sites = (
biallelic_singleton_sites + multiallelic_singleton_sites + pseudo_singleton_sites
)
logging.info(
"Constant sites: "
+ str(constant_sites)
+ " ("
+ str(int(constant_sites / alignment_in_len * 100))
+ "%)"
)
logging.info(
"Parsimony informative sites: "
+ str(parsimony_informative_sites)
Expand Down

0 comments on commit cd6f133

Please sign in to comment.