Skip to content

Commit

Permalink
Merge pull request #282 from nanoporetech/stack-allocs
Browse files Browse the repository at this point in the history
Avoid potentially large stack allocation, clean up int usage
  • Loading branch information
zhengzhenxian authored Mar 15, 2024
2 parents 366bee5 + adbd284 commit 751307f
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 34 deletions.
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ CFLAGS = -fpic -std=c99 -O3 -I ${PREFIX}/include -L ${PREFIX}/lib
CPPFLAGS = -std=c++11 -Wall -O3 -I ${PREFIX}/include -L ${PREFIX}/lib -Wl,-rpath=${PREFIX}/lib
LP_CPPFLAGS = -std=c++11 -Wall -g -O3 -I ${PREFIX}/include -L ${PREFIX}/lib -Wl,-rpath=${PREFIX}/lib



samtools-$(SAMVER)/Makefile:
curl -L -o samtools-${SAMVER}.tar.bz2 https://github.com/samtools/samtools/releases/download/${SAMVER}/samtools-${SAMVER}.tar.bz2; \
tar -xjf samtools-${SAMVER}.tar.bz2; \
Expand All @@ -27,7 +29,6 @@ libhts.a: samtools-$(SAMVER)/Makefile
cd samtools-${SAMVER}/htslib-${SAMVER}; CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}" ./configure; make CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}"
cp samtools-${SAMVER}/htslib-${SAMVER}/$@ $@


longphase-$(LPVER)/Makefile:
curl -L -o longphase-${LPVER}.tar.gz https://github.com/twolinin/longphase/archive/refs/tags/v${LPVER}.tar.gz; \
tar -zxvf longphase-${LPVER}.tar.gz; \
Expand All @@ -39,7 +40,7 @@ longphase: longphase-$(LPVER)/Makefile
cp longphase-${LPVER}/$@ $@


libclair3.so: samtools-${SAMVER}/htslib-${SAMVER}
libclair3.so: samtools-${SAMVER}/htslib-${SAMVER} libhts.a
${PYTHON} build.py


Expand Down
42 changes: 23 additions & 19 deletions src/clair3_full_alignment.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ int realign_read(Variant *variant, Read *read, size_t i, size_t consumed, size_t
uint32_t *cigartuples = read->cigartuples;
uint8_t *seqi = read->seqi;
size_t n_cigar = read->n_cigar;
size_t middle_op = bam_cigar_op(cigartuples[i]);
//size_t middle_op = bam_cigar_op(cigartuples[i]); // unused
size_t middle_length = bam_cigar_oplen(cigartuples[i]);
size_t left_consumed = consumed > 0 ? consumed : 0;
size_t right_consumed = consumed < middle_length ? middle_length - consumed : 0;
Expand Down Expand Up @@ -258,15 +258,15 @@ int haplotag_read(Variants_info *variants_info, Read *read, char *ref_seq, size_
size_t query_pos = 0;
size_t v_position = 0;
Variant **variants = variants_info->variants;
uint8_t *seqi = read->seqi;
//uint8_t *seqi = read->seqi; // unused
uint32_t *cigartuples = read->cigartuples;
size_t n_cigar = read->n_cigar;
size_t j = variants_info->variant_current_pos;
size_t ref_pos = read->read_start;
khash_t(KH_INT_COUNTER) *haplotype_cost = kh_init(KH_INT_COUNTER);
int allele = 0;

while (j < n && variants[j]->position < ref_pos)
while (j < n && (size_t)variants[j]->position < ref_pos)
j += 1;

for (size_t i = 0; i < n_cigar; i++)
Expand Down Expand Up @@ -456,7 +456,8 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
}
}

size_t flanking_candidates[flanking_candidates_num];
size_t *flanking_candidates = malloc(flanking_candidates_num * sizeof(size_t));

for (khiter_t k = kh_begin(flanking_candidates_p); k != kh_end(flanking_candidates_p); ++k)
{
if (kh_exist(flanking_candidates_p, k))
Expand Down Expand Up @@ -517,13 +518,13 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
variant_current_pos++;
variants_info.variant_current_pos = variant_current_pos;

while (candidate_current_index < flanking_candidates_num && flanking_candidates[candidate_current_index] < pos)
while (candidate_current_index < flanking_candidates_num && flanking_candidates[candidate_current_index] < (size_t)pos)
candidate_current_index++;

read.read_end = get_read_end(cigartuples, n_cigar, read.read_start);

// get the overlap candidates number and skip the alignment if no flanking candidate overlapped
size_t overlap_candidates_num = get_overlap_candidate_num(pos, read.read_end, candidate_current_index, flanking_candidates_num, &flanking_candidates);
size_t overlap_candidates_num = get_overlap_candidate_num(pos, read.read_end, candidate_current_index, flanking_candidates_num, flanking_candidates);
read.overlap_candidates_num = overlap_candidates_num;
if (read.overlap_candidates_num == 0)
{
Expand Down Expand Up @@ -651,14 +652,14 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
}

// allocate memory of the input matrix of all candidates
int8_t *matrix = calloc(candidate_num * matrix_depth * no_of_positions * channel_size, sizeof(int8_t));

HAP read_hap_array[reads_num];
int matrix_read_index_array[matrix_depth];
Alt_info *alt_info = malloc(matrix_depth * sizeof(Alt_info));
HAP *read_hap_array = malloc(reads_num * sizeof(HAP));
int *matrix_read_index_array = malloc(matrix_depth * sizeof(int));

char **alt_info_p = calloc(candidate_num, sizeof(char*));
// allocate output
fa_data data = calloc(1, sizeof(_fa_data));
char **alt_info_p = calloc(candidate_num, sizeof(char*));
int8_t *matrix = calloc(candidate_num * matrix_depth * no_of_positions * channel_size, sizeof(int8_t));

// loop each candiate and generate full-alignment input matrix
for (size_t i = 0; i < candidate_num; i++)
Expand Down Expand Up @@ -690,7 +691,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
read_hap_array[overlap_read_num++].haplotype = read.haplotype;
}

sort_read_name_by_haplotype(&read_hap_array, &matrix_read_index_array, matrix_depth, overlap_read_num);
sort_read_name_by_haplotype(read_hap_array, matrix_read_index_array, matrix_depth, overlap_read_num);

// loop each overlapped read of a candidate
for (size_t d = 0; d < matrix_depth; d++)
Expand Down Expand Up @@ -718,7 +719,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
continue;
}

if (offset < 0 || offset >= read.overlap_candidates_num)
if (offset < 0 || (size_t)offset >= read.overlap_candidates_num)
continue;

int8_t alt_v = 0;
Expand All @@ -728,7 +729,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)

if (is_center_pos)
candidate_depth++;
size_t alt_int = read.pos_info[offset].alt_base;
//size_t alt_int = read.pos_info[offset].alt_base; // unused
char alt_base = seq_nt16_str[read.pos_info[offset].alt_base];
if (read.pos_info[offset].ins_length > 0)
{
Expand Down Expand Up @@ -825,17 +826,17 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)

int ref_count = pos_alt_info[i].acgt_count[acgt2num[center_ref_base - 'A']];

sprintf(alt_info_str, "%i-%i-%c-", candidate + 1, candidate_depth, center_ref_base);
for (size_t j = 0; j < 4; j++)
sprintf(alt_info_str, "%zu-%zu-%c-", candidate + 1, candidate_depth, center_ref_base);
for (int8_t j = 0; j < 4; j++)
{
if (j != acgt2num[center_ref_base - 'A'] && pos_alt_info[i].acgt_count[j] > 0)
sprintf(alt_info_str + strlen(alt_info_str), "X%c %i ", ACGT[j], pos_alt_info[i].acgt_count[j]);
sprintf(alt_info_str + strlen(alt_info_str), "X%c %zu ", ACGT[j], pos_alt_info[i].acgt_count[j]);
}
for (khiter_t k = kh_begin(pos_alt_info[i].ins_counter); k != kh_end(pos_alt_info[i].ins_counter); k++)
{
if (kh_exist(pos_alt_info[i].ins_counter, k))
{
char *key = kh_key(pos_alt_info[i].ins_counter, k);
const char *key = kh_key(pos_alt_info[i].ins_counter, k);
int val = kh_val(pos_alt_info[i].ins_counter, k);
ref_count -= val;
if (strlen(key) <= max_indel_length)
Expand All @@ -858,7 +859,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
int key = kh_key(pos_alt_info[i].del_counter, k);
int val = kh_val(pos_alt_info[i].del_counter, k);
ref_count -= val;
if (key <= max_indel_length)
if (key <= (int)max_indel_length) // don't set max_indel_length to >2^32
{
if (strlen(alt_info_str) + key + 32 >= max_alt_length)
{
Expand Down Expand Up @@ -904,6 +905,9 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length)
free(chr);
free(pos_alt_info);
free(alt_info);
free(read_hap_array);
free(matrix_read_index_array);
free(flanking_candidates);
kh_counter_destroy(read_name_set);
kh_int_counter_destroy(candidates_p);
kh_int_counter_destroy(flanking_candidates_p);
Expand Down
2 changes: 1 addition & 1 deletion src/clair3_full_alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ static const int8_t HAP_TYPE[3] = {60, 30, 90};
#define normalize_hap(x) (HAP_TYPE[x])

static const size_t overhang = 10;
static const char *RN = "\0";
//static const char *RN = "\0"; // unused
static const size_t min_haplotag_mq = 20;
static const size_t expand_reference_region = 2000000;
static const size_t flanking_base_num = 16;
Expand Down
18 changes: 9 additions & 9 deletions src/clair3_pileup.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,8 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
size_t ins_count = 0;

bool pass_af = false;
bool pass_snp_af = false;
bool pass_indel_af = false;
//bool pass_snp_af = false; // ununsed
//bool pass_indel_af = false; // unused

const char *c_name = data->hdr->target_name[tid];
if (strcmp(c_name, chr) != 0) continue;
Expand All @@ -222,7 +222,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
n_cols++;


if (pre_pos + 1 != pos || pre_pos == 0)
if (pre_pos + 1 != (size_t)pos || pre_pos == 0)
contiguous_flanking_num = 0;
else
contiguous_flanking_num++;
Expand Down Expand Up @@ -355,7 +355,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
for (size_t i = 0; i < 4; i++) {
forward_sum += pileup->matrix[major_col + i];
reverse_sum += pileup->matrix[major_col + i + reverse_pos_start];
if (i == ref_offset_forward) {
if (i == (size_t)ref_offset_forward) {
ref_count = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start];
} else {
size_t current_count = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start];
Expand Down Expand Up @@ -397,15 +397,15 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
size_t max_alt_length = 64;
char *alt_info_str = xalloc(max_alt_length, sizeof(char), "alt_info_str");

sprintf(alt_info_str, "%i-%i-%c-", pos+1, depth, ref_base);
sprintf(alt_info_str, "%i-%zu-%c-", pos+1, depth, ref_base);
//snp
for (size_t i = 0; i < 4; i++) {
forward_sum += pileup->matrix[major_col + i];
reverse_sum += pileup->matrix[major_col + i + reverse_pos_start];
size_t alt_sum = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start];

if (alt_sum > 0 && i != ref_offset_forward)
sprintf(alt_info_str + strlen(alt_info_str), "X%c %i ", plp_bases_clair3[i], alt_sum);
if (alt_sum > 0 && i != (size_t)ref_offset_forward)
sprintf(alt_info_str + strlen(alt_info_str), "X%c %zu ", plp_bases_clair3[i], alt_sum);
}
//del
for (size_t i = 0; i < del_buf_size; i++) {
Expand All @@ -418,7 +418,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
max_alt_length = max_alt_length << 1;
alt_info_str = xrealloc(alt_info_str, max_alt_length*sizeof(char), "alt_info_str");
}
sprintf(alt_info_str + strlen(alt_info_str), "D%.*s %i ", i+1,ref_seq+offset+1, d);
sprintf(alt_info_str + strlen(alt_info_str), "D%.*s %zu ", (int)i+1, ref_seq+offset+1, d);
}

}
Expand All @@ -434,7 +434,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co
max_alt_length = max_alt_length << 1;
alt_info_str = xrealloc(alt_info_str, max_alt_length *sizeof(char), "alt_info_str");
}
sprintf(alt_info_str + strlen(alt_info_str), "I%c%s %i ", ref_base, key, val);
sprintf(alt_info_str + strlen(alt_info_str), "I%c%s %zu ", ref_base, key, val);
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/medaka_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ char *substring(char *string, int position, int length) {
char *ptr;
size_t i;

if(length < 0) return NULL;
ptr = malloc(length + 1);

for (i = 0 ; i < length ; i++) {
for (i = 0 ; i < (size_t) length ; i++) {
*(ptr + i) = *(string + position);
string++;
}
Expand Down
4 changes: 2 additions & 2 deletions src/medaka_khcounter.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ void kh_counter_print(khash_t(KH_COUNTER) *hash) {
printf("%s -> %i\n", key, val);
}
}
kh_counter_stats_t stats = kh_counter_stats(hash);
// printf("max: %i, sum: %i\n", stats.max, stats.sum);
//kh_counter_stats_t stats = kh_counter_stats(hash);
//printf("max: %i, sum: %i\n", stats.max, stats.sum);
}


Expand Down

0 comments on commit 751307f

Please sign in to comment.