diff --git a/Makefile b/Makefile index 57f0e0b..ee28733 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,8 @@ CFLAGS = -fpic -std=c99 -O3 -I ${PREFIX}/include -L ${PREFIX}/lib CPPFLAGS = -std=c++11 -Wall -O3 -I ${PREFIX}/include -L ${PREFIX}/lib -Wl,-rpath=${PREFIX}/lib LP_CPPFLAGS = -std=c++11 -Wall -g -O3 -I ${PREFIX}/include -L ${PREFIX}/lib -Wl,-rpath=${PREFIX}/lib + + samtools-$(SAMVER)/Makefile: curl -L -o samtools-${SAMVER}.tar.bz2 https://github.com/samtools/samtools/releases/download/${SAMVER}/samtools-${SAMVER}.tar.bz2; \ tar -xjf samtools-${SAMVER}.tar.bz2; \ @@ -27,7 +29,6 @@ libhts.a: samtools-$(SAMVER)/Makefile cd samtools-${SAMVER}/htslib-${SAMVER}; CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}" ./configure; make CFLAGS="${CFLAGS}" LDFLAGS="${LDFLAGS}" cp samtools-${SAMVER}/htslib-${SAMVER}/$@ $@ - longphase-$(LPVER)/Makefile: curl -L -o longphase-${LPVER}.tar.gz https://github.com/twolinin/longphase/archive/refs/tags/v${LPVER}.tar.gz; \ tar -zxvf longphase-${LPVER}.tar.gz; \ @@ -39,7 +40,7 @@ longphase: longphase-$(LPVER)/Makefile cp longphase-${LPVER}/$@ $@ -libclair3.so: samtools-${SAMVER}/htslib-${SAMVER} +libclair3.so: samtools-${SAMVER}/htslib-${SAMVER} libhts.a ${PYTHON} build.py diff --git a/src/clair3_full_alignment.c b/src/clair3_full_alignment.c index 7a59b01..0366398 100644 --- a/src/clair3_full_alignment.c +++ b/src/clair3_full_alignment.c @@ -209,7 +209,7 @@ int realign_read(Variant *variant, Read *read, size_t i, size_t consumed, size_t uint32_t *cigartuples = read->cigartuples; uint8_t *seqi = read->seqi; size_t n_cigar = read->n_cigar; - size_t middle_op = bam_cigar_op(cigartuples[i]); + //size_t middle_op = bam_cigar_op(cigartuples[i]); // unused size_t middle_length = bam_cigar_oplen(cigartuples[i]); size_t left_consumed = consumed > 0 ? consumed : 0; size_t right_consumed = consumed < middle_length ? middle_length - consumed : 0; @@ -258,7 +258,7 @@ int haplotag_read(Variants_info *variants_info, Read *read, char *ref_seq, size_ size_t query_pos = 0; size_t v_position = 0; Variant **variants = variants_info->variants; - uint8_t *seqi = read->seqi; + //uint8_t *seqi = read->seqi; // unused uint32_t *cigartuples = read->cigartuples; size_t n_cigar = read->n_cigar; size_t j = variants_info->variant_current_pos; @@ -266,7 +266,7 @@ int haplotag_read(Variants_info *variants_info, Read *read, char *ref_seq, size_ khash_t(KH_INT_COUNTER) *haplotype_cost = kh_init(KH_INT_COUNTER); int allele = 0; - while (j < n && variants[j]->position < ref_pos) + while (j < n && (size_t)variants[j]->position < ref_pos) j += 1; for (size_t i = 0; i < n_cigar; i++) @@ -456,7 +456,8 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) } } - size_t flanking_candidates[flanking_candidates_num]; + size_t *flanking_candidates = malloc(flanking_candidates_num * sizeof(size_t)); + for (khiter_t k = kh_begin(flanking_candidates_p); k != kh_end(flanking_candidates_p); ++k) { if (kh_exist(flanking_candidates_p, k)) @@ -517,13 +518,13 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) variant_current_pos++; variants_info.variant_current_pos = variant_current_pos; - while (candidate_current_index < flanking_candidates_num && flanking_candidates[candidate_current_index] < pos) + while (candidate_current_index < flanking_candidates_num && flanking_candidates[candidate_current_index] < (size_t)pos) candidate_current_index++; read.read_end = get_read_end(cigartuples, n_cigar, read.read_start); // get the overlap candidates number and skip the alignment if no flanking candidate overlapped - size_t overlap_candidates_num = get_overlap_candidate_num(pos, read.read_end, candidate_current_index, flanking_candidates_num, &flanking_candidates); + size_t overlap_candidates_num = get_overlap_candidate_num(pos, read.read_end, candidate_current_index, flanking_candidates_num, flanking_candidates); read.overlap_candidates_num = overlap_candidates_num; if (read.overlap_candidates_num == 0) { @@ -651,14 +652,14 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) } // allocate memory of the input matrix of all candidates - int8_t *matrix = calloc(candidate_num * matrix_depth * no_of_positions * channel_size, sizeof(int8_t)); - - HAP read_hap_array[reads_num]; - int matrix_read_index_array[matrix_depth]; Alt_info *alt_info = malloc(matrix_depth * sizeof(Alt_info)); + HAP *read_hap_array = malloc(reads_num * sizeof(HAP)); + int *matrix_read_index_array = malloc(matrix_depth * sizeof(int)); - char **alt_info_p = calloc(candidate_num, sizeof(char*)); + // allocate output fa_data data = calloc(1, sizeof(_fa_data)); + char **alt_info_p = calloc(candidate_num, sizeof(char*)); + int8_t *matrix = calloc(candidate_num * matrix_depth * no_of_positions * channel_size, sizeof(int8_t)); // loop each candiate and generate full-alignment input matrix for (size_t i = 0; i < candidate_num; i++) @@ -690,7 +691,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) read_hap_array[overlap_read_num++].haplotype = read.haplotype; } - sort_read_name_by_haplotype(&read_hap_array, &matrix_read_index_array, matrix_depth, overlap_read_num); + sort_read_name_by_haplotype(read_hap_array, matrix_read_index_array, matrix_depth, overlap_read_num); // loop each overlapped read of a candidate for (size_t d = 0; d < matrix_depth; d++) @@ -718,7 +719,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) continue; } - if (offset < 0 || offset >= read.overlap_candidates_num) + if (offset < 0 || (size_t)offset >= read.overlap_candidates_num) continue; int8_t alt_v = 0; @@ -728,7 +729,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) if (is_center_pos) candidate_depth++; - size_t alt_int = read.pos_info[offset].alt_base; + //size_t alt_int = read.pos_info[offset].alt_base; // unused char alt_base = seq_nt16_str[read.pos_info[offset].alt_base]; if (read.pos_info[offset].ins_length > 0) { @@ -825,17 +826,17 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) int ref_count = pos_alt_info[i].acgt_count[acgt2num[center_ref_base - 'A']]; - sprintf(alt_info_str, "%i-%i-%c-", candidate + 1, candidate_depth, center_ref_base); - for (size_t j = 0; j < 4; j++) + sprintf(alt_info_str, "%zu-%zu-%c-", candidate + 1, candidate_depth, center_ref_base); + for (int8_t j = 0; j < 4; j++) { if (j != acgt2num[center_ref_base - 'A'] && pos_alt_info[i].acgt_count[j] > 0) - sprintf(alt_info_str + strlen(alt_info_str), "X%c %i ", ACGT[j], pos_alt_info[i].acgt_count[j]); + sprintf(alt_info_str + strlen(alt_info_str), "X%c %zu ", ACGT[j], pos_alt_info[i].acgt_count[j]); } for (khiter_t k = kh_begin(pos_alt_info[i].ins_counter); k != kh_end(pos_alt_info[i].ins_counter); k++) { if (kh_exist(pos_alt_info[i].ins_counter, k)) { - char *key = kh_key(pos_alt_info[i].ins_counter, k); + const char *key = kh_key(pos_alt_info[i].ins_counter, k); int val = kh_val(pos_alt_info[i].ins_counter, k); ref_count -= val; if (strlen(key) <= max_indel_length) @@ -858,7 +859,7 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) int key = kh_key(pos_alt_info[i].del_counter, k); int val = kh_val(pos_alt_info[i].del_counter, k); ref_count -= val; - if (key <= max_indel_length) + if (key <= (int)max_indel_length) // don't set max_indel_length to >2^32 { if (strlen(alt_info_str) + key + 32 >= max_alt_length) { @@ -904,6 +905,9 @@ size_t min_mq, size_t min_bq, size_t matrix_depth, size_t max_indel_length) free(chr); free(pos_alt_info); free(alt_info); + free(read_hap_array); + free(matrix_read_index_array); + free(flanking_candidates); kh_counter_destroy(read_name_set); kh_int_counter_destroy(candidates_p); kh_int_counter_destroy(flanking_candidates_p); diff --git a/src/clair3_full_alignment.h b/src/clair3_full_alignment.h index 6fcf080..ad071f5 100644 --- a/src/clair3_full_alignment.h +++ b/src/clair3_full_alignment.h @@ -14,7 +14,7 @@ static const int8_t HAP_TYPE[3] = {60, 30, 90}; #define normalize_hap(x) (HAP_TYPE[x]) static const size_t overhang = 10; -static const char *RN = "\0"; +//static const char *RN = "\0"; // unused static const size_t min_haplotag_mq = 20; static const size_t expand_reference_region = 2000000; static const size_t flanking_base_num = 16; diff --git a/src/clair3_pileup.c b/src/clair3_pileup.c index 62308b0..206e21a 100644 --- a/src/clair3_pileup.c +++ b/src/clair3_pileup.c @@ -212,8 +212,8 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co size_t ins_count = 0; bool pass_af = false; - bool pass_snp_af = false; - bool pass_indel_af = false; + //bool pass_snp_af = false; // ununsed + //bool pass_indel_af = false; // unused const char *c_name = data->hdr->target_name[tid]; if (strcmp(c_name, chr) != 0) continue; @@ -222,7 +222,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co n_cols++; - if (pre_pos + 1 != pos || pre_pos == 0) + if (pre_pos + 1 != (size_t)pos || pre_pos == 0) contiguous_flanking_num = 0; else contiguous_flanking_num++; @@ -355,7 +355,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co for (size_t i = 0; i < 4; i++) { forward_sum += pileup->matrix[major_col + i]; reverse_sum += pileup->matrix[major_col + i + reverse_pos_start]; - if (i == ref_offset_forward) { + if (i == (size_t)ref_offset_forward) { ref_count = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start]; } else { size_t current_count = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start]; @@ -397,15 +397,15 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co size_t max_alt_length = 64; char *alt_info_str = xalloc(max_alt_length, sizeof(char), "alt_info_str"); - sprintf(alt_info_str, "%i-%i-%c-", pos+1, depth, ref_base); + sprintf(alt_info_str, "%i-%zu-%c-", pos+1, depth, ref_base); //snp for (size_t i = 0; i < 4; i++) { forward_sum += pileup->matrix[major_col + i]; reverse_sum += pileup->matrix[major_col + i + reverse_pos_start]; size_t alt_sum = pileup->matrix[major_col + i] + pileup->matrix[major_col + i + reverse_pos_start]; - if (alt_sum > 0 && i != ref_offset_forward) - sprintf(alt_info_str + strlen(alt_info_str), "X%c %i ", plp_bases_clair3[i], alt_sum); + if (alt_sum > 0 && i != (size_t)ref_offset_forward) + sprintf(alt_info_str + strlen(alt_info_str), "X%c %zu ", plp_bases_clair3[i], alt_sum); } //del for (size_t i = 0; i < del_buf_size; i++) { @@ -418,7 +418,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co max_alt_length = max_alt_length << 1; alt_info_str = xrealloc(alt_info_str, max_alt_length*sizeof(char), "alt_info_str"); } - sprintf(alt_info_str + strlen(alt_info_str), "D%.*s %i ", i+1,ref_seq+offset+1, d); + sprintf(alt_info_str + strlen(alt_info_str), "D%.*s %zu ", (int)i+1, ref_seq+offset+1, d); } } @@ -434,7 +434,7 @@ plp_data calculate_clair3_pileup(const char *region, const bam_fset* bam_set, co max_alt_length = max_alt_length << 1; alt_info_str = xrealloc(alt_info_str, max_alt_length *sizeof(char), "alt_info_str"); } - sprintf(alt_info_str + strlen(alt_info_str), "I%c%s %i ", ref_base, key, val); + sprintf(alt_info_str + strlen(alt_info_str), "I%c%s %zu ", ref_base, key, val); } } } diff --git a/src/medaka_common.c b/src/medaka_common.c index 588006b..2240032 100644 --- a/src/medaka_common.c +++ b/src/medaka_common.c @@ -62,9 +62,10 @@ char *substring(char *string, int position, int length) { char *ptr; size_t i; + if(length < 0) return NULL; ptr = malloc(length + 1); - for (i = 0 ; i < length ; i++) { + for (i = 0 ; i < (size_t) length ; i++) { *(ptr + i) = *(string + position); string++; } diff --git a/src/medaka_khcounter.c b/src/medaka_khcounter.c index a8e9577..abf4ae0 100644 --- a/src/medaka_khcounter.c +++ b/src/medaka_khcounter.c @@ -93,8 +93,8 @@ void kh_counter_print(khash_t(KH_COUNTER) *hash) { printf("%s -> %i\n", key, val); } } - kh_counter_stats_t stats = kh_counter_stats(hash); -// printf("max: %i, sum: %i\n", stats.max, stats.sum); + //kh_counter_stats_t stats = kh_counter_stats(hash); + //printf("max: %i, sum: %i\n", stats.max, stats.sum); }