From 1519d48eec40d6e5b6f839c4d8c28d190439eab1 Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Thu, 23 Nov 2023 12:52:43 +0000 Subject: [PATCH] Fix buffer read overrun in cram_encode_aux. This is triggered when the aux data hasn't been validated and contains a partial buffer. Eg a non-null terminated string, or an 'i' integer with < 4 bytes remaining in the buffer. This is a bit involved as the htslib API also lacks bounds checking in the utility functions like bam_aux2i. I added a local bounds checking equivalent, but in time maybe this znd other similar functions should be moved to become an official part of the API. --- cram/cram_encode.c | 112 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 18 deletions(-) diff --git a/cram/cram_encode.c b/cram/cram_encode.c index ccb22397cd..2ac5ad51f9 100644 --- a/cram/cram_encode.c +++ b/cram/cram_encode.c @@ -1243,6 +1243,58 @@ static int cram_encode_slice(cram_fd *fd, cram_container *c, return -1; } +static inline const char *bam_data_end(bam1_t *b) { + return (const char *)b->data + b->l_data; +} + +/* + * A bounds checking version of bam_aux2i. + */ +static inline int bam_aux2i_end(const uint8_t *aux, const uint8_t *aux_end) { + int type = *aux++; + switch (type) { + case 'c': + if (aux_end - aux < 1) { + errno = EINVAL; + return 0; + } + return *(int8_t *)aux; + case 'C': + if (aux_end - aux < 1) { + errno = EINVAL; + return 0; + } + return *aux; + case 's': + if (aux_end - aux < 2) { + errno = EINVAL; + return 0; + } + return le_to_i16(aux); + case 'S': + if (aux_end - aux < 2) { + errno = EINVAL; + return 0; + } + return le_to_u16(aux); + case 'i': + if (aux_end - aux < 4) { + errno = EINVAL; + return 0; + } + return le_to_i32(aux); + case 'I': + if (aux_end - aux < 4) { + errno = EINVAL; + return 0; + } + return le_to_u32(aux); + default: + errno = EINVAL; + } + return 0; +} + /* * Returns the number of expected read names for this record. */ @@ -1251,7 +1303,7 @@ static int expected_template_count(bam_seq_t *b) { uint8_t *TC = (uint8_t *)bam_aux_get(b, "TC"); if (TC) { - int n = bam_aux2i(TC); + int n = bam_aux2i_end(TC, (uint8_t *)bam_data_end(b)); if (expected < n) expected = n; } @@ -2704,6 +2756,7 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, char *aux, *orig; sam_hrec_rg_t *brg = NULL; int aux_size = bam_get_l_aux(b); + const char *aux_end = bam_data_end(b); cram_block *td_b = c->comp_hdr->TD_blk; int TD_blk_size = BLOCK_SIZE(td_b), new; char *key; @@ -2732,15 +2785,19 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, } // Copy aux keys to td_b and aux values to slice aux blocks - while (aux - orig < aux_size && aux[0] != 0) { + while (aux_end - aux >= 1 && aux[0] != 0) { int r; + // Room for code + type + at least 1 byte of data + if (aux - orig >= aux_size - 3) + return NULL; + // RG:Z if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') { char *rg = &aux[3]; brg = sam_hrecs_find_rg(fd->header->hrecs, rg); if (brg) { - while (*aux++); + while (aux < aux_end && *aux++); if (CRAM_MAJOR_VERS(fd->version) >= 4) BLOCK_APPEND(td_b, "RG*", 3); continue; @@ -2754,7 +2811,7 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, if (aux[0] == 'M' && aux[1] == 'D' && aux[2] == 'Z') { if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_MD) { if (MD && MD->s && strncasecmp(MD->s, aux+3, orig + aux_size - (aux+3)) == 0) { - while (*aux++); + while (aux < aux_end && *aux++); if (CRAM_MAJOR_VERS(fd->version) >= 4) BLOCK_APPEND(td_b, "MD*", 3); continue; @@ -2765,7 +2822,7 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, // NM:i if (aux[0] == 'N' && aux[1] == 'M') { if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_NM) { - int NM_ = bam_aux2i((uint8_t *)aux+2); + int NM_ = bam_aux2i_end((uint8_t *)aux+2, (uint8_t *)aux_end); if (NM_ == NM) { switch(aux[2]) { case 'A': case 'C': case 'c': aux+=4; break; @@ -2952,6 +3009,9 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, switch(aux[2]) { case 'A': case 'C': case 'c': + if (aux_end - aux < 3+1) + return NULL; + if (!tm->blk) { if (!(tm->blk = cram_new_block(EXTERNAL, key))) return NULL; @@ -2966,6 +3026,9 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, break; case 'S': case 's': + if (aux_end - aux < 3+2) + return NULL; + if (!tm->blk) { if (!(tm->blk = cram_new_block(EXTERNAL, key))) return NULL; @@ -2979,6 +3042,9 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, break; case 'I': case 'i': case 'f': + if (aux_end - aux < 3+4) + return NULL; + if (!tm->blk) { if (!(tm->blk = cram_new_block(EXTERNAL, key))) return NULL; @@ -2992,6 +3058,9 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, break; case 'd': + if (aux_end - aux < 3+8) + return NULL; + if (!tm->blk) { if (!(tm->blk = cram_new_block(EXTERNAL, key))) return NULL; @@ -3004,24 +3073,29 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, aux+=8; break; - case 'Z': case 'H': - { - if (!tm->blk) { - if (!(tm->blk = cram_new_block(EXTERNAL, key))) - return NULL; - codec->out = tm->blk; - } + case 'Z': case 'H': { + if (aux_end - aux < 3) + return NULL; - char *aux_s; - aux += 3; - aux_s = aux; - while (*aux++); - if (codec->encode(s, codec, aux_s, aux - aux_s) < 0) + if (!tm->blk) { + if (!(tm->blk = cram_new_block(EXTERNAL, key))) return NULL; + codec->out = tm->blk; } + + char *aux_s; + aux += 3; + aux_s = aux; + while (aux < aux_end && *aux++); + if (codec->encode(s, codec, aux_s, aux - aux_s) < 0) + return NULL; break; + } case 'B': { + if (aux_end - aux < 3+4) + return NULL; + int type = aux[3], blen; uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) + (((unsigned char *)aux)[5]<< 8) + @@ -3061,6 +3135,8 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, } blen += 5; // sub-type & length + if (aux_end - aux < blen) + return NULL; if (codec->encode(s, codec, aux, blen) < 0) return NULL; @@ -3068,7 +3144,7 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b, break; } default: - hts_log_error("Unknown aux type '%c'", aux[2]); + hts_log_error("Unknown aux type '%c'", aux_end - aux < 2 ? '?' : aux[2]); return NULL; } tm->blk->m = tm->m;