From be4e6a0f40f87faa8b246d28b7bcf6b770d26c56 Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Wed, 2 Jan 2013 17:09:14 +0000 Subject: [PATCH 1/4] Added more wrappers for functions that make system calls Added the following wrappers that check the results of system calls and exit non-zero if something went wrong. This is so bwa can be more robust against system failures (e.g. IO errors from remote storage, or running out of memory). The existing and new wrappers have also been modified so that they no longer try to dump core on failure. In most cases the resulting core files are not useful (especially if bwa was compiled with optimization turned on) so just pollute whatever directories they got written to. The err_fflush wrapper also now calls fsync on regular files to try to ensure that data has been written out when using remote filesystems. This is not really necessary for local filesystems but only takes a few seconds to do. The fsync can be disabled by removing the #define for FSYNC_ON_FLUSH in utils.c. Wrappers for memory allocation functions: xcalloc xmalloc xrealloc xstrdup New wrappers for IO functions: err_fwrite err_fread_noeof (also dies on EOF) err_gzread err_fseek err_rewind err_ftell err_fprintf err_printf err_fflush err_fclose err_gzclose --- utils.c | 227 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- utils.h | 42 ++++++++++- 2 files changed, 254 insertions(+), 15 deletions(-) diff --git a/utils.c b/utils.c index 303e9d28..430dbab0 100644 --- a/utils.c +++ b/utils.c @@ -24,6 +24,7 @@ */ /* Contact: Heng Li */ +#define FSYNC_ON_FLUSH #include #include @@ -32,8 +33,15 @@ #include #include #include +#include +#ifdef FSYNC_ON_FLUSH +#include +#include +#include +#endif #include "utils.h" + extern time_t _prog_start; FILE *err_xopen_core(const char *func, const char *fn, const char *mode) @@ -42,8 +50,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode) if (strcmp(fn, "-") == 0) return (strstr(mode, "r"))? stdin : stdout; if ((fp = fopen(fn, mode)) == 0) { - fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn); - abort(); + err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno)); } // setvbuf(fp, NULL, _IOFBF, 1048576); return fp; @@ -51,10 +58,7 @@ FILE *err_xopen_core(const char *func, const char *fn, const char *mode) FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp) { if (freopen(fn, mode, fp) == 0) { - fprintf(stderr, "[%s] fail to open file '%s': ", func, fn); - perror(NULL); - fprintf(stderr, "Abort!\n"); - abort(); + err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno)); } // setvbuf(fp, NULL, _IOFBF, 1048576); return fp; @@ -62,16 +66,30 @@ FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE gzFile err_xzopen_core(const char *func, const char *fn, const char *mode) { gzFile fp; - if (strcmp(fn, "-") == 0) - return gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); + if (strcmp(fn, "-") == 0) { + fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); + /* According to zlib.h, this is the only reason gzdopen can fail */ + if (!fp) err_fatal(func, "Out of memory"); + return fp; + } if ((fp = gzopen(fn, mode)) == 0) { - fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn); - abort(); + err_fatal(func, "fail to open file '%s' : %s", fn, errno ? strerror(errno) : "Out of memory"); } // gzbuffer(fp, 524288); return fp; } void err_fatal(const char *header, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "[%s] ", header); + vfprintf(stderr, fmt, args); + fprintf(stderr, "\n"); + va_end(args); + exit(EXIT_FAILURE); +} + +void err_fatal_core(const char *header, const char *fmt, ...) { va_list args; va_start(args, fmt); @@ -82,11 +100,198 @@ void err_fatal(const char *header, const char *fmt, ...) abort(); } -void err_fatal_simple_core(const char *func, const char *msg) +void _err_fatal_simple(const char *func, const char *msg) +{ + fprintf(stderr, "[%s] %s\n", func, msg); + exit(EXIT_FAILURE); +} + +void _err_fatal_simple_core(const char *func, const char *msg) { fprintf(stderr, "[%s] %s Abort!\n", func, msg); abort(); } +size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t ret = fwrite(ptr, size, nmemb, stream); + if (ret != nmemb) + { + _err_fatal_simple("fwrite", strerror(errno)); + } + return ret; +} + +size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + size_t ret = fread(ptr, size, nmemb, stream); + if (ret != nmemb) + { + _err_fatal_simple("fread", ferror(stream) ? strerror(errno) : "Unexpected end of file"); + } + return ret; +} + +int err_gzread(gzFile file, void *ptr, unsigned int len) +{ + int ret = gzread(file, ptr, len); + + if (ret < 0) + { + int errnum = 0; + const char *msg = gzerror(file, &errnum); + _err_fatal_simple("gzread", Z_ERRNO == errnum ? strerror(errno) : msg); + } + + return ret; +} + +int err_fseek(FILE *stream, long offset, int whence) +{ + int ret = fseek(stream, offset, whence); + if (0 != ret) + { + _err_fatal_simple("fseek", strerror(errno)); + } + return ret; +} + +long err_ftell(FILE *stream) +{ + long ret = ftell(stream); + if (-1 == ret) + { + _err_fatal_simple("ftell", strerror(errno)); + } + return ret; +} + +int err_printf(const char *format, ...) +{ + va_list arg; + int done; + + va_start(arg, format); + done = vfprintf(stdout, format, arg); + int saveErrno = errno; + va_end(arg); + + if (done < 0) + { + _err_fatal_simple("vfprintf(stdout)", strerror(saveErrno)); + } + return done; +} + +int err_fprintf(FILE *stream, const char *format, ...) +{ + va_list arg; + int done; + + va_start(arg, format); + done = vfprintf(stream, format, arg); + int saveErrno = errno; + va_end(arg); + + if (done < 0) + { + _err_fatal_simple("vfprintf", strerror(saveErrno)); + } + return done; +} + +int err_fflush(FILE *stream) +{ + int ret = fflush(stream); + if (ret != 0) + { + _err_fatal_simple("fflush", strerror(errno)); + } +#ifdef FSYNC_ON_FLUSH + /* Calling fflush() ensures that all the data has made it to the + kernel buffers, but this may not be sufficient for remote filesystems + (e.g. NFS, lustre) as an error may still occur while the kernel + is copying the buffered data to the file server. To be sure of + catching these errors, we need to call fsync() on the file + descriptor, but only if it is a regular file. */ + { + struct stat sbuf; + if (0 != fstat(fileno(stream), &sbuf)) + { + _err_fatal_simple("fstat", strerror(errno)); + } + if (S_ISREG(sbuf.st_mode)) + { + if (0 != fsync(fileno(stream))) + { + _err_fatal_simple("fsync", strerror(errno)); + } + } + } +#endif + return ret; +} + +int err_fclose(FILE *stream) +{ + int ret = fclose(stream); + if (ret != 0) + { + _err_fatal_simple("fclose", strerror(errno)); + } + return ret; +} + +int err_gzclose(gzFile file) +{ + int ret = gzclose(file); + if (Z_OK != ret) + { + _err_fatal_simple("gzclose", Z_ERRNO == ret ? strerror(errno) : zError(ret)); + } + + return ret; +} + +void *err_calloc(size_t nmemb, size_t size, const char *file, unsigned int line, const char *func) +{ + void *p = calloc(nmemb, size); + if (NULL == p) + { + err_fatal(func, "Failed to allocate %zd bytes at %s line %u: %s\n", nmemb * size, file, line, strerror(errno)); + } + return p; +} + +void *err_malloc(size_t size, const char *file, unsigned int line, const char *func) +{ + void *p = malloc(size); + if (NULL == p) + { + err_fatal(func, "Failed to allocate %zd bytes at %s line %u: %s\n", size, file, line, strerror(errno)); + } + return p; +} + +void *err_realloc(void *ptr, size_t size, const char *file, unsigned int line, const char *func) +{ + void *p = realloc(ptr, size); + if (NULL == p) + { + err_fatal(func, "Failed to allocate %zd bytes at %s line %u: %s\n", size, file, line, strerror(errno)); + } + return p; +} + +char *err_strdup(const char *s, const char *file, unsigned int line, const char *func) +{ + char *p = strdup(s); + + if (NULL == p) + { + err_fatal(func, "Failed to allocate %zd bytes at %s line %u: %s\n", strlen(s), file, line, strerror(errno)); + } + return p; +} clock_t clock(void) { diff --git a/utils.h b/utils.h index efe3155c..369e3ea9 100644 --- a/utils.h +++ b/utils.h @@ -31,21 +31,55 @@ #include #include -#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg) +#ifdef __GNUC__ +// Tell GCC to validate printf format string and args +#define ATTRIBUTE(list) __attribute__ (list) +#else +#define ATTRIBUTE(list) +#endif + +#define err_fatal_simple(msg) _err_fatal_simple(__func__, msg) +#define err_fatal_simple_core(msg) _err_fatal_simple_core(__func__, msg) #define xopen(fn, mode) err_xopen_core(__func__, fn, mode) #define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp) #define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode) -#define xassert(cond, msg) if ((cond) == 0) err_fatal_simple_core(__func__, msg) +#define xassert(cond, msg) if ((cond) == 0) _err_fatal_simple_core(__func__, msg) + +#define xcalloc(n, s) err_calloc( (n), (s), __FILE__, __LINE__, __func__) +#define xmalloc(s) err_malloc( (s), __FILE__, __LINE__, __func__) +#define xrealloc(p, s) err_realloc((p), (s), __FILE__, __LINE__, __func__) +#define xstrdup(s) err_strdup( (s), __FILE__, __LINE__, __func__) #ifdef __cplusplus extern "C" { #endif - void err_fatal(const char *header, const char *fmt, ...); - void err_fatal_simple_core(const char *func, const char *msg); + void err_fatal(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn)); + void err_fatal_core(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn)); + void _err_fatal_simple(const char *func, const char *msg) ATTRIBUTE((noreturn)); + void _err_fatal_simple_core(const char *func, const char *msg) ATTRIBUTE((noreturn)); FILE *err_xopen_core(const char *func, const char *fn, const char *mode); FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp); gzFile err_xzopen_core(const char *func, const char *fn, const char *mode); + size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); + size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream); + int err_gzread(gzFile file, void *ptr, unsigned int len); + int err_fseek(FILE *stream, long offset, int whence); +#define err_rewind(FP) err_fseek((FP), 0, SEEK_SET) + long err_ftell(FILE *stream); + int err_fprintf(FILE *stream, const char *format, ...) + ATTRIBUTE((format(printf, 2, 3))); + int err_printf(const char *format, ...) + ATTRIBUTE((format(printf, 1, 2))); + int err_fflush(FILE *stream); + int err_fclose(FILE *stream); + int err_gzclose(gzFile file); + void *err_calloc(size_t nmemb, size_t size, const char *file, unsigned int line, const char *func); + void *err_malloc(size_t size, const char *file, unsigned int line, const char *func); + void *err_realloc(void *ptr, size_t size, const char *file, unsigned int line, const char *func); + char *err_strdup(const char *s, const char *file, unsigned int line, const char *func); + + int getmaxrss(long *maxrsskb); #ifdef __cplusplus From e78ff22ff4942a181320d181b6193e52efab4bdf Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Thu, 3 Jan 2013 09:22:19 +0000 Subject: [PATCH 2/4] Use wrapper functions to catch system errors - io calls Use the wrapper functions in utils.c plus a few extra bits of error checking code to catch system errors and exit non-zero when they occur. This commit mainly covers funtions that make IO system calls. Updated dependencies in Makefiles. --- Makefile | 62 +++++++++++++++++--------------- bamlite.c | 26 ++++++++------ bamlite.h | 7 ++-- bntseq.c | 76 +++++++++++++++++++++++++-------------- bwape.c | 20 +++++------ bwapeio1.c | 5 +-- bwase.c | 92 +++++++++++++++++++++++------------------------ bwaseio1.c | 5 +-- bwaseqio.c | 4 +-- bwt_gen/Makefile | 5 ++- bwt_gen/bwt_gen.c | 30 ++++++++-------- bwtaln.c | 11 +++--- bwtindex.c | 4 +-- bwtio.c | 48 +++++++++++++------------ bwtmisc.c | 34 +++++++++--------- bwtsw2_aux.c | 12 +++---- main.c | 40 +++++++++++---------- simple_dp.c | 12 +++---- 18 files changed, 271 insertions(+), 222 deletions(-) diff --git a/Makefile b/Makefile index 7949e991..db5af6ac 100644 --- a/Makefile +++ b/Makefile @@ -54,38 +54,42 @@ clean:cleanlocal-recur # DO NOT DELETE -bamlite.o: bamlite.h -bntseq.o: bntseq.h main.h utils.h kseq.h -bwape.o: bwtaln.h bwt.h stdaln.h kvec.h bntseq.h utils.h bwatpx.h kstring.h -bwape.o: khash.h ksort.h -bwape1.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h khash.h -bwape1.o: ksort.h -bwape2.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h -bwape3.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h -bwape4.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h -bwapeio1.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h -bwapese1.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h -bwase.o: stdaln.h bwase.h bntseq.h bwt.h bwtaln.h utils.h kstring.h bwatpx.h -bwase.o: kvec.h -bwase1.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h -bwase4.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h -bwaseio1.o: bwatpx.h bwtaln.h bwt.h stdaln.h bntseq.h kvec.h kstring.h -bwaseqio.o: bwtaln.h bwt.h stdaln.h utils.h bamlite.h kseq.h -bwt.o: utils.h bwt.h +bamlite.o: bamlite.h utils.h +bntseq.o: bntseq.h kseq.h main.h utils.h +bwape.o: bntseq.h bwatpx.h bwt.h bwtaln.h khash.h ksort.h kstring.h kvec.h +bwape.o: stdaln.h utils.h +bwape1.o: bntseq.h bwatpx.h bwt.h bwtaln.h khash.h ksort.h kstring.h kvec.h +bwape1.o: stdaln.h +bwape2.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwape3.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwape4.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwapeio1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwapeio1.o: utils.h +bwapese1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwase.o: bntseq.h bwase.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwase.o: utils.h +bwase1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwase4.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwaseio1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwaseio1.o: utils.h +bwaseqio.o: bamlite.h bwt.h bwtaln.h kseq.h stdaln.h utils.h +bwatpx.o: bntseq.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwt.o: bwt.h utils.h bwt_lite.o: bwt_lite.h -bwtaln.o: bwtaln.h bwt.h stdaln.h bwtgap.h utils.h -bwtgap.o: bwtgap.h bwt.h bwtaln.h stdaln.h +bwtaln.o: bwt.h bwtaln.h bwtgap.h stdaln.h utils.h +bwtgap.o: bwt.h bwtaln.h bwtgap.h stdaln.h bwtindex.o: bntseq.h bwt.h main.h utils.h bwtio.o: bwt.h utils.h -bwtmisc.o: bntseq.h utils.h main.h bwt.h -bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h stdaln.h kstring.h -bwtsw2_aux.o: kseq.h ksort.h -bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h ksort.h -bwtsw2_core.o: bwt_lite.h bwtsw2.h bntseq.h bwt.h kvec.h khash.h ksort.h -bwtsw2_main.o: bwt.h bwtsw2.h bntseq.h bwt_lite.h utils.h -cs2nt.o: bwtaln.h bwt.h stdaln.h +bwtmisc.o: bntseq.h bwt.h main.h utils.h +bwtsw2.o: bntseq.h bwt.h bwt_lite.h +bwtsw2_aux.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h kseq.h ksort.h kstring.h +bwtsw2_aux.o: stdaln.h utils.h +bwtsw2_chain.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h ksort.h +bwtsw2_core.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h khash.h ksort.h kvec.h +bwtsw2_main.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h utils.h +cs2nt.o: bwt.h bwtaln.h stdaln.h kstring.o: kstring.h -main.o: main.h -simple_dp.o: stdaln.h utils.h kseq.h +main.o: main.h utils.h +simple_dp.o: kseq.h stdaln.h utils.h stdaln.o: stdaln.h utils.o: utils.h diff --git a/bamlite.c b/bamlite.c index 5aad3927..836510cd 100644 --- a/bamlite.c +++ b/bamlite.c @@ -2,6 +2,7 @@ #include #include #include +#include "utils.h" #include "bamlite.h" /********************* @@ -62,11 +63,11 @@ void bam_header_destroy(bam_header_t *header) if (header == 0) return; if (header->target_name) { for (i = 0; i < header->n_targets; ++i) - free(header->target_name[i]); + if (header->target_name[i]) free(header->target_name[i]); + if (header->target_len) free(header->target_len); free(header->target_name); - free(header->target_len); } - free(header->text); + if (header->text) free(header->text); free(header); } @@ -80,28 +81,33 @@ bam_header_t *bam_header_read(bamFile fp) magic_len = bam_read(fp, buf, 4); if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) { fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n"); - return 0; + return NULL; } header = bam_header_init(); // read plain text and the number of reference sequences - bam_read(fp, &header->l_text, 4); + if (bam_read(fp, &header->l_text, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&header->l_text); header->text = (char*)calloc(header->l_text + 1, 1); - bam_read(fp, header->text, header->l_text); - bam_read(fp, &header->n_targets, 4); + if (bam_read(fp, header->text, header->l_text) != header->l_text) goto fail; + if (bam_read(fp, &header->n_targets, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&header->n_targets); // read reference sequence names and lengths header->target_name = (char**)calloc(header->n_targets, sizeof(char*)); header->target_len = (uint32_t*)calloc(header->n_targets, 4); for (i = 0; i != header->n_targets; ++i) { - bam_read(fp, &name_len, 4); + if (bam_read(fp, &name_len, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&name_len); header->target_name[i] = (char*)calloc(name_len, 1); - bam_read(fp, header->target_name[i], name_len); - bam_read(fp, &header->target_len[i], 4); + if (bam_read(fp, header->target_name[i], name_len) != name_len) { + goto fail; + } + if (bam_read(fp, &header->target_len[i], 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]); } return header; + fail: + bam_header_destroy(header); + return NULL; } static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data) diff --git a/bamlite.h b/bamlite.h index 167fa441..4f6a9f05 100644 --- a/bamlite.h +++ b/bamlite.h @@ -3,12 +3,13 @@ #include #include +#include "utils.h" typedef gzFile bamFile; -#define bam_open(fn, mode) gzopen(fn, mode) +#define bam_open(fn, mode) xzopen(fn, mode) #define bam_dopen(fd, mode) gzdopen(fd, mode) -#define bam_close(fp) gzclose(fp) -#define bam_read(fp, buf, size) gzread(fp, buf, size) +#define bam_close(fp) err_gzclose(fp) +#define bam_read(fp, buf, size) err_gzread(fp, buf, size) typedef struct { int32_t n_targets; diff --git a/bntseq.c b/bntseq.c index 21ba91f1..8a16950d 100644 --- a/bntseq.c +++ b/bntseq.c @@ -29,12 +29,13 @@ #include #include #include +#include #include "bntseq.h" #include "main.h" #include "utils.h" #include "kseq.h" -KSEQ_INIT(gzFile, gzread) +KSEQ_INIT(gzFile, err_gzread) unsigned char nst_nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -63,25 +64,27 @@ void bns_dump(const bntseq_t *bns, const char *prefix) { // dump .ann strcpy(str, prefix); strcat(str, ".ann"); fp = xopen(str, "w"); - fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed); + err_fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed); for (i = 0; i != bns->n_seqs; ++i) { bntann1_t *p = bns->anns + i; - fprintf(fp, "%d %s", p->gi, p->name); - if (p->anno[0]) fprintf(fp, " %s\n", p->anno); - else fprintf(fp, "\n"); - fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs); + err_fprintf(fp, "%d %s", p->gi, p->name); + if (p->anno[0]) err_fprintf(fp, " %s\n", p->anno); + else err_fprintf(fp, "\n"); + err_fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs); } - fclose(fp); + err_fflush(fp); + err_fclose(fp); } { // dump .amb strcpy(str, prefix); strcat(str, ".amb"); fp = xopen(str, "w"); - fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes); + err_fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes); for (i = 0; i != bns->n_holes; ++i) { bntamb1_t *p = bns->ambs + i; - fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb); + err_fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb); } - fclose(fp); + err_fflush(fp); + err_fclose(fp); } } @@ -89,13 +92,16 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c { char str[1024]; FILE *fp; + const char *fname; bntseq_t *bns; long long xx; int i; + int scanres; bns = (bntseq_t*)calloc(1, sizeof(bntseq_t)); { // read .ann - fp = xopen(ann_filename, "r"); - fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed); + fp = xopen(fname = ann_filename, "r"); + scanres = fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed); + if (scanres != 3) goto badread; bns->l_pac = xx; bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t)); for (i = 0; i < bns->n_seqs; ++i) { @@ -103,39 +109,54 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c char *q = str; int c; // read gi and sequence name - fscanf(fp, "%u%s", &p->gi, str); + scanres = fscanf(fp, "%u%s", &p->gi, str); + if (scanres != 2) goto badread; p->name = strdup(str); // read fasta comments - while ((c = fgetc(fp)) != '\n' && c != EOF) *q++ = c; + while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c; + while (c != '\n' && c != EOF) c = fgetc(fp); + if (c == EOF) { + scanres = EOF; + goto badread; + } *q = 0; if (q - str > 1) p->anno = strdup(str + 1); // skip leading space else p->anno = strdup(""); // read the rest - fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs); + scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs); + if (scanres != 3) goto badread; p->offset = xx; } - fclose(fp); + err_fclose(fp); } { // read .amb int64_t l_pac; int32_t n_seqs; - fp = xopen(amb_filename, "r"); - fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes); + fp = xopen(fname = amb_filename, "r"); + scanres = fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes); + if (scanres != 3) goto badread; l_pac = xx; xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files."); bns->ambs = (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t)); for (i = 0; i < bns->n_holes; ++i) { bntamb1_t *p = bns->ambs + i; - fscanf(fp, "%lld%d%s", &xx, &p->len, str); + scanres = fscanf(fp, "%lld%d%s", &xx, &p->len, str); + if (scanres != 3) goto badread; p->offset = xx; p->amb = str[0]; } - fclose(fp); + err_fclose(fp); } { // open .pac bns->fp_pac = xopen(pac_filename, "rb"); } return bns; + + badread: + if (EOF == scanres) { + err_fatal(__func__, "Error reading %s : %s\n", fname, ferror(fp) ? strerror(errno) : "Unexpected end of file"); + } + err_fatal(__func__, "Parse error reading %s\n", fname); } bntseq_t *bns_restore(const char *prefix) @@ -152,7 +173,7 @@ void bns_destroy(bntseq_t *bns) if (bns == 0) return; else { int i; - if (bns->fp_pac) fclose(bns->fp_pac); + if (bns->fp_pac) err_fclose(bns->fp_pac); free(bns->ambs); for (i = 0; i < bns->n_seqs; ++i) { free(bns->anns[i].name); @@ -224,7 +245,7 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix) { // fill buffer if (c >= 4) c = lrand48()&0x3; if (l_buf == 0x40000) { - fwrite(buf, 1, 0x10000, fp); + err_fwrite(buf, 1, 0x10000, fp); memset(buf, 0, 0x10000); l_buf = 0; } @@ -239,16 +260,17 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix) ret = bns->l_pac; { // finalize .pac file ubyte_t ct; - fwrite(buf, 1, (l_buf>>2) + ((l_buf&3) == 0? 0 : 1), fp); + err_fwrite(buf, 1, (l_buf>>2) + ((l_buf&3) == 0? 0 : 1), fp); // the following codes make the pac file size always (l_pac/4+1+1) if (bns->l_pac % 4 == 0) { ct = 0; - fwrite(&ct, 1, 1, fp); + err_fwrite(&ct, 1, 1, fp); } ct = bns->l_pac % 4; - fwrite(&ct, 1, 1, fp); + err_fwrite(&ct, 1, 1, fp); // close .pac file - fclose(fp); + err_fflush(fp); + err_fclose(fp); } bns_dump(bns, prefix); bns_destroy(bns); @@ -265,7 +287,7 @@ int bwa_fa2pac(int argc, char *argv[]) } fp = xzopen(argv[1], "r"); bns_fasta2bntseq(fp, (argc < 3)? argv[1] : argv[2]); - gzclose(fp); + err_gzclose(fp); return 0; } diff --git a/bwape.c b/bwape.c index 9ad28f83..b52ff670 100644 --- a/bwape.c +++ b/bwape.c @@ -244,14 +244,14 @@ int bwa_cal_pac_pos_pe(const char *prefix, bwt_t *const _bwt[2], int n_seqs, bwa p[j]->n_multi = 0; p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2); - fread(&n_aln, 4, 1, fp_sa[j]); + err_fread_noeof(&n_aln, 4, 1, fp_sa[j]); if (n_aln > kv_max(d[0]->aln[j])) kv_resize(bwt_aln1_t, d[0]->aln[j], n_aln); d[0]->aln[j].n = n_aln; - fread(d[0]->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]); + err_fread_noeof(d[0]->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]); kv_copy(bwt_aln1_t, buf[j][i].aln, d[0]->aln[j]); // backup d[0]->aln[j] @@ -550,8 +550,8 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, // load reference sequence if (_pacseq == 0) { pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); - rewind(bns->fp_pac); - fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); + err_rewind(bns->fp_pac); + err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); } else pacseq = (ubyte_t*)_pacseq; if (!popt->is_sw || ii->avg < 0.0) return pacseq; @@ -686,10 +686,10 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f #endif // ! _USE_LOCAL_GHASH last_ii.avg = -1.0; - fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]); + err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[0]); ks[0] = bwa_open_reads(opt.mode, fn_fa[0]); opt0 = opt; - fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten! + err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten! ks[1] = bwa_open_reads(opt.mode, fn_fa[1]); if (!(opt.mode & BWA_MODE_COMPREAD)) { popt->type = BWA_PET_SOLID; @@ -701,8 +701,8 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f strcpy(str, prefix); strcat(str, ".rbwt"); bwt[1] = bwt_restore_bwt(str); strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt[1]); pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1); - rewind(bns->fp_pac); - fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); + err_rewind(bns->fp_pac); + err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac); } } @@ -889,7 +889,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f if (ntbns) bns_destroy(ntbns); for (i = 0; i < 2; ++i) { bwa_seq_close(ks[i]); - fclose(fp_sa[i]); + err_fclose(fp_sa[i]); } #ifndef _USE_LOCAL_GHASH @@ -980,7 +980,7 @@ int bwa_sai2sam_pe(int argc, char *argv[]) } bwa_sai2sam_pe_core(argv[optind], argv + optind + 1, argv + optind+3, popt); - + err_fflush(stdout); free(bwa_rg_line); free(bwa_rg_id); free(popt); diff --git a/bwapeio1.c b/bwapeio1.c index 9e35cb9e..be046ea2 100644 --- a/bwapeio1.c +++ b/bwapeio1.c @@ -7,6 +7,7 @@ #include #include "bwatpx.h" +#include "utils.h" extern int num_sampe_threads; extern int async_read_seq; @@ -94,14 +95,14 @@ static void thr_bwa_read_seq2_tpx(long n_needed) p[j]->n_multi = 0; p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2); - fread(&n_aln, 4, 1, fp_sa_addr[j]); + err_fread_noeof(&n_aln, 4, 1, fp_sa_addr[j]); if (n_aln > kv_max(d_addr[0]->aln[j])) kv_resize(bwt_aln1_t, d_addr[0]->aln[j], n_aln); d_addr[0]->aln[j].n = n_aln; - fread(d_addr[0]->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa_addr[j]); + err_fread_noeof(d_addr[0]->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa_addr[j]); kv_copy(bwt_aln1_t, buf[j][i].aln, d_addr[0]->aln[j]); // backup d_addr[0]->aln[j] diff --git a/bwase.c b/bwase.c index d0da8ea3..c50b8eb2 100644 --- a/bwase.c +++ b/bwase.c @@ -524,14 +524,14 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t if (ntbns) { // in color space ntpac = (ubyte_t*)calloc(ntbns->l_pac/4+1, 1); - rewind(ntbns->fp_pac); - fread(ntpac, 1, ntbns->l_pac/4 + 1, ntbns->fp_pac); + err_rewind(ntbns->fp_pac); + err_fread_noeof(ntpac, 1, ntbns->l_pac/4 + 1, ntbns->fp_pac); } if (!_pacseq) { pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); - rewind(bns->fp_pac); - fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); + err_rewind(bns->fp_pac); + err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); } else pacseq = _pacseq; // tpx @@ -654,18 +654,18 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i } else flag |= SAM_FMU; } - printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name); - printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ); + err_printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name); + err_printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ); // print CIGAR if (p->cigar) { for (j = 0; j != p->n_cigar; ++j) { - printf("%d%c", __cigar_len(p->cigar[j]), "MIDS"[__cigar_op(p->cigar[j])]); + err_printf("%d%c", __cigar_len(p->cigar[j]), "MIDS"[__cigar_op(p->cigar[j])]); } } else if (p->type == BWA_TYPE_NO_MATCH) { - printf("*"); + err_printf("*"); } else { - printf("%dM", p->len); + err_printf("%dM", p->len); } // print mate coordinate @@ -675,14 +675,14 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality // redundant calculation here, but should not matter too much m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid); - printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name); + err_printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name); isize = (seqid == m_seqid)? pos_5(mate) - pos_5(p) : 0; if (p->type == BWA_TYPE_NO_MATCH) isize = 0; - printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize); + err_printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize); } else if (mate) { - printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1)); + err_printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1)); } else { - printf("\t*\t0\t0\t"); + err_printf("\t*\t0\t0\t"); } // print sequence and quality @@ -702,19 +702,19 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i if (p->strand) { seq_reverse(p->len, p->qual, 0); // reverse quality } - printf("%s", p->qual); + err_printf("%s", p->qual); } else { - printf("*"); + err_printf("*"); } if (bwa_rg_id) { - printf("\tRG:Z:%s", bwa_rg_id); + err_printf("\tRG:Z:%s", bwa_rg_id); } if (p->bc[0]) { - printf("\tBC:Z:%s", p->bc); + err_printf("\tBC:Z:%s", p->bc); } if (p->clip_len < p->full_len) { - printf("\tXC:i:%d", p->clip_len); + err_printf("\tXC:i:%d", p->clip_len); } if (p->type != BWA_TYPE_NO_MATCH) { int i; @@ -722,30 +722,30 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i XT = "NURM"[p->type]; if (nn > 10) XT = 'N'; // print tags - printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm); - if (nn) printf("\tXN:i:%d", nn); - if (mate) printf("\tSM:i:%d\tAM:i:%d", p->seQ, am); + err_printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm); + if (nn) err_printf("\tXN:i:%d", nn); + if (mate) err_printf("\tSM:i:%d\tAM:i:%d", p->seQ, am); if (p->type != BWA_TYPE_MATESW) { // X0 and X1 are not available for this type of alignment - printf("\tX0:i:%d", p->c1); - if (p->c1 <= max_top2) printf("\tX1:i:%d", p->c2); + err_printf("\tX0:i:%d", p->c1); + if (p->c1 <= max_top2) err_printf("\tX1:i:%d", p->c2); } - printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo, p->n_gapo+p->n_gape); - if (p->md) printf("\tMD:Z:%s", p->md); + err_printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo, p->n_gapo+p->n_gape); + if (p->md) err_printf("\tMD:Z:%s", p->md); // print multiple hits if (p->n_multi) { - printf("\tXA:Z:"); + err_printf("\tXA:Z:"); for (i = 0; i < p->n_multi; ++i) { bwt_multi1_t *q = p->multi + i; int k; j = pos_end_multi(q, p->len) - q->pos; nn = bns_coor_pac2real(bns, q->pos, j, &seqid); - printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+', + err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+', (int)(q->pos - bns->anns[seqid].offset + 1)); if (q->cigar) { for (k = 0; k < q->n_cigar; ++k) - printf("%d%c", __cigar_len(q->cigar[k]), "MIDS"[__cigar_op(q->cigar[k])]); - } else printf("%dM", p->len); - printf(",%d;", q->gap + q->mm); + err_printf("%d%c", __cigar_len(q->cigar[k]), "MIDS"[__cigar_op(q->cigar[k])]); + } else err_printf("%dM", p->len); + err_printf(",%d;", q->gap + q->mm); } } } @@ -758,7 +758,7 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i int flag = p->extra_flag | SAM_FSU; if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU; - printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag); + err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag); for (j = 0; j != p->len; ++j) { putchar("ACGTN"[(int)s[j]]); @@ -770,19 +770,19 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i if (p->strand) { seq_reverse(p->len, p->qual, 0); // reverse quality } - printf("%s", p->qual); + err_printf("%s", p->qual); } else { - printf("*"); + err_printf("*"); } if (bwa_rg_id) { - printf("\tRG:Z:%s", bwa_rg_id); + err_printf("\tRG:Z:%s", bwa_rg_id); } if (p->bc[0]) { - printf("\tBC:Z:%s", p->bc); + err_printf("\tBC:Z:%s", p->bc); } if (p->clip_len < p->full_len) { - printf("\tXC:i:%d", p->clip_len); + err_printf("\tXC:i:%d", p->clip_len); } putchar('\n'); @@ -1236,10 +1236,10 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, } if(num_mut > 0){ - printf("%s/%d\t%s\t%s\t%d\t%c\t%d\t%c\t%s\t%d\t%s\t%dM\t%s\n", + err_printf("%s/%d\t%s\t%s\t%d\t%c\t%d\t%c\t%s\t%d\t%s\t%dM\t%s\n", samf1,samf2a,samf10,samf11,samf5a,samf2b,p->full_len,samf2c,samf3,samf4,sam_mutstr,p->full_len,sam_mdzstr); }else{ - printf("%s/%d\t%s\t%s\t%d\t%c\t%d\t%c\t%s\t%d\t0\t%dM\t%s\n", + err_printf("%s/%d\t%s\t%s\t%d\t%c\t%d\t%c\t%s\t%d\t0\t%dM\t%s\n", samf1,samf2a,samf10,samf11,samf5a,samf2b,p->full_len,samf2c,samf3,samf4,p->full_len,sam_mdzstr); } @@ -1376,7 +1376,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, samf5a = 1; } - printf("%s/%d\t%s\t%s\t%d\t%c\t%d\t%c\t%s\t%d\t0\t%dM\t%d\n", + err_printf("%s/%d\t%s\t%s\t%d\t%c\t%d\t%c\t%s\t%d\t0\t%dM\t%d\n", samf1,samf2a,samf10,samf11,samf5a,samf2b,p->full_len,samf2c,samf3,samf4,p->full_len,p->len); } @@ -1416,8 +1416,8 @@ void bwa_print_sam_SQ(const bntseq_t *bns) } for (i = 0; i < bns->n_seqs; ++i) - printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len); - if (bwa_rg_line) printf("%s\n", bwa_rg_line); + err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len); + if (bwa_rg_line) err_printf("%s\n", bwa_rg_line); } void bwase_initialize() @@ -1516,7 +1516,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f srand48(bns->seed); fp_sa = xopen(fn_sa, "r"); - fread(&opt, sizeof(gap_opt_t), 1, fp_sa); + err_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa); if (!(opt.mode & BWA_MODE_COMPREAD)) // in color space; initialize ntpac ntbns = bwa_open_nt(prefix); @@ -1599,14 +1599,14 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f bwa_seq_t *p = seqs[nexti1] + i; int n_aln; - fread(&n_aln, 4, 1, fp_sa); + err_fread_noeof(&n_aln, 4, 1, fp_sa); if (n_aln > m_aln) { m_aln = n_aln; aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln); } - fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa); + err_fread_noeof(aln, sizeof(bwt_aln1_t), n_aln, fp_sa); bwa_aln2seq_core(n_aln, aln, p, 1, n_occ); } @@ -1688,7 +1688,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f bwa_seq_close(ks); if (ntbns) bns_destroy(ntbns); bns_destroy(bns); - fclose(fp_sa); + err_fclose(fp_sa); return; } @@ -1741,7 +1741,7 @@ int bwa_sai2sam_se(int argc, char *argv[]) } bwa_sai2sam_se_core(argv[optind], argv[optind+1], argv[optind+2], n_occ); - + err_fflush(stdout); free(bwa_rg_line); free(bwa_rg_id); // cant use getrusage for ru.maxrss until kernel 2.6.36 ... diff --git a/bwaseio1.c b/bwaseio1.c index cfc97152..fa0712fd 100644 --- a/bwaseio1.c +++ b/bwaseio1.c @@ -7,6 +7,7 @@ #include #include "bwatpx.h" +#include "utils.h" extern int num_sampe_threads; extern int async_read_seq; @@ -48,14 +49,14 @@ static void thr_bwa_read_seq1_tpx(long n_needed) bwa_seq_t *p = *seq1_addr + i; int n_aln; - fread(&n_aln, 4, 1, fp_sa_addr); + err_fread_noeof(&n_aln, 4, 1, fp_sa_addr); if (n_aln > m_aln_copy) { m_aln_copy = n_aln; *aln_addr = (bwt_aln1_t*)realloc(*aln_addr, sizeof(bwt_aln1_t) * m_aln_copy); } - fread(*aln_addr, sizeof(bwt_aln1_t), n_aln, fp_sa_addr); + err_fread_noeof(*aln_addr, sizeof(bwt_aln1_t), n_aln, fp_sa_addr); bwa_aln2seq_core(n_aln, *aln_addr, p, 1, n_occ_copy); } diff --git a/bwaseqio.c b/bwaseqio.c index 967b3cd0..835f9877 100644 --- a/bwaseqio.c +++ b/bwaseqio.c @@ -5,7 +5,7 @@ #include "bamlite.h" #include "kseq.h" -KSEQ_INIT(gzFile, gzread) +KSEQ_INIT(gzFile, err_gzread) extern unsigned char nst_nt4_table[256]; static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; @@ -46,7 +46,7 @@ void bwa_seq_close(bwa_seqio_t *bs) if (bs == 0) return; if (bs->is_bam) bam_close(bs->fp); else { - gzclose(bs->ks->f->f); + err_gzclose(bs->ks->f->f); kseq_destroy(bs->ks); } free(bs); diff --git a/bwt_gen/Makefile b/bwt_gen/Makefile index f7e669e6..5dc4513d 100644 --- a/bwt_gen/Makefile +++ b/bwt_gen/Makefile @@ -2,7 +2,7 @@ CC= gcc CFLAGS= -g -Wall -O2 -m64 # comment out `-m64' for 32-bit compilation DFLAGS= -D_FILE_OFFSET_BITS=64 OBJS= bwt_gen.o QSufSort.o -INCLUDES= +INCLUDES= -I. -I.. VERSION= 0.1.0 LIBS= SUBDIRS= @@ -21,3 +21,6 @@ cleanlocal: rm -f gmon.out *.o a.out $(PROG) *~ *.a clean:cleanlocal + +QSufSort.o: QSufSort.h bwt_gen.h +bwt_gen.o: QSufSort.h bwt_gen.h diff --git a/bwt_gen/bwt_gen.c b/bwt_gen/bwt_gen.c index d208a81f..8f00463e 100644 --- a/bwt_gen/bwt_gen.c +++ b/bwt_gen/bwt_gen.c @@ -27,6 +27,7 @@ #include #include "bwt_gen.h" #include "QSufSort.h" +#include "utils.h" static unsigned int TextLengthFromBytePacked(unsigned int bytePackedLength, unsigned int bitPerChar, unsigned int lastByteLength) @@ -1407,13 +1408,13 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetN exit(1); } - fseek(packedFile, -1, SEEK_END); - packedFileLen = ftell(packedFile); + err_fseek(packedFile, -1, SEEK_END); + packedFileLen = err_ftell(packedFile); if ((int)packedFileLen < 0) { fprintf(stderr, "BWTIncConstructFromPacked: Cannot determine file length!\n"); exit(1); } - fread(&lastByteLength, sizeof(unsigned char), 1, packedFile); + err_fread_noeof(&lastByteLength, sizeof(unsigned char), 1, packedFile); totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength); bwtInc = BWTIncCreate(totalTextLength, targetNBit, initialMaxBuildSize, incMaxBuildSize); @@ -1427,10 +1428,10 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetN } textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte - fseek(packedFile, -2, SEEK_CUR); - fseek(packedFile, -((int)textSizeInByte), SEEK_CUR); - fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile); - fseek(packedFile, -((int)textSizeInByte + 1), SEEK_CUR); + err_fseek(packedFile, -2, SEEK_CUR); + err_fseek(packedFile, -((int)textSizeInByte), SEEK_CUR); + err_fread_noeof(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile); + err_fseek(packedFile, -((int)textSizeInByte + 1), SEEK_CUR); ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad); BWTIncConstruct(bwtInc, textToLoad); @@ -1443,9 +1444,9 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetN textToLoad = totalTextLength - processedTextLength; } textSizeInByte = textToLoad / CHAR_PER_BYTE; - fseek(packedFile, -((int)textSizeInByte), SEEK_CUR); - fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile); - fseek(packedFile, -((int)textSizeInByte), SEEK_CUR); + err_fseek(packedFile, -((int)textSizeInByte), SEEK_CUR); + err_fread_noeof(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile); + err_fseek(packedFile, -((int)textSizeInByte), SEEK_CUR); ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad); BWTIncConstruct(bwtInc, textToLoad); processedTextLength += textToLoad; @@ -1498,11 +1499,12 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o exit(1); } - fwrite(&bwt->inverseSa0, sizeof(unsigned int), 1, bwtFile); - fwrite(bwt->cumulativeFreq + 1, sizeof(unsigned int), ALPHABET_SIZE, bwtFile); + err_fwrite(&bwt->inverseSa0, sizeof(unsigned int), 1, bwtFile); + err_fwrite(bwt->cumulativeFreq + 1, sizeof(unsigned int), ALPHABET_SIZE, bwtFile); bwtLength = BWTFileSizeInWord(bwt->textLength); - fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile); - fclose(bwtFile); + err_fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile); + err_fflush(bwtFile); + err_fclose(bwtFile); /* occValueFile = (FILE*)fopen(occValueFileName, "wb"); if (occValueFile == NULL) { diff --git a/bwtaln.c b/bwtaln.c index 75dd6765..09ff1d8e 100644 --- a/bwtaln.c +++ b/bwtaln.c @@ -201,7 +201,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) } // core loop - fwrite(opt, sizeof(gap_opt_t), 1, stdout); + err_fwrite(opt, sizeof(gap_opt_t), 1, stdout); while ((seqs = bwa_read_seq(ks, n_needed, &n_seqs, opt->mode, opt->trim_qual)) != 0) { tot_seqs += n_seqs; t = clock(); @@ -277,17 +277,17 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) } } - fwrite(&nhits, 4, 1, stdout); + err_fwrite(&nhits, 4, 1, stdout); if(nhits > 0){ - fwrite(ps, sizeof(bwt_aln1_t), nhits, stdout); + err_fwrite(ps, sizeof(bwt_aln1_t), nhits, stdout); } } #else // _REMOVE_SHADOW fprintf(stderr, "[bwa_aln_core] write to the disk... "); for (i = 0; i < n_seqs; ++i) { bwa_seq_t *p = seqs + i; - fwrite(&p->n_aln, 4, 1, stdout); - if (p->n_aln) fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout); + err_fwrite(&p->n_aln, 4, 1, stdout); + if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout); } #endif // _REMOVE_SHADOW fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); @@ -395,6 +395,7 @@ int bwa_aln(int argc, char *argv[]) } } bwa_aln_core(argv[optind], argv[optind+1], opt); + err_fflush(stdout); free(opt); // cant use getrusage for ru.maxrss until kernel 2.6.36 ... diff --git a/bwtindex.c b/bwtindex.c index c752a2f7..929aa226 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -82,7 +82,7 @@ int bwa_index(int argc, char *argv[]) fprintf(stderr, "[bwa_index] Pack FASTA... "); l_pac = bns_fasta2bntseq(fp, prefix); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); - gzclose(fp); + err_gzclose(fp); } else { // color indexing gzFile fp = xzopen(argv[optind], "r"); strcat(strcpy(str, prefix), ".nt"); @@ -90,7 +90,7 @@ int bwa_index(int argc, char *argv[]) fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... "); l_pac = bns_fasta2bntseq(fp, str); fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); - gzclose(fp); + err_gzclose(fp); { char *tmp_argv[3]; tmp_argv[0] = argv[0]; tmp_argv[1] = str; tmp_argv[2] = prefix; diff --git a/bwtio.c b/bwtio.c index c5ffcae1..05eb326c 100644 --- a/bwtio.c +++ b/bwtio.c @@ -8,22 +8,24 @@ void bwt_dump_bwt(const char *fn, const bwt_t *bwt) { FILE *fp; fp = xopen(fn, "wb"); - fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); - fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fwrite(bwt->bwt, sizeof(bwtint_t), bwt->bwt_size, fp); - fclose(fp); + err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); + err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); + err_fwrite(bwt->bwt, sizeof(bwtint_t), bwt->bwt_size, fp); + err_fflush(fp); + err_fclose(fp); } void bwt_dump_sa(const char *fn, const bwt_t *bwt) { FILE *fp; fp = xopen(fn, "wb"); - fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); - fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); - fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); - fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); - fclose(fp); + err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); + err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); + err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); + err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); + err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); + err_fflush(fp); + err_fclose(fp); } void bwt_restore_sa(const char *fn, bwt_t *bwt) @@ -33,19 +35,19 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt) bwtint_t primary; fp = xopen(fn, "rb"); - fread(&primary, sizeof(bwtint_t), 1, fp); + err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp); xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same."); - fread(skipped, sizeof(bwtint_t), 4, fp); // skip - fread(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); - fread(&primary, sizeof(bwtint_t), 1, fp); + err_fread_noeof(skipped, sizeof(bwtint_t), 4, fp); // skip + err_fread_noeof(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); + err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp); xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); bwt->sa[0] = -1; - fread(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); - fclose(fp); + err_fread_noeof(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); + err_fclose(fp); } bwt_t *bwt_restore_bwt(const char *fn) @@ -55,15 +57,15 @@ bwt_t *bwt_restore_bwt(const char *fn) bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); fp = xopen(fn, "rb"); - fseek(fp, 0, SEEK_END); - bwt->bwt_size = (ftell(fp) - sizeof(bwtint_t) * 5) >> 2; + err_fseek(fp, 0, SEEK_END); + bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2; bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); - fseek(fp, 0, SEEK_SET); - fread(&bwt->primary, sizeof(bwtint_t), 1, fp); - fread(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fread(bwt->bwt, 4, bwt->bwt_size, fp); + err_fseek(fp, 0, SEEK_SET); + err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp); + err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp); + err_fread_noeof(bwt->bwt, 4, bwt->bwt_size, fp); bwt->seq_len = bwt->L2[4]; - fclose(fp); + err_fclose(fp); bwt_gen_cnt_table(bwt); return bwt; diff --git a/bwtmisc.c b/bwtmisc.c index 1082065b..fbd7aacc 100644 --- a/bwtmisc.c +++ b/bwtmisc.c @@ -46,10 +46,10 @@ int64_t bwa_seq_len(const char *fn_pac) int64_t pac_len; ubyte_t c; fp = xopen(fn_pac, "rb"); - fseek(fp, -1, SEEK_END); - pac_len = ftell(fp); - fread(&c, 1, 1, fp); - fclose(fp); + err_fseek(fp, -1, SEEK_END); + pac_len = err_ftell(fp); + err_fread_noeof(&c, 1, 1, fp); + err_fclose(fp); return (pac_len - 1) * 4 + (int)c; } @@ -69,8 +69,8 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) // prepare sequence pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1); buf2 = (ubyte_t*)calloc(pac_size, 1); - fread(buf2, 1, pac_size, fp); - fclose(fp); + err_fread_noeof(buf2, 1, pac_size, fp); + err_fclose(fp); memset(bwt->L2, 0, 5 * 4); buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1); for (i = 0; i < bwt->seq_len; ++i) { @@ -168,8 +168,8 @@ void bwa_pac_rev_core(const char *fn, const char *fn_rev) bufin = (ubyte_t*)calloc(pac_len, 1); bufout = (ubyte_t*)calloc(pac_len, 1); fp = xopen(fn, "rb"); - fread(bufin, 1, pac_len, fp); - fclose(fp); + err_fread_noeof(bufin, 1, pac_len, fp); + err_fclose(fp); for (i = seq_len - 1, j = 0; i >= 0; --i) { int c = bufin[i>>2] >> ((~i&3)<<1) & 3; bwtint_t j = seq_len - 1 - i; @@ -177,10 +177,11 @@ void bwa_pac_rev_core(const char *fn, const char *fn_rev) } free(bufin); fp = xopen(fn_rev, "wb"); - fwrite(bufout, 1, pac_len, fp); + err_fwrite(bufout, 1, pac_len, fp); ct = seq_len % 4; - fwrite(&ct, 1, 1, fp); - fclose(fp); + err_fwrite(&ct, 1, 1, fp); + err_fflush(fp); + err_fclose(fp); free(bufout); } @@ -206,8 +207,8 @@ uint8_t *bwa_pac2cspac_core(const bntseq_t *bns) int c1, c2; pac = (uint8_t*)calloc(bns->l_pac/4 + 1, 1); cspac = (uint8_t*)calloc(bns->l_pac/4 + 1, 1); - fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); - rewind(bns->fp_pac); + err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac); + err_rewind(bns->fp_pac); c1 = pac[0]>>6; cspac[0] = c1<<6; for (i = 1; i < bns->l_pac; ++i) { c2 = pac[i>>2] >> (~i&3)*2 & 3; @@ -236,10 +237,11 @@ int bwa_pac2cspac(int argc, char *argv[]) str = (char*)calloc(strlen(argv[2]) + 5, 1); strcat(strcpy(str, argv[2]), ".pac"); fp = xopen(str, "wb"); - fwrite(cspac, 1, bns->l_pac/4 + 1, fp); + err_fwrite(cspac, 1, bns->l_pac/4 + 1, fp); ct = bns->l_pac % 4; - fwrite(&ct, 1, 1, fp); - fclose(fp); + err_fwrite(&ct, 1, 1, fp); + err_fflush(fp); + err_fclose(fp); bns_destroy(bns); free(cspac); return 0; diff --git a/bwtsw2_aux.c b/bwtsw2_aux.c index 8ba6455d..c9a95924 100644 --- a/bwtsw2_aux.c +++ b/bwtsw2_aux.c @@ -15,7 +15,7 @@ #include "kstring.h" #include "kseq.h" -KSEQ_INIT(gzFile, gzread) +KSEQ_INIT(gzFile, err_gzread) #include "ksort.h" #define __left_lt(a, b) ((a).end > (b).end) @@ -581,12 +581,12 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t * // print and reset for (i = 0; i < _seq->n; ++i) { bsw2seq1_t *p = _seq->seq + i; - if (p->sam) printf("%s", p->sam); + if (p->sam) err_printf("%s", p->sam); free(p->name); free(p->seq); free(p->qual); free(p->sam); p->tid = -1; p->l = 0; p->name = p->seq = p->qual = p->sam = 0; } - fflush(stdout); + err_fflush(stdout); _seq->n = 0; } @@ -604,8 +604,8 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target[2] return; } for (l = 0; l < bns->n_seqs; ++l) - printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len); - fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); + err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len); + err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac); fp = xzopen(fn, "r"); ks = kseq_init(fp); _seq = calloc(1, sizeof(bsw2seq_t)); @@ -633,6 +633,6 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target[2] process_seqs(_seq, opt, bns, pac, target); free(_seq->seq); free(_seq); kseq_destroy(ks); - gzclose(fp); + err_gzclose(fp); free(pac); } diff --git a/main.c b/main.c index 034742a6..cd4766d5 100644 --- a/main.c +++ b/main.c @@ -67,13 +67,14 @@ void bwa_print_sam_PG() return; } - printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION); + err_printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION); } int main(int argc, char *argv[]) { struct timeval st; int j; + int ret; if (argc < 2) return usage(); @@ -94,25 +95,28 @@ int main(int argc, char *argv[]) // ------------------- - if (strcmp(argv[1], "fa2pac") == 0) return bwa_fa2pac(argc-1, argv+1); - else if (strcmp(argv[1], "pac2bwt") == 0) return bwa_pac2bwt(argc-1, argv+1); - else if (strcmp(argv[1], "pac2bwtgen") == 0) return bwt_bwtgen_main(argc-1, argv+1); - else if (strcmp(argv[1], "bwtupdate") == 0) return bwa_bwtupdate(argc-1, argv+1); - else if (strcmp(argv[1], "pac_rev") == 0) return bwa_pac_rev(argc-1, argv+1); - else if (strcmp(argv[1], "bwt2sa") == 0) return bwa_bwt2sa(argc-1, argv+1); - else if (strcmp(argv[1], "index") == 0) return bwa_index(argc-1, argv+1); - else if (strcmp(argv[1], "aln") == 0) return bwa_aln(argc-1, argv+1); - else if (strcmp(argv[1], "sw") == 0) return bwa_stdsw(argc-1, argv+1); - else if (strcmp(argv[1], "samse") == 0) return bwa_sai2sam_se(argc-1, argv+1); - else if (strcmp(argv[1], "sampe") == 0) return bwa_sai2sam_pe(argc-1, argv+1); - else if (strcmp(argv[1], "pac2cspac") == 0) return bwa_pac2cspac(argc-1, argv+1); - else if (strcmp(argv[1], "stdsw") == 0) return bwa_stdsw(argc-1, argv+1); - else if (strcmp(argv[1], "bwtsw2") == 0) return bwa_bwtsw2(argc-1, argv+1); - else if (strcmp(argv[1], "dbwtsw") == 0) return bwa_bwtsw2(argc-1, argv+1); - else if (strcmp(argv[1], "bwasw") == 0) return bwa_bwtsw2(argc-1, argv+1); + if (strcmp(argv[1], "fa2pac") == 0) ret = bwa_fa2pac(argc-1, argv+1); + else if (strcmp(argv[1], "pac2bwt") == 0) ret = bwa_pac2bwt(argc-1, argv+1); + else if (strcmp(argv[1], "pac2bwtgen") == 0) ret = bwt_bwtgen_main(argc-1, argv+1); + else if (strcmp(argv[1], "bwtupdate") == 0) ret = bwa_bwtupdate(argc-1, argv+1); + else if (strcmp(argv[1], "pac_rev") == 0) ret = bwa_pac_rev(argc-1, argv+1); + else if (strcmp(argv[1], "bwt2sa") == 0) ret = bwa_bwt2sa(argc-1, argv+1); + else if (strcmp(argv[1], "index") == 0) ret = bwa_index(argc-1, argv+1); + else if (strcmp(argv[1], "aln") == 0) ret = bwa_aln(argc-1, argv+1); + else if (strcmp(argv[1], "sw") == 0) ret = bwa_stdsw(argc-1, argv+1); + else if (strcmp(argv[1], "samse") == 0) ret = bwa_sai2sam_se(argc-1, argv+1); + else if (strcmp(argv[1], "sampe") == 0) ret = bwa_sai2sam_pe(argc-1, argv+1); + else if (strcmp(argv[1], "pac2cspac") == 0) ret = bwa_pac2cspac(argc-1, argv+1); + else if (strcmp(argv[1], "stdsw") == 0) ret = bwa_stdsw(argc-1, argv+1); + else if (strcmp(argv[1], "bwtsw2") == 0) ret = bwa_bwtsw2(argc-1, argv+1); + else if (strcmp(argv[1], "dbwtsw") == 0) ret = bwa_bwtsw2(argc-1, argv+1); + else if (strcmp(argv[1], "bwasw") == 0) ret = bwa_bwtsw2(argc-1, argv+1); else { fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]); return 1; } - return 0; + + err_fflush(stdout); + + return ret; } diff --git a/simple_dp.c b/simple_dp.c index 7c078c21..4663d6ae 100644 --- a/simple_dp.c +++ b/simple_dp.c @@ -8,7 +8,7 @@ #include "utils.h" #include "kseq.h" -KSEQ_INIT(gzFile, gzread) +KSEQ_INIT(gzFile, err_gzread) typedef struct { int l; @@ -80,7 +80,7 @@ static seqs_t *load_seqs(const char *fn) p->n = strdup((const char*)seq->name.s); } kseq_destroy(seq); - gzclose(fp); + err_gzclose(fp); fprintf(stderr, "[load_seqs] %d sequences are loaded.\n", s->n_seqs); return s; } @@ -94,14 +94,14 @@ static void aln_1seq(const seqs_t *ss, const char *name, int l, const char *s, c g_aln_param.band_width = l + p->l; aa = aln_stdaln_aux(s, (const char*)p->s, &g_aln_param, g_is_global, g_thres, l, p->l); if (aa->score >= g_thres || g_is_global) { - printf(">%s\t%d\t%d\t%s\t%c\t%d\t%d\t%d\t%d\t", p->n, aa->start1? aa->start1 : 1, aa->end1, name, strand, + err_printf(">%s\t%d\t%d\t%s\t%c\t%d\t%d\t%d\t%d\t", p->n, aa->start1? aa->start1 : 1, aa->end1, name, strand, aa->start2? aa->start2 : 1, aa->end2, aa->score, aa->subo); // NB: I put the short sequence as the first sequence in SW, an insertion to // the reference becomes a deletion from the short sequence. Therefore, I use // "MDI" here rather than "MID", and print ->out2 first rather than ->out1. for (i = 0; i != aa->n_cigar; ++i) - printf("%d%c", aa->cigar32[i]>>4, "MDI"[aa->cigar32[i]&0xf]); - printf("\n%s\n%s\n%s\n", aa->out2, aa->outm, aa->out1); + err_printf("%d%c", aa->cigar32[i]>>4, "MDI"[aa->cigar32[i]&0xf]); + err_printf("\n%s\n%s\n%s\n", aa->out2, aa->outm, aa->out1); } aln_free_AlnAln(aa); } @@ -123,7 +123,7 @@ static void aln_seqs(const seqs_t *ss, const char *fn) } } kseq_destroy(seq); - gzclose(fp); + err_gzclose(fp); } int bwa_stdsw(int argc, char *argv[]) From 6f0c56838b65947f62a099cf83ff4252a05e9208 Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Thu, 3 Jan 2013 12:16:52 +0000 Subject: [PATCH 3/4] Use wrapper functions to catch system errors - memory Use the wrapper functions in utils.c to catch system errors and exit non-zero when they occur. This commit adds wrappers for memory allocation functions (malloc, calloc, realloc and strdup). It also prevents gen_cigar from trying to realloc a buffer to zero bytes long. Updates the dependencies in the Makefile again. --- Makefile | 29 +++++++++-------- bamlite.c | 12 +++---- bamlite.h | 2 +- bntseq.c | 26 +++++++-------- bwape.c | 20 ++++++------ bwape1.c | 4 +-- bwapeio1.c | 8 ++--- bwapese1.c | 3 +- bwase.c | 80 +++++++++++++++++++++++------------------------ bwaseio1.c | 2 +- bwaseqio.c | 24 +++++++------- bwt.c | 2 +- bwt_gen/bwt_gen.c | 16 +++++----- bwt_lite.c | 11 ++++--- bwtaln.c | 16 +++++----- bwtgap.c | 13 ++++---- bwtindex.c | 10 +++--- bwtio.c | 6 ++-- bwtmisc.c | 20 ++++++------ bwtsw2_aux.c | 46 +++++++++++++-------------- bwtsw2_chain.c | 7 +++-- bwtsw2_core.c | 29 ++++++++--------- cs2nt.c | 3 +- is.c | 7 +++-- khash.h | 12 +++---- kseq.h | 12 +++---- ksort.h | 4 +-- kstring.c | 4 +-- kstring.h | 5 +-- kvec.h | 8 ++--- simple_dp.c | 10 +++--- stdaln.c | 37 +++++++++++----------- 32 files changed, 250 insertions(+), 238 deletions(-) diff --git a/Makefile b/Makefile index db5af6ac..37ca236b 100644 --- a/Makefile +++ b/Makefile @@ -59,37 +59,40 @@ bntseq.o: bntseq.h kseq.h main.h utils.h bwape.o: bntseq.h bwatpx.h bwt.h bwtaln.h khash.h ksort.h kstring.h kvec.h bwape.o: stdaln.h utils.h bwape1.o: bntseq.h bwatpx.h bwt.h bwtaln.h khash.h ksort.h kstring.h kvec.h -bwape1.o: stdaln.h -bwape2.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h -bwape3.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h -bwape4.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwape1.o: stdaln.h utils.h +bwape2.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h utils.h +bwape3.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h utils.h +bwape4.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h utils.h bwapeio1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h bwapeio1.o: utils.h bwapese1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwapese1.o: utils.h bwase.o: bntseq.h bwase.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h bwase.o: utils.h -bwase1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h -bwase4.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwase1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h utils.h +bwase4.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h utils.h bwaseio1.o: bntseq.h bwatpx.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h bwaseio1.o: utils.h bwaseqio.o: bamlite.h bwt.h bwtaln.h kseq.h stdaln.h utils.h -bwatpx.o: bntseq.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h +bwatpx.o: bntseq.h bwt.h bwtaln.h kstring.h kvec.h stdaln.h utils.h bwt.o: bwt.h utils.h -bwt_lite.o: bwt_lite.h +bwt_lite.o: bwt_lite.h utils.h bwtaln.o: bwt.h bwtaln.h bwtgap.h stdaln.h utils.h -bwtgap.o: bwt.h bwtaln.h bwtgap.h stdaln.h +bwtgap.o: bwt.h bwtaln.h bwtgap.h stdaln.h utils.h bwtindex.o: bntseq.h bwt.h main.h utils.h bwtio.o: bwt.h utils.h bwtmisc.o: bntseq.h bwt.h main.h utils.h bwtsw2.o: bntseq.h bwt.h bwt_lite.h bwtsw2_aux.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h kseq.h ksort.h kstring.h bwtsw2_aux.o: stdaln.h utils.h -bwtsw2_chain.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h ksort.h +bwtsw2_chain.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h ksort.h utils.h bwtsw2_core.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h khash.h ksort.h kvec.h +bwtsw2_core.o: utils.h bwtsw2_main.o: bntseq.h bwt.h bwt_lite.h bwtsw2.h utils.h -cs2nt.o: bwt.h bwtaln.h stdaln.h -kstring.o: kstring.h +cs2nt.o: bwt.h bwtaln.h stdaln.h utils.h +is.o: utils.h +kstring.o: kstring.h utils.h main.o: main.h utils.h simple_dp.o: kseq.h stdaln.h utils.h -stdaln.o: stdaln.h +stdaln.o: stdaln.h utils.h utils.o: utils.h diff --git a/bamlite.c b/bamlite.c index 836510cd..ec365d1c 100644 --- a/bamlite.c +++ b/bamlite.c @@ -54,7 +54,7 @@ int bam_is_be; bam_header_t *bam_header_init() { bam_is_be = bam_is_big_endian(); - return (bam_header_t*)calloc(1, sizeof(bam_header_t)); + return (bam_header_t*)xcalloc(1, sizeof(bam_header_t)); } void bam_header_destroy(bam_header_t *header) @@ -87,17 +87,17 @@ bam_header_t *bam_header_read(bamFile fp) // read plain text and the number of reference sequences if (bam_read(fp, &header->l_text, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&header->l_text); - header->text = (char*)calloc(header->l_text + 1, 1); + header->text = (char*)xcalloc(header->l_text + 1, 1); if (bam_read(fp, header->text, header->l_text) != header->l_text) goto fail; if (bam_read(fp, &header->n_targets, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&header->n_targets); // read reference sequence names and lengths - header->target_name = (char**)calloc(header->n_targets, sizeof(char*)); - header->target_len = (uint32_t*)calloc(header->n_targets, 4); + header->target_name = (char**)xcalloc(header->n_targets, sizeof(char*)); + header->target_len = (uint32_t*)xcalloc(header->n_targets, 4); for (i = 0; i != header->n_targets; ++i) { if (bam_read(fp, &name_len, 4) != 4) goto fail; if (bam_is_be) bam_swap_endian_4p(&name_len); - header->target_name[i] = (char*)calloc(name_len, 1); + header->target_name[i] = (char*)xcalloc(name_len, 1); if (bam_read(fp, header->target_name[i], name_len) != name_len) { goto fail; } @@ -152,7 +152,7 @@ int bam_read1(bamFile fp, bam1_t *b) if (b->m_data < b->data_len) { b->m_data = b->data_len; kroundup32(b->m_data); - b->data = (uint8_t*)realloc(b->data, b->m_data); + b->data = (uint8_t*)xrealloc(b->data, b->m_data); } if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4; b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2; diff --git a/bamlite.h b/bamlite.h index 4f6a9f05..0c080fdc 100644 --- a/bamlite.h +++ b/bamlite.h @@ -72,7 +72,7 @@ typedef struct { #define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf) #define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2) -#define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t))) +#define bam_init1() ((bam1_t*)xcalloc(1, sizeof(bam1_t))) #define bam_destroy1(b) do { \ if (b) { free((b)->data); free(b); } \ } while (0) diff --git a/bntseq.c b/bntseq.c index 8a16950d..5f7062aa 100644 --- a/bntseq.c +++ b/bntseq.c @@ -97,13 +97,13 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c long long xx; int i; int scanres; - bns = (bntseq_t*)calloc(1, sizeof(bntseq_t)); + bns = (bntseq_t*)xcalloc(1, sizeof(bntseq_t)); { // read .ann fp = xopen(fname = ann_filename, "r"); scanres = fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed); if (scanres != 3) goto badread; bns->l_pac = xx; - bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t)); + bns->anns = (bntann1_t*)xcalloc(bns->n_seqs, sizeof(bntann1_t)); for (i = 0; i < bns->n_seqs; ++i) { bntann1_t *p = bns->anns + i; char *q = str; @@ -111,7 +111,7 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c // read gi and sequence name scanres = fscanf(fp, "%u%s", &p->gi, str); if (scanres != 2) goto badread; - p->name = strdup(str); + p->name = xstrdup(str); // read fasta comments while (str - q < sizeof(str) - 1 && (c = fgetc(fp)) != '\n' && c != EOF) *q++ = c; while (c != '\n' && c != EOF) c = fgetc(fp); @@ -120,8 +120,8 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c goto badread; } *q = 0; - if (q - str > 1) p->anno = strdup(str + 1); // skip leading space - else p->anno = strdup(""); + if (q - str > 1) p->anno = xstrdup(str + 1); // skip leading space + else p->anno = xstrdup(""); // read the rest scanres = fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs); if (scanres != 3) goto badread; @@ -137,7 +137,7 @@ bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, c if (scanres != 3) goto badread; l_pac = xx; xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files."); - bns->ambs = (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t)); + bns->ambs = (bntamb1_t*)xcalloc(bns->n_holes, sizeof(bntamb1_t)); for (i = 0; i < bns->n_holes; ++i) { bntamb1_t *p = bns->ambs + i; scanres = fscanf(fp, "%lld%d%s", &xx, &p->len, str); @@ -198,12 +198,12 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix) // initialization seq = kseq_init(fp_fa); - bns = (bntseq_t*)calloc(1, sizeof(bntseq_t)); + bns = (bntseq_t*)xcalloc(1, sizeof(bntseq_t)); bns->seed = 11; // fixed seed for random generator srand48(bns->seed); m_seqs = m_holes = 8; - bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t)); - bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t)); + bns->anns = (bntann1_t*)xcalloc(m_seqs, sizeof(bntann1_t)); + bns->ambs = (bntamb1_t*)xcalloc(m_holes, sizeof(bntamb1_t)); q = bns->ambs; l_buf = 0; strcpy(name, prefix); strcat(name, ".pac"); @@ -215,11 +215,11 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix) int lasts; if (bns->n_seqs == m_seqs) { m_seqs <<= 1; - bns->anns = (bntann1_t*)realloc(bns->anns, m_seqs * sizeof(bntann1_t)); + bns->anns = (bntann1_t*)xrealloc(bns->anns, m_seqs * sizeof(bntann1_t)); } p = bns->anns + bns->n_seqs; - p->name = strdup((char*)seq->name.s); - p->anno = seq->comment.s? strdup((char*)seq->comment.s) : strdup("(null)"); + p->name = xstrdup((char*)seq->name.s); + p->anno = seq->comment.s? xstrdup((char*)seq->comment.s) : xstrdup("(null)"); p->gi = 0; p->len = l; p->offset = (bns->n_seqs == 0)? 0 : (p-1)->offset + (p-1)->len; p->n_ambs = 0; @@ -231,7 +231,7 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix) } else { if (bns->n_holes == m_holes) { m_holes <<= 1; - bns->ambs = (bntamb1_t*)realloc(bns->ambs, m_holes * sizeof(bntamb1_t)); + bns->ambs = (bntamb1_t*)xrealloc(bns->ambs, m_holes * sizeof(bntamb1_t)); } q = bns->ambs + bns->n_holes; q->len = 1; diff --git a/bwape.c b/bwape.c index b52ff670..1c24bb07 100644 --- a/bwape.c +++ b/bwape.c @@ -69,7 +69,7 @@ static clock_t read_aln_clocks = 0; pe_opt_t *bwa_init_pe_opt() { pe_opt_t *po; - po = (pe_opt_t*)calloc(1, sizeof(pe_opt_t)); + po = (pe_opt_t*)xcalloc(1, sizeof(pe_opt_t)); po->max_isize = 500; po->force_isize = 0; po->max_occ = 100000; @@ -115,7 +115,7 @@ static int infer_isize(int n_seqs, bwa_seq_t *seqs[2], isize_info_t *ii, double ii->avg = ii->std = -1.0; ii->low = ii->high = ii->high_bayesian = 0; - isizes = (uint64_t*)calloc(n_seqs, 8); + isizes = (uint64_t*)xcalloc(n_seqs, 8); for (i = 0, tot = 0; i != n_seqs; ++i) { bwa_seq_t *p[2]; p[0] = seqs[0] + i; p[1] = seqs[1] + i; @@ -446,10 +446,10 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u if ((float)x/len >= 0.25 || len - x < SW_MIN_MATCH_LEN) return 0; // get reference subsequence - ref_seq = (ubyte_t*)calloc(reglen, 1); + ref_seq = (ubyte_t*)xcalloc(reglen, 1); for (k = *beg, l = 0; l < reglen && k < l_pac; ++k) ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3; - path = (path_t*)calloc(l+len, sizeof(path_t)); + path = (path_t*)xcalloc(l+len, sizeof(path_t)); // do alignment ret = aln_local_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len, 1, 0); @@ -478,7 +478,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u *beg += (p->i? p->i : 1) - 1; start = (p->j? p->j : 1) - 1; end = path->j; - cigar = (bwa_cigar_t*)realloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2)); + cigar = (bwa_cigar_t*)xrealloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2)); if (start) { memmove(cigar + 1, cigar, sizeof(bwa_cigar_t) * (*n_cigar)); cigar[0] = __cigar_create(3, start); @@ -549,7 +549,7 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, // load reference sequence if (_pacseq == 0) { - pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); + pacseq = (ubyte_t*)xcalloc(bns->l_pac/4+1, 1); err_rewind(bns->fp_pac); err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); } else pacseq = (ubyte_t*)_pacseq; @@ -700,7 +700,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt[0]); strcpy(str, prefix); strcat(str, ".rbwt"); bwt[1] = bwt_restore_bwt(str); strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt[1]); - pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1); + pac = (ubyte_t*)xcalloc(bns->l_pac/4+1, 1); err_rewind(bns->fp_pac); err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac); } @@ -738,9 +738,9 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f alnbuf[2][1] = NULL; for(i=0; i #include #include - +#include "utils.h" #include "bwatpx.h" #include "khash.h" @@ -229,7 +229,7 @@ int bwa_pe_tpx(int iidx, const bwt_t *bwt[2], int n_seqs1, int n_seqs2, bwa_seq_ if (ret) { // not in the hash table; ret must equal 1 as we never remove elements poslist_t *z = &kh_val(g_hash[iidx], iter); z->n = r->l - r->k + 1; - z->a = (bwtint_t*)malloc(sizeof(bwtint_t) * z->n); + z->a = (bwtint_t*)xmalloc(sizeof(bwtint_t) * z->n); for (l = r->k; l <= r->l; ++l) z->a[l - r->k] = r->a? bwt_sa(bwt[0], l) : bwt[1]->seq_len - (bwt_sa(bwt[1], l) + p[j]->len); } diff --git a/bwapeio1.c b/bwapeio1.c index be046ea2..c5633ef7 100644 --- a/bwapeio1.c +++ b/bwapeio1.c @@ -67,12 +67,12 @@ static void thr_bwa_read_seq2_tpx(long n_needed) if(buf_nseqs[nexti_copy] == 0){ buf_nseqs[nexti_copy] = (long)n1; - *buf1_addr = (aln_buf_t*)calloc(buf_nseqs[nexti_copy], sizeof(aln_buf_t)); - *buf2_addr = (aln_buf_t*)calloc(buf_nseqs[nexti_copy], sizeof(aln_buf_t)); + *buf1_addr = (aln_buf_t*)xcalloc(buf_nseqs[nexti_copy], sizeof(aln_buf_t)); + *buf2_addr = (aln_buf_t*)xcalloc(buf_nseqs[nexti_copy], sizeof(aln_buf_t)); }else if(buf_nseqs[nexti_copy] < (long)n1){ buf_nseqs[nexti_copy] = (long)n1; - *buf1_addr = (aln_buf_t*)realloc(*buf1_addr, (buf_nseqs[nexti_copy] * sizeof(aln_buf_t))); - *buf2_addr = (aln_buf_t*)realloc(*buf2_addr, (buf_nseqs[nexti_copy] * sizeof(aln_buf_t))); + *buf1_addr = (aln_buf_t*)xrealloc(*buf1_addr, (buf_nseqs[nexti_copy] * sizeof(aln_buf_t))); + *buf2_addr = (aln_buf_t*)xrealloc(*buf2_addr, (buf_nseqs[nexti_copy] * sizeof(aln_buf_t))); memset(*buf1_addr, 0, (buf_nseqs[nexti_copy] * sizeof(aln_buf_t))); memset(*buf2_addr, 0, (buf_nseqs[nexti_copy] * sizeof(aln_buf_t))); }else{ diff --git a/bwapese1.c b/bwapese1.c index 1731942f..a75e9c69 100644 --- a/bwapese1.c +++ b/bwapese1.c @@ -7,6 +7,7 @@ #include #include "bwatpx.h" +#include "utils.h" extern bwa_cigar_t *refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, bwtint_t *_pos, int ext, int *n_cigar, int is_end_correct); @@ -75,7 +76,7 @@ void bwa_rg_tpx(int iidx, const bntseq_t *bns, int n_seqs1, int n_seqs2, } // generate MD tag - str = (kstring_t*)calloc(1, sizeof(kstring_t)); + str = (kstring_t*)xcalloc(1, sizeof(kstring_t)); for (i = n_seqs1; i < n_seqs2; ++i) { bwa_seq_t *s = seqs + i; diff --git a/bwase.c b/bwase.c index c50b8eb2..0c6b63e0 100644 --- a/bwase.c +++ b/bwase.c @@ -108,7 +108,7 @@ void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_ma * simply output all hits, but the following samples "rest" * number of random hits. */ rest = n_occ > n_multi + 1? n_multi + 1 : n_occ; // find one additional for ->sa - s->multi = calloc(rest, sizeof(bwt_multi1_t)); + s->multi = xcalloc(rest, sizeof(bwt_multi1_t)); for (k = 0; k < n_aln; ++k) { const bwt_aln1_t *q = aln + k; if (q->l - q->k + 1 <= rest) { @@ -368,16 +368,16 @@ bwa_cigar_t *refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, ref_len = len + abs(ext); if (ext > 0) { - ref_seq = (ubyte_t*)calloc(ref_len, 1); + ref_seq = (ubyte_t*)xcalloc(ref_len, 1); for (k = __pos; k < __pos + ref_len && k < l_pac; ++k) ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3; } else { int64_t x = __pos + (is_end_correct? len : ref_len); - ref_seq = (ubyte_t*)calloc(ref_len, 1); + ref_seq = (ubyte_t*)xcalloc(ref_len, 1); for (l = 0, k = x - ref_len > 0? x - ref_len : 0; k < x && k < l_pac; ++k) ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3; } - path = (path_t*)calloc(l+len, sizeof(path_t)); + path = (path_t*)xcalloc(l+len, sizeof(path_t)); aln_global_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len); cigar = bwa_aln_path2cigar(path, path_len, n_cigar); @@ -454,7 +454,7 @@ char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_ } ksprintf(str, "%d", u); *_nm = nm; - return strdup(str->s); + return xstrdup(str->s); } void bwa_correct_trimmed(bwa_seq_t *s) @@ -466,11 +466,11 @@ void bwa_correct_trimmed(bwa_seq_t *s) } else { if (s->cigar == 0) { s->n_cigar = 2; - s->cigar = calloc(s->n_cigar, sizeof(bwa_cigar_t)); + s->cigar = xcalloc(s->n_cigar, sizeof(bwa_cigar_t)); s->cigar[0] = __cigar_create(0, s->len); } else { ++s->n_cigar; - s->cigar = realloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t)); + s->cigar = xrealloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t)); } s->cigar[s->n_cigar-1] = __cigar_create(3, (s->full_len - s->len)); } @@ -480,11 +480,11 @@ void bwa_correct_trimmed(bwa_seq_t *s) } else { if (s->cigar == 0) { s->n_cigar = 2; - s->cigar = calloc(s->n_cigar, sizeof(bwa_cigar_t)); + s->cigar = xcalloc(s->n_cigar, sizeof(bwa_cigar_t)); s->cigar[1] = __cigar_create(0, s->len); } else { ++s->n_cigar; - s->cigar = realloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t)); + s->cigar = xrealloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t)); memmove(s->cigar + 1, s->cigar, (s->n_cigar-1) * sizeof(bwa_cigar_t)); } s->cigar[0] = __cigar_create(3, (s->full_len - s->len)); @@ -523,13 +523,13 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t #endif // HAVE_PTHREAD if (ntbns) { // in color space - ntpac = (ubyte_t*)calloc(ntbns->l_pac/4+1, 1); + ntpac = (ubyte_t*)xcalloc(ntbns->l_pac/4+1, 1); err_rewind(ntbns->fp_pac); err_fread_noeof(ntpac, 1, ntbns->l_pac/4 + 1, ntbns->fp_pac); } if (!_pacseq) { - pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); + pacseq = (ubyte_t*)xcalloc(bns->l_pac/4+1, 1); err_rewind(bns->fp_pac); err_fread_noeof(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); } else pacseq = _pacseq; @@ -811,19 +811,19 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, if(first){ first = 0; samc_len = 10000; - sam_cigar = (char *)malloc(samc_len); + sam_cigar = (char *)xmalloc(samc_len); if(sam_cigar == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); } sammdz_len = 10000; - sam_mdzstr = (char *)malloc(sammdz_len); + sam_mdzstr = (char *)xmalloc(sammdz_len); if(sam_mdzstr == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); } sammut_len = 10000; - sam_mutstr = (char *)malloc(sammut_len); + sam_mutstr = (char *)xmalloc(sammut_len); if(sam_mutstr == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -863,7 +863,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = (int)strlen(p->name); if(slen >= samf1_len){ samf1_len = slen + 10000; - samf1 = (char *)realloc(samf1, samf1_len); + samf1 = (char *)xrealloc(samf1, samf1_len); if(samf1 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -876,7 +876,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = (int)strlen(bns->anns[seqid].name); if(slen >= samf3_len){ samf3_len = slen + 10000; - samf3 = (char *)realloc(samf3, samf3_len); + samf3 = (char *)xrealloc(samf3, samf3_len); if(samf3 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -916,7 +916,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen += (int)strlen(tmpstr); if(slen >= samc_len){ samc_len = slen + 10000; - sam_cigar = (char *)realloc(sam_cigar, samc_len); + sam_cigar = (char *)xrealloc(sam_cigar, samc_len); if(sam_cigar == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -945,7 +945,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, } if(slen >= samf7_len){ samf7_len = slen + 10000; - samf7 = (char *)realloc(samf7, samf7_len); + samf7 = (char *)xrealloc(samf7, samf7_len); if(samf7 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -968,7 +968,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = 1; if(slen >= samf7_len){ samf7_len = slen + 10000; - samf7 = (char *)realloc(samf7, samf7_len); + samf7 = (char *)xrealloc(samf7, samf7_len); if(samf7 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -984,7 +984,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = 1; if(slen >= samf7_len){ samf7_len = slen + 10000; - samf7 = (char *)realloc(samf7, samf7_len); + samf7 = (char *)xrealloc(samf7, samf7_len); if(samf7 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1001,7 +1001,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = p->full_len; if(slen >= samf10_len){ samf10_len = slen + 10000; - samf10 = (char *)realloc(samf10, samf10_len); + samf10 = (char *)xrealloc(samf10, samf10_len); if(samf10 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1025,7 +1025,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = p->len; if(slen >= samf11_len){ samf11_len = slen + 10000; - samf11 = (char *)realloc(samf11, samf11_len); + samf11 = (char *)xrealloc(samf11, samf11_len); if(samf11 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1036,7 +1036,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = 1; if(slen >= samf11_len){ samf11_len = slen + 10000; - samf11 = (char *)realloc(samf11, samf11_len); + samf11 = (char *)xrealloc(samf11, samf11_len); if(samf11 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1082,7 +1082,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = (int)strlen(p->md); if(slen >= sammdz_len){ sammdz_len = slen + 10000; - sam_mdzstr = (char *)realloc(sam_mdzstr, sammdz_len); + sam_mdzstr = (char *)xrealloc(sam_mdzstr, sammdz_len); if(sam_mdzstr == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1128,7 +1128,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = (int)strlen(tmpstr); if(slen >= sammut_len){ sammut_len = slen + 10000; - sam_mutstr = (char *)realloc(sam_mutstr, sammut_len); + sam_mutstr = (char *)xrealloc(sam_mutstr, sammut_len); if(sam_mutstr == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1140,7 +1140,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen += (int)strlen(tmpstr2[num_mut]); if(slen >= sammut_len){ sammut_len = slen + 10000; - sam_mutstr = (char *)realloc(sam_mutstr, sammut_len); + sam_mutstr = (char *)xrealloc(sam_mutstr, sammut_len); if(sam_mutstr == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1154,7 +1154,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = (int)strlen(sam_cigar) + 8; if(slen >= sammut_len){ sammut_len = slen + 10000; - sam_mutstr = (char *)realloc(sam_mutstr, sammut_len); + sam_mutstr = (char *)xrealloc(sam_mutstr, sammut_len); if(sam_mutstr == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1169,7 +1169,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = (int)strlen(tmpstr); if(slen >= sammdz_len){ sammdz_len = slen + 10000; - sam_mdzstr = (char *)realloc(sam_mdzstr, sammdz_len); + sam_mdzstr = (char *)xrealloc(sam_mdzstr, sammdz_len); if(sam_mdzstr == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1226,7 +1226,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = (int)strlen(tmpstr); if(slen >= sammdz_len){ sammdz_len = slen + 10000; - sam_mdzstr = (char *)realloc(sam_mdzstr, sammdz_len); + sam_mdzstr = (char *)xrealloc(sam_mdzstr, sammdz_len); if(sam_mdzstr == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1252,7 +1252,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = (int)strlen(p->name); if(slen >= samf1_len){ samf1_len = slen + 10000; - samf1 = (char *)realloc(samf1, samf1_len); + samf1 = (char *)xrealloc(samf1, samf1_len); if(samf1 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1265,7 +1265,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = 1; if(slen >= samf3_len){ samf3_len = slen + 10000; - samf3 = (char *)realloc(samf3, samf3_len); + samf3 = (char *)xrealloc(samf3, samf3_len); if(samf3 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1281,7 +1281,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = 1; if(slen >= samf6_len){ samf6_len = slen + 10000; - samf6 = (char *)realloc(samf6, samf6_len); + samf6 = (char *)xrealloc(samf6, samf6_len); if(samf6 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1292,7 +1292,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = 1; if(slen >= samf7_len){ samf7_len = slen + 10000; - samf7 = (char *)realloc(samf7, samf7_len); + samf7 = (char *)xrealloc(samf7, samf7_len); if(samf7 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1306,7 +1306,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = p->len; if(slen >= samf10_len){ samf10_len = slen + 10000; - samf10 = (char *)realloc(samf10, samf10_len); + samf10 = (char *)xrealloc(samf10, samf10_len); if(samf10 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1324,7 +1324,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = p->len; if(slen >= samf11_len){ samf11_len = slen + 10000; - samf11 = (char *)realloc(samf11, samf11_len); + samf11 = (char *)xrealloc(samf11, samf11_len); if(samf11 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1335,7 +1335,7 @@ void bwa_print_soap1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, slen = 1; if(slen >= samf11_len){ samf11_len = slen + 10000; - samf11 = (char *)realloc(samf11, samf11_len); + samf11 = (char *)xrealloc(samf11, samf11_len); if(samf11 == NULL){ fprintf(stderr,"Error, unable to alloc mem for expanding output string\n"); exit(1); @@ -1400,7 +1400,7 @@ bntseq_t *bwa_open_nt(const char *prefix) { bntseq_t *ntbns; char *str; - str = (char*)calloc(strlen(prefix) + 10, 1); + str = (char*)xcalloc(strlen(prefix) + 10, 1); strcat(strcpy(str, prefix), ".nt"); ntbns = bns_restore(str); free(str); @@ -1448,14 +1448,14 @@ int bwa_set_rg(const char *s) if (strstr(s, "@RG") != s) return -1; if (bwa_rg_line) free(bwa_rg_line); if (bwa_rg_id) free(bwa_rg_id); - bwa_rg_line = strdup(s); + bwa_rg_line = xstrdup(s); bwa_rg_id = 0; bwa_escape(bwa_rg_line); p = strstr(bwa_rg_line, "\tID:"); if (p == 0) return -1; p += 4; for (q = p; *q && *q != '\t' && *q != '\n'; ++q); - bwa_rg_id = calloc(q - p + 1, 1); + bwa_rg_id = xcalloc(q - p + 1, 1); for (q = p, r = bwa_rg_id; *q && *q != '\t' && *q != '\n'; ++q) *r++ = *q; return 0; @@ -1603,7 +1603,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f if (n_aln > m_aln) { m_aln = n_aln; - aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln); + aln = (bwt_aln1_t*)xrealloc(aln, sizeof(bwt_aln1_t) * m_aln); } err_fread_noeof(aln, sizeof(bwt_aln1_t), n_aln, fp_sa); diff --git a/bwaseio1.c b/bwaseio1.c index fa0712fd..7a9a686a 100644 --- a/bwaseio1.c +++ b/bwaseio1.c @@ -53,7 +53,7 @@ static void thr_bwa_read_seq1_tpx(long n_needed) if (n_aln > m_aln_copy) { m_aln_copy = n_aln; - *aln_addr = (bwt_aln1_t*)realloc(*aln_addr, sizeof(bwt_aln1_t) * m_aln_copy); + *aln_addr = (bwt_aln1_t*)xrealloc(*aln_addr, sizeof(bwt_aln1_t) * m_aln_copy); } err_fread_noeof(*aln_addr, sizeof(bwt_aln1_t), n_aln, fp_sa_addr); diff --git a/bwaseqio.c b/bwaseqio.c index 835f9877..ad914984 100644 --- a/bwaseqio.c +++ b/bwaseqio.c @@ -22,7 +22,7 @@ bwa_seqio_t *bwa_bam_open(const char *fn, int which) { bwa_seqio_t *bs; bam_header_t *h; - bs = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t)); + bs = (bwa_seqio_t*)xcalloc(1, sizeof(bwa_seqio_t)); bs->is_bam = 1; bs->which = which; bs->fp = bam_open(fn, "r"); @@ -35,7 +35,7 @@ bwa_seqio_t *bwa_seq_open(const char *fn) { gzFile fp; bwa_seqio_t *bs; - bs = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t)); + bs = (bwa_seqio_t*)xcalloc(1, sizeof(bwa_seqio_t)); fp = xzopen(fn, "r"); bs->ks = kseq_init(fp); return bs; @@ -95,7 +95,7 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com b = bam_init1(); n_seqs = 0; - seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); + seqs = (bwa_seq_t*)xcalloc(n_needed, sizeof(bwa_seq_t)); while (bam_read1(bs->fp, b) >= 0) { uint8_t *s, *q; int go = 0; @@ -110,8 +110,8 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com p->full_len = p->clip_len = p->len = l; n_tot += p->full_len; s = bam1_seq(b); q = bam1_qual(b); - p->seq = (ubyte_t*)calloc(p->len + 1, 1); - p->qual = (ubyte_t*)calloc(p->len + 1, 1); + p->seq = (ubyte_t*)xcalloc(p->len + 1, 1); + p->qual = (ubyte_t*)xcalloc(p->len + 1, 1); for (i = 0; i != p->full_len; ++i) { p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)]; p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126; @@ -121,11 +121,11 @@ static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_com seq_reverse(p->len, p->qual, 0); } if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p); - p->rseq = (ubyte_t*)calloc(p->full_len, 1); + p->rseq = (ubyte_t*)xcalloc(p->full_len, 1); memcpy(p->rseq, p->seq, p->len); seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped() seq_reverse(p->len, p->rseq, is_comp); - p->name = strdup((const char*)bam1_qname(b)); + p->name = xstrdup((const char*)bam1_qname(b)); if (n_seqs == n_needed) break; } *n = n_seqs; @@ -155,7 +155,7 @@ bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int tri } if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); // l_bc has no effect for BAM input n_seqs = 0; - seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); + seqs = (bwa_seq_t*)xcalloc(n_needed, sizeof(bwa_seq_t)); while ((l = kseq_read(seq)) >= 0) { if ((mode & BWA_MODE_CFY) && (seq->comment.l != 0)) { // skip reads that are marked to be filtered by Casava @@ -186,18 +186,18 @@ bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int tri p->qual = 0; p->full_len = p->clip_len = p->len = l; n_tot += p->full_len; - p->seq = (ubyte_t*)calloc(p->len, 1); + p->seq = (ubyte_t*)xcalloc(p->len, 1); for (i = 0; i != p->full_len; ++i) p->seq[i] = nst_nt4_table[(int)seq->seq.s[i]]; if (seq->qual.l) { // copy quality - p->qual = (ubyte_t*)strdup((char*)seq->qual.s); + p->qual = (ubyte_t*)xstrdup((char*)seq->qual.s); if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p); } - p->rseq = (ubyte_t*)calloc(p->full_len, 1); + p->rseq = (ubyte_t*)xcalloc(p->full_len, 1); memcpy(p->rseq, p->seq, p->len); seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped() seq_reverse(p->len, p->rseq, is_comp); - p->name = strdup((const char*)seq->name.s); + p->name = xstrdup((const char*)seq->name.s); { // trim /[12]$ int t = strlen(p->name); if (t > 2 && p->name[t-2] == '/' && (p->name[t-1] == '1' || p->name[t-1] == '2')) p->name[t-2] = '\0'; diff --git a/bwt.c b/bwt.c index bcdbace8..7c90116f 100644 --- a/bwt.c +++ b/bwt.c @@ -107,7 +107,7 @@ void bwt_cal_sa(bwt_t *bwt, int intv) if (bwt->sa) free(bwt->sa); bwt->sa_intv = intv; bwt->n_sa = (bwt->seq_len + intv) / intv; - bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); + bwt->sa = (bwtint_t*)xcalloc(bwt->n_sa, sizeof(bwtint_t)); // calculate SA value isa = 0; sa = bwt->seq_len; for (i = 0; i < bwt->seq_len; ++i) { diff --git a/bwt_gen/bwt_gen.c b/bwt_gen/bwt_gen.c index 8f00463e..9ed08e65 100644 --- a/bwt_gen/bwt_gen.c +++ b/bwt_gen/bwt_gen.c @@ -255,12 +255,12 @@ BWT *BWTCreate(const unsigned int textLength, unsigned int *decodeTable) { BWT *bwt; - bwt = (BWT*)calloc(1, sizeof(BWT)); + bwt = (BWT*)xcalloc(1, sizeof(BWT)); bwt->textLength = 0; bwt->inverseSa = 0; - bwt->cumulativeFreq = (unsigned*)calloc((ALPHABET_SIZE + 1), sizeof(unsigned int*)); + bwt->cumulativeFreq = (unsigned*)xcalloc((ALPHABET_SIZE + 1), sizeof(unsigned int*)); initializeVAL(bwt->cumulativeFreq, ALPHABET_SIZE + 1, 0); bwt->bwtSizeInWord = 0; @@ -268,14 +268,14 @@ BWT *BWTCreate(const unsigned int textLength, unsigned int *decodeTable) // Generate decode tables if (decodeTable == NULL) { - bwt->decodeTable = (unsigned*)calloc(DNA_OCC_CNT_TABLE_SIZE_IN_WORD, sizeof(unsigned int)); + bwt->decodeTable = (unsigned*)xcalloc(DNA_OCC_CNT_TABLE_SIZE_IN_WORD, sizeof(unsigned int)); GenerateDNAOccCountTable(bwt->decodeTable); } else { bwt->decodeTable = decodeTable; } bwt->occMajorSizeInWord = BWTOccValueMajorSizeInWord(textLength); - bwt->occValueMajor = (unsigned*)calloc(bwt->occMajorSizeInWord, sizeof(unsigned int)); + bwt->occValueMajor = (unsigned*)xcalloc(bwt->occMajorSizeInWord, sizeof(unsigned int)); bwt->occSizeInWord = 0; bwt->occValue = NULL; @@ -302,17 +302,17 @@ BWTInc *BWTIncCreate(const unsigned int textLength, const float targetNBit, exit(1); } - bwtInc = (BWTInc*)calloc(1, sizeof(BWTInc)); + bwtInc = (BWTInc*)xcalloc(1, sizeof(BWTInc)); bwtInc->numberOfIterationDone = 0; bwtInc->bwt = BWTCreate(textLength, NULL); bwtInc->initialMaxBuildSize = initialMaxBuildSize; bwtInc->incMaxBuildSize = incMaxBuildSize; bwtInc->targetNBit = targetNBit; - bwtInc->cumulativeCountInCurrentBuild = (unsigned*)calloc((ALPHABET_SIZE + 1), sizeof(unsigned int)); + bwtInc->cumulativeCountInCurrentBuild = (unsigned*)xcalloc((ALPHABET_SIZE + 1), sizeof(unsigned int)); initializeVAL(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0); // Build frequently accessed data - bwtInc->packedShift = (unsigned*)calloc(CHAR_PER_WORD, sizeof(unsigned int)); + bwtInc->packedShift = (unsigned*)xcalloc(CHAR_PER_WORD, sizeof(unsigned int)); for (i=0; ipackedShift[i] = BITS_IN_WORD - (i+1) * BIT_PER_CHAR; } @@ -323,7 +323,7 @@ BWTInc *BWTIncCreate(const unsigned int textLength, const float targetNBit, fprintf(stderr, "BWTIncCreate() : targetNBit is too low!\n"); exit(1); } - bwtInc->workingMemory = (unsigned*)calloc(bwtInc->availableWord, BYTES_IN_WORD); + bwtInc->workingMemory = (unsigned*)xcalloc(bwtInc->availableWord, BYTES_IN_WORD); return bwtInc; diff --git a/bwt_lite.c b/bwt_lite.c index dd411e11..32338388 100644 --- a/bwt_lite.c +++ b/bwt_lite.c @@ -2,6 +2,7 @@ #include #include #include "bwt_lite.h" +#include "utils.h" int is_sa(const uint8_t *T, uint32_t *SA, int n); int is_bwt(uint8_t *T, int n); @@ -10,21 +11,21 @@ bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq) { bwtl_t *b; int i; - b = (bwtl_t*)calloc(1, sizeof(bwtl_t)); + b = (bwtl_t*)xcalloc(1, sizeof(bwtl_t)); b->seq_len = len; { // calculate b->bwt uint8_t *s; - b->sa = (uint32_t*)calloc(len + 1, 4); + b->sa = (uint32_t*)xcalloc(len + 1, 4); is_sa(seq, b->sa, len); - s = (uint8_t*)calloc(len + 1, 1); + s = (uint8_t*)xcalloc(len + 1, 1); for (i = 0; i <= len; ++i) { if (b->sa[i] == 0) b->primary = i; else s[i] = seq[b->sa[i] - 1]; } for (i = b->primary; i < len; ++i) s[i] = s[i + 1]; b->bwt_size = (len + 15) / 16; - b->bwt = (uint32_t*)calloc(b->bwt_size, 4); + b->bwt = (uint32_t*)xcalloc(b->bwt_size, 4); for (i = 0; i < len; ++i) b->bwt[i>>4] |= s[i] << ((15 - (i&15)) << 1); free(s); @@ -32,7 +33,7 @@ bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq) { // calculate b->occ uint32_t c[4]; b->n_occ = (len + 15) / 16 * 4; - b->occ = (uint32_t*)calloc(b->n_occ, 4); + b->occ = (uint32_t*)xcalloc(b->n_occ, 4); memset(c, 0, 16); for (i = 0; i < len; ++i) { if (i % 16 == 0) diff --git a/bwtaln.c b/bwtaln.c index 09ff1d8e..b87e4303 100644 --- a/bwtaln.c +++ b/bwtaln.c @@ -25,7 +25,7 @@ extern int adj_n_needed; gap_opt_t *gap_init_opt() { gap_opt_t *o; - o = (gap_opt_t*)calloc(1, sizeof(gap_opt_t)); + o = (gap_opt_t*)xcalloc(1, sizeof(gap_opt_t)); /* IMPORTANT: s_mm*10 should be about the average base error rate. Voilating this requirement will break pairing! */ o->s_mm = 3; o->s_gapo = 11; o->s_gape = 4; @@ -96,8 +96,8 @@ void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt[2], int n_seqs, bwa_seq_t *seq if (local_opt.max_diff < local_opt.max_gapo) local_opt.max_gapo = local_opt.max_diff; stack = gap_init_stack(local_opt.max_diff, local_opt.max_gapo, local_opt.max_gape, &local_opt); - seed_w[0] = (bwt_width_t*)calloc(opt->seed_len+1, sizeof(bwt_width_t)); - seed_w[1] = (bwt_width_t*)calloc(opt->seed_len+1, sizeof(bwt_width_t)); + seed_w[0] = (bwt_width_t*)xcalloc(opt->seed_len+1, sizeof(bwt_width_t)); + seed_w[1] = (bwt_width_t*)xcalloc(opt->seed_len+1, sizeof(bwt_width_t)); w[0] = w[1] = 0; for (i = 0; i != n_seqs; ++i) { bwa_seq_t *p = seqs + i; @@ -108,8 +108,8 @@ void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt[2], int n_seqs, bwa_seq_t *seq seq[0] = p->seq; seq[1] = p->rseq; if (max_l < p->len) { max_l = p->len; - w[0] = (bwt_width_t*)realloc(w[0], (max_l + 1) * sizeof(bwt_width_t)); - w[1] = (bwt_width_t*)realloc(w[1], (max_l + 1) * sizeof(bwt_width_t)); + w[0] = (bwt_width_t*)xrealloc(w[0], (max_l + 1) * sizeof(bwt_width_t)); + w[1] = (bwt_width_t*)xrealloc(w[1], (max_l + 1) * sizeof(bwt_width_t)); memset(w[0], 0, (max_l + 1) * sizeof(bwt_width_t)); memset(w[1], 0, (max_l + 1) * sizeof(bwt_width_t)); } @@ -194,7 +194,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) ks = bwa_open_reads(opt->mode, fn_fa); { // load BWT - char *str = (char*)calloc(strlen(prefix) + 10, 1); + char *str = (char*)xcalloc(strlen(prefix) + 10, 1); strcpy(str, prefix); strcat(str, ".bwt"); bwt[0] = bwt_restore_bwt(str); strcpy(str, prefix); strcat(str, ".rbwt"); bwt[1] = bwt_restore_bwt(str); free(str); @@ -218,8 +218,8 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) int j; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t)); - tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t)); + data = (thread_aux_t*)xcalloc(opt->n_threads, sizeof(thread_aux_t)); + tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t)); for (j = 0; j < opt->n_threads; ++j) { data[j].tid = j; data[j].bwt[0] = bwt[0]; data[j].bwt[1] = bwt[1]; data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt; diff --git a/bwtgap.c b/bwtgap.c index 9301ad9a..dcce13b3 100644 --- a/bwtgap.c +++ b/bwtgap.c @@ -3,6 +3,7 @@ #include #include "bwtgap.h" #include "bwtaln.h" +#include "utils.h" #define STATE_M 0 #define STATE_I 1 @@ -14,13 +15,13 @@ gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_op { int i; gap_stack_t *stack; - stack = (gap_stack_t*)calloc(1, sizeof(gap_stack_t)); + stack = (gap_stack_t*)xcalloc(1, sizeof(gap_stack_t)); stack->n_stacks = aln_score(max_mm+1, max_gapo+1, max_gape+1, opt); - stack->stacks = (gap_stack1_t*)calloc(stack->n_stacks, sizeof(gap_stack1_t)); + stack->stacks = (gap_stack1_t*)xcalloc(stack->n_stacks, sizeof(gap_stack1_t)); for (i = 0; i != stack->n_stacks; ++i) { gap_stack1_t *p = stack->stacks + i; p->m_entries = 4; - p->stack = (gap_entry_t*)calloc(p->m_entries, sizeof(gap_entry_t)); + p->stack = (gap_entry_t*)xcalloc(p->m_entries, sizeof(gap_entry_t)); } return stack; } @@ -52,7 +53,7 @@ static inline void gap_push(gap_stack_t *stack, int a, int i, bwtint_t k, bwtint q = stack->stacks + score; if (q->n_entries == q->m_entries) { q->m_entries <<= 1; - q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries); + q->stack = (gap_entry_t*)xrealloc(q->stack, sizeof(gap_entry_t) * q->m_entries); } p = q->stack + q->n_entries; p->info = (u_int32_t)score<<21 | a<<20 | i; p->k = k; p->l = l; @@ -111,7 +112,7 @@ bwt_aln1_t *bwt_match_gap(bwt_t *const bwts[2], int len, const ubyte_t *seq[2], bwt_aln1_t *aln; m_aln = 4; n_aln = 0; - aln = (bwt_aln1_t*)calloc(m_aln, sizeof(bwt_aln1_t)); + aln = (bwt_aln1_t*)xcalloc(m_aln, sizeof(bwt_aln1_t)); // check whether there are too many N for (j = _j = 0; j < len; ++j) @@ -187,7 +188,7 @@ bwt_aln1_t *bwt_match_gap(bwt_t *const bwts[2], int len, const ubyte_t *seq[2], #endif // _REMOVE_SHADOW if (n_aln == m_aln) { m_aln <<= 1; - aln = (bwt_aln1_t*)realloc(aln, m_aln * sizeof(bwt_aln1_t)); + aln = (bwt_aln1_t*)xrealloc(aln, m_aln * sizeof(bwt_aln1_t)); memset(aln + m_aln/2, 0, m_aln/2*sizeof(bwt_aln1_t)); } p = aln + n_aln; diff --git a/bwtindex.c b/bwtindex.c index 929aa226..c203d3e4 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -54,7 +54,7 @@ int bwa_index(int argc, char *argv[]) else if (strcmp(optarg, "is") == 0) algo_type = 3; else err_fatal(__func__, "unknown algorithm: '%s'.", optarg); break; - case 'p': prefix = strdup(optarg); break; + case 'p': prefix = xstrdup(optarg); break; case 'c': is_color = 1; break; default: return 1; } @@ -71,10 +71,10 @@ int bwa_index(int argc, char *argv[]) fprintf(stderr, " according to the length of the genome.\n\n"); return 1; } - if (prefix == 0) prefix = strdup(argv[optind]); - str = (char*)calloc(strlen(prefix) + 10, 1); - str2 = (char*)calloc(strlen(prefix) + 10, 1); - str3 = (char*)calloc(strlen(prefix) + 10, 1); + if (prefix == 0) prefix = xstrdup(argv[optind]); + str = (char*)xcalloc(strlen(prefix) + 10, 1); + str2 = (char*)xcalloc(strlen(prefix) + 10, 1); + str3 = (char*)xcalloc(strlen(prefix) + 10, 1); if (is_color == 0) { // nucleotide indexing gzFile fp = xzopen(argv[optind], "r"); diff --git a/bwtio.c b/bwtio.c index 05eb326c..bcf5b354 100644 --- a/bwtio.c +++ b/bwtio.c @@ -43,7 +43,7 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt) xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; - bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); + bwt->sa = (bwtint_t*)xcalloc(bwt->n_sa, sizeof(bwtint_t)); bwt->sa[0] = -1; err_fread_noeof(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); @@ -55,11 +55,11 @@ bwt_t *bwt_restore_bwt(const char *fn) bwt_t *bwt; FILE *fp; - bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); + bwt = (bwt_t*)xcalloc(1, sizeof(bwt_t)); fp = xopen(fn, "rb"); err_fseek(fp, 0, SEEK_END); bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2; - bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); + bwt->bwt = (uint32_t*)xcalloc(bwt->bwt_size, 4); err_fseek(fp, 0, SEEK_SET); err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp); err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp); diff --git a/bwtmisc.c b/bwtmisc.c index fbd7aacc..51ca06a2 100644 --- a/bwtmisc.c +++ b/bwtmisc.c @@ -61,18 +61,18 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) FILE *fp; // initialization - bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); + bwt = (bwt_t*)xcalloc(1, sizeof(bwt_t)); bwt->seq_len = bwa_seq_len(fn_pac); bwt->bwt_size = (bwt->seq_len + 15) >> 4; fp = xopen(fn_pac, "rb"); // prepare sequence pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1); - buf2 = (ubyte_t*)calloc(pac_size, 1); + buf2 = (ubyte_t*)xcalloc(pac_size, 1); err_fread_noeof(buf2, 1, pac_size, fp); err_fclose(fp); memset(bwt->L2, 0, 5 * 4); - buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1); + buf = (ubyte_t*)xcalloc(bwt->seq_len + 1, 1); for (i = 0; i < bwt->seq_len; ++i) { buf[i] = buf2[i>>2] >> ((3 - (i&3)) << 1) & 3; ++bwt->L2[1+buf[i]]; @@ -90,7 +90,7 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) err_fatal_simple("libdivsufsort is not compiled in."); #endif } - bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4); + bwt->bwt = (u_int32_t*)xcalloc(bwt->bwt_size, 4); for (i = 0; i < bwt->seq_len; ++i) bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1); free(buf); @@ -126,7 +126,7 @@ void bwt_bwtupdate_core(bwt_t *bwt) n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; bwt->bwt_size += n_occ * 4; // the new size - buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt + buf = (uint32_t*)xcalloc(bwt->bwt_size, 4); // will be the new bwt c[0] = c[1] = c[2] = c[3] = 0; for (i = k = 0; i < bwt->seq_len; ++i) { if (i % OCC_INTERVAL == 0) { @@ -165,8 +165,8 @@ void bwa_pac_rev_core(const char *fn, const char *fn_rev) FILE *fp; seq_len = bwa_seq_len(fn); pac_len = (seq_len >> 2) + 1; - bufin = (ubyte_t*)calloc(pac_len, 1); - bufout = (ubyte_t*)calloc(pac_len, 1); + bufin = (ubyte_t*)xcalloc(pac_len, 1); + bufout = (ubyte_t*)xcalloc(pac_len, 1); fp = xopen(fn, "rb"); err_fread_noeof(bufin, 1, pac_len, fp); err_fclose(fp); @@ -205,8 +205,8 @@ uint8_t *bwa_pac2cspac_core(const bntseq_t *bns) uint8_t *pac, *cspac; bwtint_t i; int c1, c2; - pac = (uint8_t*)calloc(bns->l_pac/4 + 1, 1); - cspac = (uint8_t*)calloc(bns->l_pac/4 + 1, 1); + pac = (uint8_t*)xcalloc(bns->l_pac/4 + 1, 1); + cspac = (uint8_t*)xcalloc(bns->l_pac/4 + 1, 1); err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac); err_rewind(bns->fp_pac); c1 = pac[0]>>6; cspac[0] = c1<<6; @@ -234,7 +234,7 @@ int bwa_pac2cspac(int argc, char *argv[]) cspac = bwa_pac2cspac_core(bns); bns_dump(bns, argv[2]); // now write cspac - str = (char*)calloc(strlen(argv[2]) + 5, 1); + str = (char*)xcalloc(strlen(argv[2]) + 5, 1); strcat(strcpy(str, argv[2]), ".pac"); fp = xopen(str, "wb"); err_fwrite(cspac, 1, bns->l_pac/4 + 1, fp); diff --git a/bwtsw2_aux.c b/bwtsw2_aux.c index c9a95924..6fd20e8f 100644 --- a/bwtsw2_aux.c +++ b/bwtsw2_aux.c @@ -47,7 +47,7 @@ extern int bsw2_resolve_query_overlaps(bwtsw2_t *b, float mask_level); bsw2opt_t *bsw2_init_opt() { - bsw2opt_t *o = (bsw2opt_t*)calloc(1, sizeof(bsw2opt_t)); + bsw2opt_t *o = (bsw2opt_t*)xcalloc(1, sizeof(bsw2opt_t)); o->a = 1; o->b = 3; o->q = 5; o->r = 2; o->t = 30; o->bw = 50; o->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0; @@ -86,10 +86,10 @@ void bsw2_extend_left(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *_query, int lq par.matrix = matrix; __gen_ap(par, opt); - query = calloc(lq, 1); + query = xcalloc(lq, 1); // sort according to the descending order of query end ks_introsort(hit, b->n, b->hits); - target = calloc(((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq, 1); + target = xcalloc(((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq, 1); // reverse _query for (i = 0; i < lq; ++i) query[lq - i - 1] = _query[i]; // core loop @@ -137,7 +137,7 @@ void bsw2_extend_rght(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *query, int lq, par.matrix = matrix; __gen_ap(par, opt); - target = calloc(((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq, 1); + target = xcalloc(((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq, 1); for (i = 0; i < b->n; ++i) { bsw2hit_t *p = b->hits + i; int lt = ((lq - p->beg + 1) / 2 * opt->a + opt->r) / opt->r + lq; @@ -174,17 +174,17 @@ static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], uint8_t *pa par.matrix = matrix; __gen_ap(par, opt); i = ((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq; // maximum possible target length - target = calloc(i, 1); - path = calloc(i + lq, sizeof(path_t)); + target = xcalloc(i, 1); + path = xcalloc(i + lq, sizeof(path_t)); // memory clean up for b if (b->n < b->max) { - b->max = b->n; - b->hits = realloc(b->hits, b->n * sizeof(bsw2hit_t)); + b->max = b->n > 0 ? b->n : 1; + b->hits = xrealloc(b->hits, b->max * sizeof(bsw2hit_t)); } if (b->cigar) free(b->cigar); if (b->n_cigar) free(b->n_cigar); - b->cigar = (uint32_t**)calloc(b->max, sizeof(void*)); - b->n_cigar = (int*)calloc(b->max, sizeof(int)); + b->cigar = (uint32_t**)xcalloc(b->max, sizeof(void*)); + b->n_cigar = (int*)xcalloc(b->max, sizeof(int)); // generate CIGAR for (i = 0; i < b->n; ++i) { bsw2hit_t *p = b->hits + i; @@ -200,7 +200,7 @@ static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], uint8_t *pa score = aln_global_core(target, p->len, query, end - beg, &par, path, &path_len); b->cigar[i] = aln_path2cigar32(path, path_len, &b->n_cigar[i]); if (beg != 0 || end < lq) { // write soft clipping - b->cigar[i] = realloc(b->cigar[i], 4 * (b->n_cigar[i] + 2)); + b->cigar[i] = xrealloc(b->cigar[i], 4 * (b->n_cigar[i] + 2)); if (beg != 0) { memmove(b->cigar[i] + 1, b->cigar[i], b->n_cigar[i] * 4); b->cigar[i][0] = beg<<4 | 4; @@ -232,7 +232,7 @@ static void merge_hits(bwtsw2_t *b[2], int l, int is_reverse) int i; if (b[0]->n + b[1]->n > b[0]->max) { b[0]->max = b[0]->n + b[1]->n; - b[0]->hits = realloc(b[0]->hits, b[0]->max * sizeof(bsw2hit_t)); + b[0]->hits = xrealloc(b[0]->hits, b[0]->max * sizeof(bsw2hit_t)); } for (i = 0; i < b[1]->n; ++i) { bsw2hit_t *p = b[0]->hits + b[0]->n + i; @@ -330,7 +330,7 @@ static int fix_cigar(const char *qname, const bntseq_t *bns, bsw2hit_t *p, int n int j, nc, mq[2], nlen[2]; uint32_t *cn, kk = 0; nc = mq[0] = mq[1] = nlen[0] = nlen[1] = 0; - cn = calloc(n_cigar + 3, 4); + cn = xcalloc(n_cigar + 3, 4); x = coor; y = 0; for (i = j = 0; i < n_cigar; ++i) { int op = cigar[i]&0xf, ln = cigar[i]>>4; @@ -474,7 +474,7 @@ static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const if (pool->max_l < l) { // then enlarge working space for aln_extend_core() int tmp = ((l + 1) / 2 * opt.a + opt.r) / opt.r + l; pool->max_l = l; - pool->aln_mem = realloc(pool->aln_mem, (tmp + 2) * 24); + pool->aln_mem = xrealloc(pool->aln_mem, (tmp + 2) * 24); } // set opt->bw opt.bw = _opt->bw; @@ -484,7 +484,7 @@ static void bsw2_aln_core(int tid, bsw2seq_t *_seq, const bsw2opt_t *_opt, const if (k < 1) k = 1; // I do not know if k==0 causes troubles opt.bw = _opt->bw < k? _opt->bw : k; // set seq[2] and rseq[2] - seq[0] = calloc(l * 4, 1); + seq[0] = xcalloc(l * 4, 1); seq[1] = seq[0] + l; rseq[0] = seq[1] + l; rseq[1] = rseq[0] + l; // convert sequences to 2-bit representation @@ -563,8 +563,8 @@ static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t * int j; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t)); - tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t)); + data = (thread_aux_t*)xcalloc(opt->n_threads, sizeof(thread_aux_t)); + tid = (pthread_t*)xcalloc(opt->n_threads, sizeof(pthread_t)); for (j = 0; j < opt->n_threads; ++j) { thread_aux_t *p = data + j; p->tid = j; p->_seq = _seq; p->_opt = opt; p->bns = bns; @@ -598,7 +598,7 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target[2] uint8_t *pac; bsw2seq_t *_seq; - pac = calloc(bns->l_pac/4+1, 1); + pac = xcalloc(bns->l_pac/4+1, 1); if (pac == 0) { fprintf(stderr, "[bsw2_aln] insufficient memory!\n"); return; @@ -608,19 +608,19 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target[2] err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac); fp = xzopen(fn, "r"); ks = kseq_init(fp); - _seq = calloc(1, sizeof(bsw2seq_t)); + _seq = xcalloc(1, sizeof(bsw2seq_t)); while ((l = kseq_read(ks)) >= 0) { bsw2seq1_t *p; if (_seq->n == _seq->max) { _seq->max = _seq->max? _seq->max<<1 : 1024; - _seq->seq = realloc(_seq->seq, _seq->max * sizeof(bsw2seq1_t)); + _seq->seq = xrealloc(_seq->seq, _seq->max * sizeof(bsw2seq1_t)); } p = &_seq->seq[_seq->n++]; p->tid = -1; p->l = l; - p->name = strdup(ks->name.s); - p->seq = strdup(ks->seq.s); - p->qual = ks->qual.l? strdup(ks->qual.s) : 0; + p->name = xstrdup(ks->name.s); + p->seq = xstrdup(ks->seq.s); + p->qual = ks->qual.l? xstrdup(ks->qual.s) : 0; p->sam = 0; size += l; if (size > opt->chunk_size) { diff --git a/bwtsw2_chain.c b/bwtsw2_chain.c index c734657f..45b12c6f 100644 --- a/bwtsw2_chain.c +++ b/bwtsw2_chain.c @@ -1,5 +1,6 @@ #include #include "bwtsw2.h" +#include "utils.h" typedef struct { uint32_t tbeg, tend; @@ -48,9 +49,9 @@ void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2]) char *flag; // initialization n[0] = b[0]->n; n[1] = b[1]->n; - z[0] = calloc(n[0] + n[1], sizeof(hsaip_t)); + z[0] = xcalloc(n[0] + n[1], sizeof(hsaip_t)); z[1] = z[0] + n[0]; - chain[0] = calloc(n[0] + n[1], sizeof(hsaip_t)); + chain[0] = xcalloc(n[0] + n[1], sizeof(hsaip_t)); for (k = j = 0; k < 2; ++k) { for (i = 0; i < b[k]->n; ++i) { bsw2hit_t *p = b[k]->hits + i; @@ -72,7 +73,7 @@ void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2]) p->qbeg = len - p->qend; p->qend = len - tmp; } // filtering - flag = calloc(m[0] + m[1], 1); + flag = xcalloc(m[0] + m[1], 1); ks_introsort(hsaip, m[0] + m[1], chain[0]); for (k = 1; k < m[0] + m[1]; ++k) { hsaip_t *p = chain[0] + k; diff --git a/bwtsw2_core.c b/bwtsw2_core.c index 03360a3a..7434ecf0 100644 --- a/bwtsw2_core.c +++ b/bwtsw2_core.c @@ -6,6 +6,7 @@ #include "bwt_lite.h" #include "bwtsw2.h" #include "bwt.h" +#include "utils.h" #include "kvec.h" #include "khash.h" @@ -63,7 +64,7 @@ typedef struct __mempool_t { inline static bsw2entry_p mp_alloc(mempool_t *mp) { ++mp->cnt; - if (kv_size(mp->pool) == 0) return (bsw2entry_t*)calloc(1, sizeof(bsw2entry_t)); + if (kv_size(mp->pool) == 0) return (bsw2entry_t*)xcalloc(1, sizeof(bsw2entry_t)); else return kv_pop(mp->pool); } inline static void mp_free(mempool_t *mp, bsw2entry_p e) @@ -125,7 +126,7 @@ static void cut_tail(bsw2entry_t *u, int T, bsw2entry_t *aux) if (u->n <= T) return; if (aux->max < u->n) { aux->max = u->n; - aux->array = (bsw2cell_t*)realloc(aux->array, aux->max * sizeof(bsw2cell_t)); + aux->array = (bsw2cell_t*)xrealloc(aux->array, aux->max * sizeof(bsw2cell_t)); } a = (int*)aux->array; for (i = n = 0; i != u->n; ++i) @@ -176,7 +177,7 @@ static void merge_entry(const bsw2opt_t * __restrict opt, bsw2entry_t *u, bsw2en int i; if (u->n + v->n >= u->max) { u->max = u->n + v->n; - u->array = (bsw2cell_t*)realloc(u->array, u->max * sizeof(bsw2cell_t)); + u->array = (bsw2cell_t*)xrealloc(u->array, u->max * sizeof(bsw2cell_t)); } for (i = 0; i != v->n; ++i) { bsw2cell_t *p = v->array + i; @@ -194,7 +195,7 @@ static inline bsw2cell_t *push_array_p(bsw2entry_t *e) { if (e->n == e->max) { e->max = e->max? e->max<<1 : 256; - e->array = (bsw2cell_t*)realloc(e->array, sizeof(bsw2cell_t) * e->max); + e->array = (bsw2cell_t*)xrealloc(e->array, sizeof(bsw2cell_t) * e->max); } return e->array + e->n; } @@ -242,7 +243,7 @@ static void save_narrow_hits(const bwtl_t *bwtl, bsw2entry_t *u, bwtsw2_t *b1, i if (p->G >= t && p->ql - p->qk + 1 <= IS) { // good narrow hit if (b1->max == b1->n) { b1->max = b1->max? b1->max<<1 : 4; - b1->hits = realloc(b1->hits, b1->max * sizeof(bsw2hit_t)); + b1->hits = xrealloc(b1->hits, b1->max * sizeof(bsw2hit_t)); } q = &b1->hits[b1->n++]; q->k = p->qk; q->l = p->ql; @@ -271,7 +272,7 @@ int bsw2_resolve_duphits(const bwt_t *bwt, bwtsw2_t *b, int IS) else if (p->G > 0) ++n; } b->n = b->max = n; - b->hits = calloc(b->max, sizeof(bsw2hit_t)); + b->hits = xcalloc(b->max, sizeof(bsw2hit_t)); for (i = j = 0; i < old_n; ++i) { bsw2hit_t *p = old_hits + i; if (p->l - p->k + 1 <= IS) { @@ -383,9 +384,9 @@ bsw2global_t *bsw2_global_init() { bsw2global_t *pool; bsw2stack_t *stack; - pool = calloc(1, sizeof(bsw2global_t)); - stack = calloc(1, sizeof(bsw2stack_t)); - stack->pool = (mempool_t*)calloc(1, sizeof(mempool_t)); + pool = xcalloc(1, sizeof(bsw2global_t)); + stack = xcalloc(1, sizeof(bsw2stack_t)); + stack->pool = (mempool_t*)xcalloc(1, sizeof(mempool_t)); pool->stack = (void*)stack; return pool; } @@ -444,13 +445,13 @@ bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *qu rhash = kh_init(64); init_bwtsw2(target, query, stack); heap_size = opt->z; - heap = calloc(heap_size, sizeof(int)); + heap = xcalloc(heap_size, sizeof(int)); // initialize the return struct - b = (bwtsw2_t*)calloc(1, sizeof(bwtsw2_t)); + b = (bwtsw2_t*)xcalloc(1, sizeof(bwtsw2_t)); b->n = b->max = target->seq_len * 2; - b->hits = calloc(b->max, sizeof(bsw2hit_t)); - b1 = (bwtsw2_t*)calloc(1, sizeof(bwtsw2_t)); - b_ret = calloc(2, sizeof(void*)); + b->hits = xcalloc(b->max, sizeof(bsw2hit_t)); + b1 = (bwtsw2_t*)xcalloc(1, sizeof(bwtsw2_t)); + b_ret = xcalloc(2, sizeof(void*)); b_ret[0] = b; b_ret[1] = b1; // initialize timer getrusage(0, &last); diff --git a/cs2nt.c b/cs2nt.c index dfbce601..3084f118 100644 --- a/cs2nt.c +++ b/cs2nt.c @@ -3,6 +3,7 @@ #include #include "bwtaln.h" #include "stdaln.h" +#include "utils.h" /* Here is a delicate example. ref_nt=ATTAAC(RBRBG), read_cs=RBBOG. If we @@ -118,7 +119,7 @@ void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac) // set temporary arrays if (p->type == BWA_TYPE_NO_MATCH) return; len = p->len + p->n_gapo + p->n_gape + 100; // leave enough space - ta = (uint8_t*)malloc(len * 7); + ta = (uint8_t*)xmalloc(len * 7); nt_ref = ta; cs_read = nt_ref + len; nt_read = cs_read + len; diff --git a/is.c b/is.c index 9e50fafc..498cd9bc 100644 --- a/is.c +++ b/is.c @@ -25,6 +25,7 @@ */ #include +#include "utils.h" typedef unsigned char ubyte_t; #define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i]) @@ -105,7 +106,7 @@ static int sais_main(const unsigned char *T, int *SA, int fs, int n, int k, int if (k <= fs) { C = SA + n; B = (k <= (fs - k)) ? C + k : C; - } else if ((C = B = (int *) malloc(k * sizeof(int))) == NULL) return -2; + } else if ((C = B = (int *) xmalloc(k * sizeof(int))) == NULL) return -2; getCounts(T, C, n, k, cs); getBuckets(C, B, k, 1); /* find ends of buckets */ for (i = 0; i < n; ++i) SA[i] = 0; @@ -163,7 +164,7 @@ static int sais_main(const unsigned char *T, int *SA, int fs, int n, int k, int if (k <= fs) { C = SA + n; B = (k <= (fs - k)) ? C + k : C; - } else if ((C = B = (int *) malloc(k * sizeof(int))) == NULL) return -2; + } else if ((C = B = (int *) xmalloc(k * sizeof(int))) == NULL) return -2; /* put all left-most S characters into their buckets */ getCounts(T, C, n, k, cs); getBuckets(C, B, k, 1); /* find ends of buckets */ @@ -204,7 +205,7 @@ int is_sa(const ubyte_t *T, int *SA, int n) int is_bwt(ubyte_t *T, int n) { int *SA, i, primary = 0; - SA = (int*)calloc(n+1, sizeof(int)); + SA = (int*)xcalloc(n+1, sizeof(int)); is_sa(T, SA, n); for (i = 0; i <= n; ++i) { diff --git a/khash.h b/khash.h index de6be6da..7a4c9acd 100644 --- a/khash.h +++ b/khash.h @@ -147,7 +147,7 @@ static const double __ac_HASH_UPPER = 0.77; khval_t *vals; \ } kh_##name##_t; \ static inline kh_##name##_t *kh_init_##name() { \ - return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ + return (kh_##name##_t*)xcalloc(1, sizeof(kh_##name##_t)); \ } \ static inline void kh_destroy_##name(kh_##name##_t *h) \ { \ @@ -188,12 +188,12 @@ static const double __ac_HASH_UPPER = 0.77; new_n_buckets = __ac_prime_list[t+1]; \ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \ else { \ - new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ + new_flags = (khint32_t*)xmalloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ if (h->n_buckets < new_n_buckets) { \ - h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + h->keys = (khkey_t*)xrealloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ if (kh_is_map) \ - h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + h->vals = (khval_t*)xrealloc(h->vals, new_n_buckets * sizeof(khval_t)); \ } \ } \ } \ @@ -227,9 +227,9 @@ static const double __ac_HASH_UPPER = 0.77; } \ } \ if (h->n_buckets > new_n_buckets) { \ - h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + h->keys = (khkey_t*)xrealloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ if (kh_is_map) \ - h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + h->vals = (khval_t*)xrealloc(h->vals, new_n_buckets * sizeof(khval_t)); \ } \ free(h->flags); \ h->flags = new_flags; \ diff --git a/kseq.h b/kseq.h index fe7a9984..7e3d8ec4 100644 --- a/kseq.h +++ b/kseq.h @@ -43,9 +43,9 @@ #define __KS_BASIC(type_t, __bufsize) \ static inline kstream_t *ks_init(type_t f) \ { \ - kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ + kstream_t *ks = (kstream_t*)xcalloc(1, sizeof(kstream_t)); \ ks->f = f; \ - ks->buf = (char*)malloc(__bufsize); \ + ks->buf = (char*)xmalloc(__bufsize); \ return ks; \ } \ static inline void ks_destroy(kstream_t *ks) \ @@ -107,7 +107,7 @@ typedef struct __kstring_t { if (str->m - str->l < i - ks->begin + 1) { \ str->m = str->l + (i - ks->begin) + 1; \ kroundup32(str->m); \ - str->s = (char*)realloc(str->s, str->m); \ + str->s = (char*)xrealloc(str->s, str->m); \ } \ memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \ str->l = str->l + (i - ks->begin); \ @@ -130,7 +130,7 @@ typedef struct __kstring_t { #define __KSEQ_BASIC(type_t) \ static inline kseq_t *kseq_init(type_t fd) \ { \ - kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ + kseq_t *s = (kseq_t*)xcalloc(1, sizeof(kseq_t)); \ s->f = ks_init(fd); \ return s; \ } \ @@ -170,7 +170,7 @@ typedef struct __kstring_t { if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \ seq->seq.m = seq->seq.l + 2; \ kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \ - seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ + seq->seq.s = (char*)xrealloc(seq->seq.s, seq->seq.m); \ } \ seq->seq.s[seq->seq.l++] = (char)c; \ } \ @@ -180,7 +180,7 @@ typedef struct __kstring_t { if (c != '+') return seq->seq.l; /* FASTA */ \ if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \ seq->qual.m = seq->seq.m; \ - seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \ + seq->qual.s = (char*)xrealloc(seq->qual.s, seq->qual.m); \ } \ while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \ if (c == -1) return -2; /* we should not stop here */ \ diff --git a/ksort.h b/ksort.h index 8b55c99d..e0025220 100644 --- a/ksort.h +++ b/ksort.h @@ -72,7 +72,7 @@ typedef struct { int curr, shift; \ \ a2[0] = array; \ - a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \ + a2[1] = temp? temp : (type_t*)xmalloc(sizeof(type_t) * n); \ for (curr = 0, shift = 0; (1ul< s->m - s->l) { s->m = s->l + l + 2; kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); + s->s = (char*)xrealloc(s->s, s->m); va_start(ap, fmt); l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); } @@ -26,7 +26,7 @@ int ksprintf(kstring_t *s, const char *fmt, ...) int main() { kstring_t *s; - s = (kstring_t*)calloc(1, sizeof(kstring_t)); + s = (kstring_t*)xcalloc(1, sizeof(kstring_t)); ksprintf(s, "abcdefg: %d", 100); printf("%s\n", s->s); free(s); diff --git a/kstring.h b/kstring.h index 398901f0..88cf93a9 100644 --- a/kstring.h +++ b/kstring.h @@ -3,6 +3,7 @@ #include #include +#include "utils.h" #ifndef kroundup32 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) @@ -22,7 +23,7 @@ static inline int kputs(const char *p, kstring_t *s) if (s->l + l + 1 >= s->m) { s->m = s->l + l + 2; kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); + s->s = (char*)xrealloc(s->s, s->m); } strcpy(s->s + s->l, p); s->l += l; @@ -34,7 +35,7 @@ static inline int kputc(int c, kstring_t *s) if (s->l + 1 >= s->m) { s->m = s->l + 2; kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); + s->s = (char*)xrealloc(s->s, s->m); } s->s[s->l++] = c; s->s[s->l] = 0; diff --git a/kvec.h b/kvec.h index 57204d66..52f0fbbe 100644 --- a/kvec.h +++ b/kvec.h @@ -60,7 +60,7 @@ int main() { #define kv_size(v) ((v).n) #define kv_max(v) ((v).m) -#define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m)) +#define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)xrealloc((v).a, sizeof(type) * (v).m)) #define kv_copy(type, v1, v0) do { \ if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \ @@ -71,19 +71,19 @@ int main() { #define kv_push(type, v, x) do { \ if ((v).n == (v).m) { \ (v).m = (v).m? (v).m<<1 : 2; \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ + (v).a = (type*)xrealloc((v).a, sizeof(type) * (v).m); \ } \ (v).a[(v).n++] = (x); \ } while (0) #define kv_pushp(type, v) (((v).n == (v).m)? \ ((v).m = ((v).m? (v).m<<1 : 2), \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ + (v).a = (type*)xrealloc((v).a, sizeof(type) * (v).m), 0) \ : 0), ((v).a + ((v).n++)) #define kv_a(type, v, i) ((v).m <= (size_t)(i)? \ ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ + (v).a = (type*)xrealloc((v).a, sizeof(type) * (v).m), 0) \ : (v).n <= (size_t)(i)? (v).n = (i) \ : 0), (v).a[(i)] diff --git a/simple_dp.c b/simple_dp.c index 4663d6ae..4cddb0b4 100644 --- a/simple_dp.c +++ b/simple_dp.c @@ -64,20 +64,20 @@ static seqs_t *load_seqs(const char *fn) fp = xzopen(fn, "r"); seq = kseq_init(fp); - s = (seqs_t*)calloc(1, sizeof(seqs_t)); + s = (seqs_t*)xcalloc(1, sizeof(seqs_t)); s->m_seqs = 256; - s->seqs = (seq1_t*)calloc(s->m_seqs, sizeof(seq1_t)); + s->seqs = (seq1_t*)xcalloc(s->m_seqs, sizeof(seq1_t)); while ((l = kseq_read(seq)) >= 0) { if (s->n_seqs == s->m_seqs) { s->m_seqs <<= 1; - s->seqs = (seq1_t*)realloc(s->seqs, s->m_seqs * sizeof(seq1_t)); + s->seqs = (seq1_t*)xrealloc(s->seqs, s->m_seqs * sizeof(seq1_t)); } p = s->seqs + (s->n_seqs++); p->l = seq->seq.l; - p->s = (unsigned char*)malloc(p->l + 1); + p->s = (unsigned char*)xmalloc(p->l + 1); memcpy(p->s, seq->seq.s, p->l); p->s[p->l] = 0; - p->n = strdup((const char*)seq->name.s); + p->n = xstrdup((const char*)seq->name.s); } kseq_destroy(seq); err_gzclose(fp); diff --git a/stdaln.c b/stdaln.c index 7b55b2e0..90e6b9bf 100644 --- a/stdaln.c +++ b/stdaln.c @@ -28,6 +28,7 @@ #include #include #include "stdaln.h" +#include "utils.h" /* char -> 17 (=16+1) nucleotides */ unsigned char aln_nt16_table[256] = { @@ -232,7 +233,7 @@ AlnParam aln_param_aa2aa = { 10, 2, 2, aln_sm_blosum62, 22, 50 }; AlnAln *aln_init_AlnAln() { AlnAln *aa; - aa = (AlnAln*)malloc(sizeof(AlnAln)); + aa = (AlnAln*)xmalloc(sizeof(AlnAln)); aa->path = 0; aa->out1 = aa->out2 = aa->outm = 0; aa->path_len = 0; @@ -382,13 +383,13 @@ int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2 /* allocate memory */ end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1); - dpcell = (dpcell_t**)malloc(sizeof(dpcell_t*) * (len2 + 1)); + dpcell = (dpcell_t**)xmalloc(sizeof(dpcell_t*) * (len2 + 1)); for (j = 0; j <= len2; ++j) - dpcell[j] = (dpcell_t*)malloc(sizeof(dpcell_t) * end); + dpcell[j] = (dpcell_t*)xmalloc(sizeof(dpcell_t) * end); for (j = b2 + 1; j <= len2; ++j) dpcell[j] -= j - b2; - curr = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1)); - last = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1)); + curr = (dpscore_t*)xmalloc(sizeof(dpscore_t) * (len1 + 1)); + last = (dpscore_t*)xmalloc(sizeof(dpscore_t) * (len1 + 1)); /* set first row */ SET_INF(*curr); curr->M = 0; @@ -556,11 +557,11 @@ int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, if (len1 == 0 || len2 == 0) return -1; /* allocate memory */ - suba = (int*)malloc(sizeof(int) * (len2 + 1)); - eh = (NT_LOCAL_SCORE*)malloc(sizeof(NT_LOCAL_SCORE) * (len1 + 1)); - s_array = (int**)malloc(sizeof(int*) * N_MATRIX_ROW); + suba = (int*)xmalloc(sizeof(int) * (len2 + 1)); + eh = (NT_LOCAL_SCORE*)xmalloc(sizeof(NT_LOCAL_SCORE) * (len1 + 1)); + s_array = (int**)xmalloc(sizeof(int*) * N_MATRIX_ROW); for (i = 0; i != N_MATRIX_ROW; ++i) - s_array[i] = (int*)malloc(sizeof(int) * len1); + s_array[i] = (int*)xmalloc(sizeof(int) * len1); /* initialization */ aln_init_score_array(seq1, len1, N_MATRIX_ROW, score_matrix, s_array); q = gap_open; @@ -773,9 +774,9 @@ AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap, if (len2 < 0) len2 = strlen(seq2); aa = aln_init_AlnAln(); - seq11 = (unsigned char*)malloc(sizeof(unsigned char) * len1); - seq22 = (unsigned char*)malloc(sizeof(unsigned char) * len2); - aa->path = (path_t*)malloc(sizeof(path_t) * (len1 + len2 + 1)); + seq11 = (unsigned char*)xmalloc(sizeof(unsigned char) * len1); + seq22 = (unsigned char*)xmalloc(sizeof(unsigned char) * len2); + aa->path = (path_t*)xmalloc(sizeof(path_t) * (len1 + len2 + 1)); if (ap->row < 10) { /* 4-nucleotide alignment */ for (i = 0; i < len1; ++i) @@ -805,9 +806,9 @@ AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap, aa->score = score; if (thres > 0) { - out1 = aa->out1 = (char*)malloc(sizeof(char) * (aa->path_len + 1)); - out2 = aa->out2 = (char*)malloc(sizeof(char) * (aa->path_len + 1)); - outm = aa->outm = (char*)malloc(sizeof(char) * (aa->path_len + 1)); + out1 = aa->out1 = (char*)xmalloc(sizeof(char) * (aa->path_len + 1)); + out2 = aa->out2 = (char*)xmalloc(sizeof(char) * (aa->path_len + 1)); + outm = aa->outm = (char*)xmalloc(sizeof(char) * (aa->path_len + 1)); --seq1; --seq2; --seq11; --seq22; @@ -881,10 +882,10 @@ int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2 if (len1 == 0 || len2 == 0) return -1; /* allocate memory */ - mem = _mem? _mem : calloc((len1 + 2) * (N_MATRIX_ROW + 1), 4); + mem = _mem? _mem : xcalloc((len1 + 2) * (N_MATRIX_ROW + 1), 4); _p = mem; eh = (uint32_t*)_p, _p += 4 * (len1 + 2); - s_array = calloc(N_MATRIX_ROW, sizeof(void*)); + s_array = xcalloc(N_MATRIX_ROW, sizeof(void*)); for (i = 0; i != N_MATRIX_ROW; ++i) s_array[i] = (int32_t*)_p, _p += 4 * len1; /* initialization */ @@ -1024,7 +1025,7 @@ uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar) last_type = path[i].ctype; } *n_cigar = n; - cigar = (uint32_t*)malloc(*n_cigar * 4); + cigar = (uint32_t*)xmalloc(*n_cigar * 4); cigar[0] = 1u << 4 | path[path_len-1].ctype; last_type = path[path_len-1].ctype; From 8348c2968f81404708cc84406bd0b2d8ed1ca5c1 Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Wed, 9 Jan 2013 14:43:36 +0000 Subject: [PATCH 4/4] Added wrappers for fputc and fputs; more efficient sequence printing Added wrappers err_fputc and err_fputs to catch failures in fput and fputs. Macros err_putchar and err_puts call the new wrappers and can be used in place of putchar and puts. To avoid having to make millions of function calls when printing out sequences, the code to print them in bwa_print_sam1 using putchar has been replaced by a new version in bwa_print_seq that puts the sequence into a buffer and then outputs the lot with err_fwrite. In testing, the new code was slightly faster than the old version, with the added benefit that it will stop promptly if IO problems are detected. --- bwase.c | 54 +++++++++++++++++++++++++++++------------------------- utils.c | 22 ++++++++++++++++++++++ utils.h | 8 ++++++-- 3 files changed, 57 insertions(+), 27 deletions(-) diff --git a/bwase.c b/bwase.c index 0c6b63e0..8a1f00c7 100644 --- a/bwase.c +++ b/bwase.c @@ -625,6 +625,26 @@ static int64_t pos_5(const bwa_seq_t *p) return -1; } +void bwa_print_seq(FILE *stream, bwa_seq_t *seq) { + char buffer[4096]; + const int bsz = sizeof(buffer); + int i, j, l; + + if (seq->strand == 0) { + for (i = 0; i < seq->full_len; i += bsz) { + l = seq->full_len - i > bsz ? bsz : seq->full_len - i; + for (j = 0; j < l; j++) buffer[j] = "ACGTN"[seq->seq[i + j]]; + err_fwrite(buffer, 1, l, stream); + } + } else { + for (i = seq->full_len - 1; i >= 0; i -= bsz) { + l = i + 1 > bsz ? bsz : i + 1; + for (j = 0; j < l; j++) buffer[j] = "TGCAN"[seq->seq[i - j]]; + err_fwrite(buffer, 1, l, stream); + } + } +} + void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2) { int j; @@ -686,18 +706,8 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i } // print sequence and quality - if (p->strand == 0) { - for (j = 0; j != p->full_len; ++j) { - putchar("ACGTN"[(int)p->seq[j]]); - } - } else { - for (j = 0; j != p->full_len; ++j) { - putchar("TGCAN"[p->seq[p->full_len - 1 - j]]); - } - } - - putchar('\t'); - + bwa_print_seq(stdout, p); + err_putchar('\t'); if (p->qual) { if (p->strand) { seq_reverse(p->len, p->qual, 0); // reverse quality @@ -749,23 +759,17 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i } } } - - putchar('\n'); - + err_putchar('\n'); } else { // this read has no match - - ubyte_t *s = p->strand? p->rseq : p->seq; + //ubyte_t *s = p->strand? p->rseq : p->seq; int flag = p->extra_flag | SAM_FSU; if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU; err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag); - - for (j = 0; j != p->len; ++j) { - putchar("ACGTN"[(int)s[j]]); - } - - putchar('\t'); - + //Why did this work differently to the version above?? + //for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]); + bwa_print_seq(stdout, p); + err_putchar('\t'); if (p->qual) { if (p->strand) { seq_reverse(p->len, p->qual, 0); // reverse quality @@ -785,7 +789,7 @@ void bwa_print_sam1a(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, i err_printf("\tXC:i:%d", p->clip_len); } - putchar('\n'); + err_putchar('\n'); } return; diff --git a/utils.c b/utils.c index 430dbab0..82e64471 100644 --- a/utils.c +++ b/utils.c @@ -199,6 +199,28 @@ int err_fprintf(FILE *stream, const char *format, ...) return done; } +int err_fputc(int c, FILE *stream) +{ + int ret = putc(c, stream); + if (EOF == ret) + { + _err_fatal_simple("fputc", strerror(errno)); + } + + return ret; +} + +int err_fputs(const char *s, FILE *stream) +{ + int ret = fputs(s, stream); + if (EOF == ret) + { + _err_fatal_simple("fputs", strerror(errno)); + } + + return ret; +} + int err_fflush(FILE *stream) { int ret = fflush(stream); diff --git a/utils.h b/utils.h index 369e3ea9..cca72c54 100644 --- a/utils.h +++ b/utils.h @@ -71,8 +71,12 @@ extern "C" { ATTRIBUTE((format(printf, 2, 3))); int err_printf(const char *format, ...) ATTRIBUTE((format(printf, 1, 2))); - int err_fflush(FILE *stream); - int err_fclose(FILE *stream); + int err_fputc(int c, FILE *stream); +#define err_putchar(C) err_fputc((C), stdout) + int err_fputs(const char *s, FILE *stream); +#define err_puts(S) err_fputs((S), stdout) + int err_fflush(FILE *stream); + int err_fclose(FILE *stream); int err_gzclose(gzFile file); void *err_calloc(size_t nmemb, size_t size, const char *file, unsigned int line, const char *func); void *err_malloc(size_t size, const char *file, unsigned int line, const char *func);