From 87a5512e31b1f62d30fd2b0d9e80b6ede7e4963a Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 11 Jun 2012 15:54:49 +0100 Subject: [PATCH 1/3] always increase the size of the sequence buffer --- src/alignment_file.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/alignment_file.c b/src/alignment_file.c index f4c0f40e..582cdb76 100644 --- a/src/alignment_file.c +++ b/src/alignment_file.c @@ -253,10 +253,7 @@ int read_line(char sequence[], FILE * pFilePtr) while((pcRes = fgets(current_line_buffer, sizeof(current_line_buffer), pFilePtr)) != NULL){ - if( strlen(current_line_buffer) > MAX_READ_BUFFER - 10 ) - { - realloc(sequence, strlen(sequence) + MAX_READ_BUFFER + 10 ); - } + sequence = realloc(sequence, strlen(sequence) + strlen(current_line_buffer) + 2 ); strcat(sequence, current_line_buffer); strcpy(current_line_buffer, ""); From 3bf9039f8f48e1c5c72b99835565b80e162ebcd7 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Mon, 11 Jun 2012 16:47:19 +0100 Subject: [PATCH 2/3] use new memory location of read line --- src/alignment_file.c | 8 ++++---- src/alignment_file.h | 2 +- src/parse_phylip.c | 4 ++-- src/parse_vcf.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/alignment_file.c b/src/alignment_file.c index 582cdb76..8c4a9f38 100644 --- a/src/alignment_file.c +++ b/src/alignment_file.c @@ -244,11 +244,11 @@ int read_first_few_characters_of_line(char sequence[], FILE * pFilePtr, int max_ -int read_line(char sequence[], FILE * pFilePtr) +char * read_line(char sequence[], FILE * pFilePtr) { - + strcpy(sequence, ""); char *pcRes = NULL; - int lineLength = 0; + long lineLength = 0; char current_line_buffer[MAX_READ_BUFFER] = {0}; @@ -266,7 +266,7 @@ int read_line(char sequence[], FILE * pFilePtr) } - return 1; + return sequence; } diff --git a/src/alignment_file.h b/src/alignment_file.h index 261b1c6b..cf406242 100644 --- a/src/alignment_file.h +++ b/src/alignment_file.h @@ -28,7 +28,7 @@ int build_reference_sequence(char reference_sequence[], char filename[]); void advance_to_sequence(FILE * alignment_file_pointer); void advance_to_sequence_name(FILE * alignment_file_pointer); int genome_length(char filename[]); -int read_line(char sequence[], FILE * pFilePtr); +char * read_line(char sequence[], FILE * pFilePtr); int number_of_sequences_in_file(char filename[]); void get_sample_names_for_header(char filename[], char ** sequence_names, int number_of_samples); char filter_invalid_characters(char input_char); diff --git a/src/parse_phylip.c b/src/parse_phylip.c index 67d7700d..b3c8616d 100644 --- a/src/parse_phylip.c +++ b/src/parse_phylip.c @@ -267,7 +267,7 @@ void load_sequences_from_phylib(FILE * phylip_file_pointer) // The first line contains the number of samples and snps strcpy(line_buffer,""); - read_line(line_buffer, phylip_file_pointer); + line_buffer = read_line(line_buffer, phylip_file_pointer); num_samples = get_number_of_samples_from_phylip(line_buffer); num_snps = get_number_of_snps_from_phylip(line_buffer); @@ -286,7 +286,7 @@ void load_sequences_from_phylib(FILE * phylip_file_pointer) do{ strcpy(line_buffer,""); - read_line(line_buffer, phylip_file_pointer); + line_buffer = read_line(line_buffer, phylip_file_pointer); if(line_buffer[0] == '\0') { diff --git a/src/parse_vcf.c b/src/parse_vcf.c index 79c2fa6a..504df13e 100644 --- a/src/parse_vcf.c +++ b/src/parse_vcf.c @@ -175,7 +175,7 @@ int get_number_of_columns_from_file(FILE * vcf_file_pointer) do{ strcpy(szBuffer,""); // check the first character of the line to see if its in the header - read_line(szBuffer, vcf_file_pointer); + szBuffer = read_line(szBuffer, vcf_file_pointer); if(szBuffer[0] == '\0' || szBuffer[0] != '#') { break; @@ -204,7 +204,7 @@ void get_column_names(FILE * vcf_file_pointer, char ** column_names, int number_ do{ strcpy(szBuffer,""); // check the first character of the line to see if its in the header - read_line(szBuffer, vcf_file_pointer); + szBuffer = read_line(szBuffer, vcf_file_pointer); if(szBuffer[0] == '\0' || szBuffer[0] != '#') { From da27ac7aab4c122de3ca1754c49c0478782fd892 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Tue, 12 Jun 2012 09:21:01 +0100 Subject: [PATCH 3/3] dont keep extending sequence buffer --- src/alignment_file.c | 6 ++++-- src/alignment_file.h | 2 +- src/parse_phylip.c | 5 ++++- src/parse_phylip.h | 2 +- src/parse_vcf.c | 3 +++ src/parse_vcf.h | 2 +- 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/alignment_file.c b/src/alignment_file.c index 8c4a9f38..6b60062d 100644 --- a/src/alignment_file.c +++ b/src/alignment_file.c @@ -253,8 +253,10 @@ char * read_line(char sequence[], FILE * pFilePtr) while((pcRes = fgets(current_line_buffer, sizeof(current_line_buffer), pFilePtr)) != NULL){ - sequence = realloc(sequence, strlen(sequence) + strlen(current_line_buffer) + 2 ); - + if(strlen(sequence) > 0) + { + sequence = realloc(sequence, strlen(sequence) + strlen(current_line_buffer) + 2 ); + } strcat(sequence, current_line_buffer); strcpy(current_line_buffer, ""); lineLength = strlen(sequence) - 1; diff --git a/src/alignment_file.h b/src/alignment_file.h index cf406242..567f9e4b 100644 --- a/src/alignment_file.h +++ b/src/alignment_file.h @@ -36,7 +36,7 @@ void get_bases_for_each_snp(char filename[], int snp_locations[], char ** bases_ int read_first_few_characters_of_line(char sequence[], FILE * pFilePtr, int max_characters); -#define MAX_READ_BUFFER 262144 +#define MAX_READ_BUFFER 65536 #define MAX_READ_BUFFER_SMALL 1024 #define MAX_SAMPLE_NAME_SIZE 1024 diff --git a/src/parse_phylip.c b/src/parse_phylip.c index b3c8616d..63b16f1f 100644 --- a/src/parse_phylip.c +++ b/src/parse_phylip.c @@ -286,6 +286,9 @@ void load_sequences_from_phylib(FILE * phylip_file_pointer) do{ strcpy(line_buffer,""); + free(line_buffer); + line_buffer = (char *) malloc(MAX_READ_BUFFER*sizeof(char)); + line_buffer = read_line(line_buffer, phylip_file_pointer); if(line_buffer[0] == '\0') @@ -325,7 +328,7 @@ void load_sequences_from_phylib(FILE * phylip_file_pointer) sample_counter++; }while(line_buffer[0] != '\0'); - + free(line_buffer); initialise_statistics(); } diff --git a/src/parse_phylip.h b/src/parse_phylip.h index db79f9a7..6d4d605d 100644 --- a/src/parse_phylip.h +++ b/src/parse_phylip.h @@ -49,7 +49,7 @@ void set_number_of_blocks_for_sample(char * sample_name,int num_blocks); sample_statistics ** get_sample_statistics(); int number_of_snps_in_phylib(); -#define MAX_READ_BUFFER 262144 +#define MAX_READ_BUFFER 65536 #define MAX_SAMPLE_NAME_SIZE 1024 diff --git a/src/parse_vcf.c b/src/parse_vcf.c index 504df13e..97f1c03e 100644 --- a/src/parse_vcf.c +++ b/src/parse_vcf.c @@ -204,6 +204,8 @@ void get_column_names(FILE * vcf_file_pointer, char ** column_names, int number_ do{ strcpy(szBuffer,""); // check the first character of the line to see if its in the header + free(szBuffer); + szBuffer = (char *) malloc(MAX_READ_BUFFER*sizeof(char)); szBuffer = read_line(szBuffer, vcf_file_pointer); if(szBuffer[0] == '\0' || szBuffer[0] != '#') @@ -223,6 +225,7 @@ void get_column_names(FILE * vcf_file_pointer, char ** column_names, int number_ } }while(szBuffer[0] != '\0'); + free(szBuffer); } // Assume the sample names are unique diff --git a/src/parse_vcf.h b/src/parse_vcf.h index d14b41a4..c0ad4d73 100644 --- a/src/parse_vcf.h +++ b/src/parse_vcf.h @@ -31,6 +31,6 @@ void get_column_names(FILE * vcf_file_pointer, char ** column_names, int number_ int column_number_for_column_name(char ** column_names, char * column_name, int number_of_columns); void get_integers_from_column_in_vcf(FILE * vcf_file_pointer, int * integer_values, int number_of_snps, int column_number); -#define MAX_READ_BUFFER 262144 +#define MAX_READ_BUFFER 65536 #endif \ No newline at end of file