Skip to content

Commit

Permalink
Improve performance of sdssplitargs
Browse files Browse the repository at this point in the history
Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
  • Loading branch information
madolson committed Oct 25, 2024
1 parent b77440a commit a7bc86b
Show file tree
Hide file tree
Showing 3 changed files with 354 additions and 82 deletions.
181 changes: 100 additions & 81 deletions src/sds.c
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,86 @@ int hex_digit_to_int(char c) {
}
}

/* Helper function for sdssplitargs that parses a single argument. It
* populates either the number characters needed to store the parsed argument
* in len, if provided, or will copy the parsed string into dst, if provided.
* If the string is able to be parsed, this function returns the number of
* characters that were parsed. If the argument can't be parsed, it
* returns 0. */
static int sdsparsearg(const char *arg, unsigned int *len, char *dst) {
const char *p = arg;
int inq = 0; /* set to 1 if we are in "quotes" */
int insq = 0; /* set to 1 if we are in 'single quotes' */
int done = 0;

while (!done) {
char new_char = 0;
if (inq) {
if (*p == '\\' && *(p + 1) == 'x' && is_hex_digit(*(p + 2)) && is_hex_digit(*(p + 3))) {
new_char = (hex_digit_to_int(*(p + 2)) * 16) + hex_digit_to_int(*(p + 3));
p += 3;
} else if (*p == '\\' && *(p + 1)) {
p++;
switch (*p) {
case 'n': new_char = '\n'; break;
case 'r': new_char = '\r'; break;
case 't': new_char = '\t'; break;
case 'b': new_char = '\b'; break;
case 'a': new_char = '\a'; break;
default: new_char = *p; break;
}
} else if (*p == '"') {
/* closing quote must be followed by a space or
* nothing at all. */
if (*(p + 1) && !isspace(*(p + 1))) return 0;
done = 1;
} else if (!*p) {
/* unterminated quotes */
return 0;
} else {
new_char = *p;
}
} else if (insq) {
if (*p == '\\' && *(p + 1) == '\'') {
p++;
new_char = *p;
} else if (*p == '\'') {
/* closing quote must be followed by a space or
* nothing at all. */
if (*(p + 1) && !isspace(*(p + 1))) return 0;
done = 1;
} else if (!*p) {
/* unterminated quotes */
return 0;
} else {
new_char = *p;
}
} else {
switch (*p) {
case ' ':
case '\n':
case '\r':
case '\t':
case '\0': done = 1; break;
case '"': inq = 1; break;
case '\'': insq = 1; break;
default: new_char = *p; break;
}
}
if (new_char) {
if (len) (*len)++;
if (dst) {
*dst = new_char;
dst++;
}
}
if (*p) {
p++;
}
}
return p - arg;
}

/* Split a line into arguments, where every argument can be in the
* following programming-language REPL-alike form:
*
Expand All @@ -1049,103 +1129,42 @@ int hex_digit_to_int(char c) {
* The function returns the allocated tokens on success, even when the
* input string is empty, or NULL if the input contains unbalanced
* quotes or closed quotes followed by non space characters
* as in: "foo"bar or "foo'
* as in: "foo"bar or "foo'.
*
* The sds strings returned by this function are not initialized with
* extra space.
*/
sds *sdssplitargs(const char *line, int *argc) {
const char *p = line;
char *current = NULL;
char **vector = NULL;

*argc = 0;
while (1) {
while (*p) {
/* skip blanks */
while (*p && isspace(*p)) p++;
if (*p) {
/* get a token */
int inq = 0; /* set to 1 if we are in "quotes" */
int insq = 0; /* set to 1 if we are in 'single quotes' */
int done = 0;

if (current == NULL) current = sdsempty();
while (!done) {
if (inq) {
if (*p == '\\' && *(p + 1) == 'x' && is_hex_digit(*(p + 2)) && is_hex_digit(*(p + 3))) {
unsigned char byte;

byte = (hex_digit_to_int(*(p + 2)) * 16) + hex_digit_to_int(*(p + 3));
current = sdscatlen(current, (char *)&byte, 1);
p += 3;
} else if (*p == '\\' && *(p + 1)) {
char c;

p++;
switch (*p) {
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'b': c = '\b'; break;
case 'a': c = '\a'; break;
default: c = *p; break;
}
current = sdscatlen(current, &c, 1);
} else if (*p == '"') {
/* closing quote must be followed by a space or
* nothing at all. */
if (*(p + 1) && !isspace(*(p + 1))) goto err;
done = 1;
} else if (!*p) {
/* unterminated quotes */
goto err;
} else {
current = sdscatlen(current, p, 1);
}
} else if (insq) {
if (*p == '\\' && *(p + 1) == '\'') {
p++;
current = sdscatlen(current, "'", 1);
} else if (*p == '\'') {
/* closing quote must be followed by a space or
* nothing at all. */
if (*(p + 1) && !isspace(*(p + 1))) goto err;
done = 1;
} else if (!*p) {
/* unterminated quotes */
goto err;
} else {
current = sdscatlen(current, p, 1);
}
} else {
switch (*p) {
case ' ':
case '\n':
case '\r':
case '\t':
case '\0': done = 1; break;
case '"': inq = 1; break;
case '\'': insq = 1; break;
default: current = sdscatlen(current, p, 1); break;
}
}
if (*p) p++;
}
if (!(*p)) break;
unsigned int len = 0;
if (sdsparsearg(p, &len, NULL)) {
sds current = sdsnewlen(SDS_NOINIT, len);
int parsedlen = sdsparsearg(p, NULL, current);
assert(parsedlen > 0);
p += parsedlen;

/* add the token to the vector */
vector = s_realloc(vector, ((*argc) + 1) * sizeof(char *));
vector[*argc] = current;
(*argc)++;
current = NULL;
} else {
/* Even on empty input string return something not NULL. */
if (vector == NULL) vector = s_malloc(sizeof(void *));
return vector;
while ((*argc)--) sdsfree(vector[*argc]);
s_free(vector);
*argc = 0;
return NULL;
}
}

err:
while ((*argc)--) sdsfree(vector[*argc]);
s_free(vector);
if (current) sdsfree(current);
*argc = 0;
return NULL;
/* Even on empty input string return something not NULL. */
if (vector == NULL) vector = s_malloc(sizeof(void *));
return vector;
}

/* Modify the string substituting all the occurrences of the set of
Expand Down
4 changes: 3 additions & 1 deletion src/unit/test_files.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ int test_raxFuzz(int argc, char **argv, int flags);
int test_sds(int argc, char **argv, int flags);
int test_typesAndAllocSize(int argc, char **argv, int flags);
int test_sdsHeaderSizes(int argc, char **argv, int flags);
int test_sdssplitargs(int argc, char **argv, int flags);
int test_sdssplitargs_benchmark(int argc, char **argv, int flags);
int test_sha1(int argc, char **argv, int flags);
int test_string2ll(int argc, char **argv, int flags);
int test_string2l(int argc, char **argv, int flags);
Expand Down Expand Up @@ -157,7 +159,7 @@ unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEnco
unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}};
unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}};
unitTest __test_rax_c[] = {{"test_raxRandomWalk", test_raxRandomWalk}, {"test_raxIteratorUnitTests", test_raxIteratorUnitTests}, {"test_raxTryInsertUnitTests", test_raxTryInsertUnitTests}, {"test_raxRegressionTest1", test_raxRegressionTest1}, {"test_raxRegressionTest2", test_raxRegressionTest2}, {"test_raxRegressionTest3", test_raxRegressionTest3}, {"test_raxRegressionTest4", test_raxRegressionTest4}, {"test_raxRegressionTest5", test_raxRegressionTest5}, {"test_raxRegressionTest6", test_raxRegressionTest6}, {"test_raxBenchmark", test_raxBenchmark}, {"test_raxHugeKey", test_raxHugeKey}, {"test_raxFuzz", test_raxFuzz}, {NULL, NULL}};
unitTest __test_sds_c[] = {{"test_sds", test_sds}, {"test_typesAndAllocSize", test_typesAndAllocSize}, {"test_sdsHeaderSizes", test_sdsHeaderSizes}, {NULL, NULL}};
unitTest __test_sds_c[] = {{"test_sds", test_sds}, {"test_typesAndAllocSize", test_typesAndAllocSize}, {"test_sdsHeaderSizes", test_sdsHeaderSizes}, {"test_sdssplitargs", test_sdssplitargs}, {"test_sdssplitargs_benchmark", test_sdssplitargs_benchmark}, {NULL, NULL}};
unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}};
unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}};
unitTest __test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, {"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, {"test_ziplistRegressionTestForOver255ByteStrings", test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, {"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}};
Expand Down
Loading

0 comments on commit a7bc86b

Please sign in to comment.