diff --git a/src/builtin.c b/src/builtin.c
index 3e99c37615..8e1de2b56f 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -1201,7 +1201,36 @@ static jv f_string_implode(jq_state *jq, jv a) {
   if (jv_get_kind(a) != JV_KIND_ARRAY) {
     return ret_error(a, jv_string("implode input must be an array"));
   }
-  return jv_string_implode(a);
+
+  // Build the string here rather than via jv_string_implode() so that
+  // non-numeric elements yield a catchable error instead of tripping its assert.
+  int len = jv_array_length(jv_copy(a));
+  jv s = jv_string_empty(len);
+
+  for (int i = 0; i < len; i++) {
+    jv n = jv_array_get(jv_copy(a), i);
+    if (jv_get_kind(n) != JV_KIND_NUMBER || jvp_number_is_nan(n)) {
+      jv_free(a);
+      jv_free(s);
+      return type_error(n, "can't be imploded, unicode codepoint needs to be numeric");
+    }
+
+    double d = jv_number_value(n);
+    jv_free(n);
+    // Range-check while still a double: converting a value whose integral
+    // part does not fit in an int (1e300, infinity) is undefined behavior.
+    int nv = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
+    if (d > -1 && d < 0x110000) {
+      nv = (int)d; // truncates toward zero, so nv is in [0, 0x10FFFF]
+      // utf16 surrogate pair range is not a valid codepoint
+      if (nv >= 0xD800 && nv <= 0xDFFF)
+        nv = 0xFFFD;
+    }
+    s = jv_string_append_codepoint(s, nv);
+  }
+
+  jv_free(a);
+  return s;
 }
 
 static jv f_setpath(jq_state *jq, jv a, jv b, jv c) { return jv_setpath(a, b, c); }
diff --git a/src/jv.c b/src/jv.c
index 159b3f272f..b4ee8a2e7c 100644
--- a/src/jv.c
+++ b/src/jv.c
@@ -1368,7 +1368,8 @@ jv jv_string_implode(jv j) {
     assert(JVP_HAS_KIND(n, JV_KIND_NUMBER));
     int nv = jv_number_value(n);
     jv_free(n);
-    if (nv > 0x10FFFF)
+    // outside codepoint range or in utf16 surrogate pair range
+    if (nv < 0 || nv > 0x10FFFF || (nv >= 0xD800 && nv <= 0xDFFF))
       nv = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
     s = jv_string_append_codepoint(s, nv);
   }
diff --git a/src/jv.h b/src/jv.h
index 8c96f822f0..446ffb06e6 100644
--- a/src/jv.h
+++ b/src/jv.h
@@ -63,6 +63,7 @@ jv jv_number(double);
 jv jv_number_with_literal(const char*);
 double jv_number_value(jv);
 int jv_is_integer(jv);
+int jvp_number_is_nan(jv);
 
 int jv_number_has_literal(jv n);
 const char* jv_number_get_literal(jv);
diff --git a/src/jv_type_private.h b/src/jv_type_private.h
index 5996282ba5..a25254dc10 100644
--- a/src/jv_type_private.h
+++ b/src/jv_type_private.h
@@ -2,6 +2,5 @@
 #define JV_TYPE_PRIVATE
 
 int jvp_number_cmp(jv, jv);
-int jvp_number_is_nan(jv);
 
 #endif //JV_TYPE_PRIVATE
diff --git a/tests/jq.test b/tests/jq.test
index 466d185099..95b5136620 100644
--- a/tests/jq.test
+++ b/tests/jq.test
@@ -1914,3 +1914,19 @@ any(keys[]|tostring?;true)
 {"a":"1","b":"2","c":"3"}
 true
 
+
+# explode/implode
+# test replacement character (65533) for outside codepoint range and 0xd800 (55296) - 0xdfff (57343) utf16 surrogate pair range
+# 1.1 and 1.9 to test round down of non-ints
+implode|explode
+[-1,0,1,2,3,1114111,1114112,55295,55296,57343,57344,1.1,1.9]
+[65533,0,1,2,3,1114111,65533,55295,65533,65533,57344,1,1]
+
+map(try implode catch .)
+[123,["a"],[nan]]
+["implode input must be an array","string (\"a\") can't be imploded, unicode codepoint needs to be numeric","number (null) can't be imploded, unicode codepoint needs to be numeric"]
+
+# values outside the int range (and infinities) must also become U+FFFD
+[1e300,-1e300,infinite,-infinite]|implode|explode
+null
+[65533,65533,65533,65533]