From 7a7111d5b79820bd08f73bee843cbc536118aa2a Mon Sep 17 00:00:00 2001 From: Brendan Jurd Date: Fri, 30 Jul 2021 14:19:10 +1000 Subject: [PATCH 1/8] Add support for Degree Sign in DMS input. This commit adds support in dmstor() for a Degree Sign (U+00B0), encoded as UTF-8 (`\xc2\xb0`), as an alternative symbol to `D`/`d` to designate the degree unit. Fixes #2712. --- src/dmstor.cpp | 20 ++++++++++++++------ test/unit/gie_self_tests.cpp | 3 +++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/dmstor.cpp b/src/dmstor.cpp index e65d488468..56f04f3982 100644 --- a/src/dmstor.cpp +++ b/src/dmstor.cpp @@ -27,19 +27,19 @@ dmstor(const char *is, char **rs) { double dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { - int n, nl; + int n, nl, adv; char *s, work[MAX_WORK]; const char* p; double v, tv; if (rs) *rs = (char *)is; - /* copy sting into work space */ + /* copy string into work space */ while (isspace(*is)) ++is; n = MAX_WORK; s = work; p = (char *)is; - while (isgraph(*p) && --n) + while ((isgraph(*p) || *p == (char) 0xc2 || *p == (char) 0xb0) && --n) *s++ = *p++; *s = '\0'; /* it is possible that a really odd input (like lots of leading @@ -48,10 +48,11 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { if (sign == '+' || sign == '-') s++; else sign = '+'; v = 0.; - for (nl = 0 ; nl < 3 ; nl = n + 1 ) { + for (nl = 0 ; nl < 3 ; nl = n + 1) { if (!(isdigit(*s) || *s == '.')) break; if ((tv = proj_strtod(s, &s)) == HUGE_VAL) return tv; + adv = 1; switch (*s) { case 'D': case 'd': n = 0; break; @@ -59,6 +60,13 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { n = 1; break; case '"': n = 2; break; + /* degree symbol ("\xc2\xb0" in UTF-8) */ + case (char) 0xc2: + if (s[1] == (char) 0xb0) { + n = 0; + adv = 2; + break; + } case 'r': case 'R': if (nl) { proj_context_errno_set( ctx, PROJ_ERR_INVALID_OP_ILLEGAL_ARG_VALUE ); @@ -77,9 +85,9 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { return HUGE_VAL; } v += tv * 
vm[n]; - ++s; + s += adv; } - /* postfix sign */ + /* postfix sign */ if (*s && (p = strchr(sym, *s))) { sign = (p - sym) >= 4 ? '-' : '+'; ++s; diff --git a/test/unit/gie_self_tests.cpp b/test/unit/gie_self_tests.cpp index c7c1ddf5a5..afb3956504 100644 --- a/test/unit/gie_self_tests.cpp +++ b/test/unit/gie_self_tests.cpp @@ -442,6 +442,9 @@ TEST(gie, info_functions) { /* we can't expect perfect numerical accuracy so testing with a tolerance */ ASSERT_NEAR(-2.0, proj_dmstor(&buf[0], NULL), 1e-7); + /* test degree sign on DMS input */ + ASSERT_NEAR(0.34512432, proj_dmstor("19°46'27\"E", NULL), 1e-7); + /* test proj_derivatives_retrieve() and proj_factors_retrieve() */ P = proj_create(PJ_DEFAULT_CTX, "+proj=merc +ellps=WGS84"); a = proj_coord(0, 0, 0, 0); From 8e8571ad1c3d99530144b6bab260d5d29fc2b6c4 Mon Sep 17 00:00:00 2001 From: Brendan Jurd Date: Fri, 30 Jul 2021 14:28:03 +1000 Subject: [PATCH 2/8] Update cs2cs docs to show example of degrees sign usage. --- docs/source/apps/cs2cs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/apps/cs2cs.rst b/docs/source/apps/cs2cs.rst index 909912df9b..abddae9e25 100644 --- a/docs/source/apps/cs2cs.rst +++ b/docs/source/apps/cs2cs.rst @@ -261,7 +261,7 @@ The following script :: cs2cs +proj=latlong +datum=NAD83 +to +proj=utm +zone=10 +datum=NAD27 -r < Date: Wed, 4 Aug 2021 23:38:54 +1000 Subject: [PATCH 3/8] Add comment explaining byte codes in dmstor_ctx. Per review feedback from @kbevers on pull #2791. 
--- src/dmstor.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/dmstor.cpp b/src/dmstor.cpp index 56f04f3982..a9263fa109 100644 --- a/src/dmstor.cpp +++ b/src/dmstor.cpp @@ -29,7 +29,7 @@ dmstor(const char *is, char **rs) { dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { int n, nl, adv; char *s, work[MAX_WORK]; - const char* p; + const char* p; double v, tv; if (rs) @@ -39,11 +39,18 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { n = MAX_WORK; s = work; p = (char *)is; + + /* + * Copy characters into work until we hit a non-printable character or run + * out of space in the buffer. Make a special exception for the bytes 0xc2 + * and 0xb0, because they comprise Degree Sign U+00B0 in UTF-8. + * + * It is possible that a really odd input (like lots of leading zeros) + * could be truncated in copying into work. But ... + */ while ((isgraph(*p) || *p == (char) 0xc2 || *p == (char) 0xb0) && --n) *s++ = *p++; *s = '\0'; - /* it is possible that a really odd input (like lots of leading - zeros) could be truncated in copying into work. But ... */ int sign = *(s = work); if (sign == '+' || sign == '-') s++; else sign = '+'; From adba02aa1f1d39a96a2118c820b3829efe50006a Mon Sep 17 00:00:00 2001 From: Brendan Jurd Date: Fri, 6 Aug 2021 22:48:03 +1000 Subject: [PATCH 4/8] Replace 'switch' with 'if' in dmstor_ctx(). In response to PR feedback from @rouault on pull #2791, there was an unintentional fallthrough in the switch statement case for the degree sign. Replacing the 'switch' altogether removes the fallthrough risk, allows us to check for both bytes in the degree sign in the same test, and avoids the need to duplicate the code from the 'default' case in multiple branches. 
--- src/dmstor.cpp | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/dmstor.cpp b/src/dmstor.cpp index a9263fa109..92efc00c37 100644 --- a/src/dmstor.cpp +++ b/src/dmstor.cpp @@ -60,33 +60,32 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { if ((tv = proj_strtod(s, &s)) == HUGE_VAL) return tv; adv = 1; - switch (*s) { - case 'D': case 'd': - n = 0; break; - case '\'': - n = 1; break; - case '"': - n = 2; break; - /* degree symbol ("\xc2\xb0" in UTF-8) */ - case (char) 0xc2: - if (s[1] == (char) 0xb0) { - n = 0; - adv = 2; - break; - } - case 'r': case 'R': + + if (*s == 'D' || *s == 'd') { + n = 0; + } else if (*s == '\'') { + n = 1; + } else if (*s == '"') { + n = 2; + } else if (s[0] == (char) 0xc2 && s[1] == (char) 0xb0) { + /* degree symbol ("\xc2\xb0" in UTF-8) */ + n = 0; + adv = 2; + } else if (*s == 'r' || *s == 'R') { if (nl) { proj_context_errno_set( ctx, PROJ_ERR_INVALID_OP_ILLEGAL_ARG_VALUE ); return HUGE_VAL; } ++s; v = tv; - goto skip; - default: + n = 4; + continue; + } else { v += tv * vm[nl]; - skip: n = 4; + n = 4; continue; } + if (n < nl) { proj_context_errno_set( ctx, PROJ_ERR_INVALID_OP_ILLEGAL_ARG_VALUE ); return HUGE_VAL; From 8236ff62bb21d7171da9c531b4f4f30ab94f44b1 Mon Sep 17 00:00:00 2001 From: Brendan Jurd Date: Mon, 9 Aug 2021 17:34:33 +1000 Subject: [PATCH 5/8] Make Degree Sign bytes into constexprs. Also remove the cast from the raw unsigned numeric byte value to 'char', instead just initialise the constexprs for the bytes as 'char'. --- src/dmstor.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/dmstor.cpp b/src/dmstor.cpp index 92efc00c37..32d0dbd110 100644 --- a/src/dmstor.cpp +++ b/src/dmstor.cpp @@ -20,6 +20,12 @@ vm[] = { .0002908882086657216, .0000048481368110953599 }; +/* byte sequence for Degree Sign U+00B0 in UTF-8. 
*/ + static constexpr char +DEG_SIGN1 = '\xc2'; + static constexpr char +DEG_SIGN2 = '\xb0'; + double dmstor(const char *is, char **rs) { return dmstor_ctx( pj_get_default_ctx(), is, rs ); @@ -42,13 +48,13 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { /* * Copy characters into work until we hit a non-printable character or run - * out of space in the buffer. Make a special exception for the bytes 0xc2 - * and 0xb0, because they comprise Degree Sign U+00B0 in UTF-8. + * out of space in the buffer. Make a special exception for the bytes of + * the Degree Sign in UTF-8. * * It is possible that a really odd input (like lots of leading zeros) * could be truncated in copying into work. But ... */ - while ((isgraph(*p) || *p == (char) 0xc2 || *p == (char) 0xb0) && --n) + while ((isgraph(*p) || *p == DEG_SIGN1 || *p == DEG_SIGN2) && --n) *s++ = *p++; *s = '\0'; int sign = *(s = work); @@ -67,8 +73,8 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { n = 1; } else if (*s == '"') { n = 2; - } else if (s[0] == (char) 0xc2 && s[1] == (char) 0xb0) { - /* degree symbol ("\xc2\xb0" in UTF-8) */ + } else if (s[0] == DEG_SIGN1 && s[1] == DEG_SIGN2) { + /* degree symbol in UTF-8 */ n = 0; adv = 2; } else if (*s == 'r' || *s == 'R') { From 34944e642e74fdf9f43a910a4027b05886cf9b50 Mon Sep 17 00:00:00 2001 From: Brendan Jurd Date: Sat, 14 Aug 2021 10:39:53 +1000 Subject: [PATCH 6/8] Move declaration of `adv` inside loop in dmstor_ctx. 
Per PR feedback on pull #2791 --- src/dmstor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dmstor.cpp b/src/dmstor.cpp index 32d0dbd110..15640d7de4 100644 --- a/src/dmstor.cpp +++ b/src/dmstor.cpp @@ -33,7 +33,7 @@ dmstor(const char *is, char **rs) { double dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { - int n, nl, adv; + int n, nl; char *s, work[MAX_WORK]; const char* p; double v, tv; @@ -65,7 +65,7 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { if (!(isdigit(*s) || *s == '.')) break; if ((tv = proj_strtod(s, &s)) == HUGE_VAL) return tv; - adv = 1; + int adv = 1; if (*s == 'D' || *s == 'd') { n = 0; From d6da53a92dd86776d99cd2390f05e9ae9ad366ce Mon Sep 17 00:00:00 2001 From: Brendan Jurd Date: Sat, 14 Aug 2021 11:26:39 +1000 Subject: [PATCH 7/8] Accept \xb0 as a single-byte degree symbol. This byte is the degree symbol in many widely used single-byte encodings, including ISO 8859 parts 1-4, 7-10, 13, 15, 16, and several Windows code pages. With this change, there are now two accepted encodings for the degree symbol in dmstor: - `\xc2\xb0` (UTF-8) - `\xb0` (various single-byte encodings) Per PR feedback on pull #2791. --- src/dmstor.cpp | 7 ++++++- test/unit/gie_self_tests.cpp | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/dmstor.cpp b/src/dmstor.cpp index 15640d7de4..eeff1841b7 100644 --- a/src/dmstor.cpp +++ b/src/dmstor.cpp @@ -67,7 +67,12 @@ dmstor_ctx(PJ_CONTEXT *ctx, const char *is, char **rs) { return tv; int adv = 1; - if (*s == 'D' || *s == 'd') { + if (*s == 'D' || *s == 'd' || *s == DEG_SIGN2) { + /* + * Accept \xb0 as a single-byte degree symbol. This byte is the + * degree symbol in various single-byte encodings: multiple ISO + * 8859 parts, several Windows code pages and others. 
+ */ n = 0; } else if (*s == '\'') { n = 1; diff --git a/test/unit/gie_self_tests.cpp b/test/unit/gie_self_tests.cpp index afb3956504..125fc96389 100644 --- a/test/unit/gie_self_tests.cpp +++ b/test/unit/gie_self_tests.cpp @@ -442,9 +442,12 @@ TEST(gie, info_functions) { /* we can't expect perfect numerical accuracy so testing with a tolerance */ ASSERT_NEAR(-2.0, proj_dmstor(&buf[0], NULL), 1e-7); - /* test degree sign on DMS input */ + /* test UTF-8 degree sign on DMS input */ ASSERT_NEAR(0.34512432, proj_dmstor("19°46'27\"E", NULL), 1e-7); + /* test ISO 8859-1, cp1252, et al. degree sign on DMS input */ + ASSERT_NEAR(0.34512432, proj_dmstor("19\26046'27\"E", NULL), 1e-7); + /* test proj_derivatives_retrieve() and proj_factors_retrieve() */ P = proj_create(PJ_DEFAULT_CTX, "+proj=merc +ellps=WGS84"); a = proj_coord(0, 0, 0, 0); From d33dd14650ee6f521404d702ec09a2b7f1c3aa68 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sun, 15 Aug 2021 12:34:28 +0200 Subject: [PATCH 8/8] Update test/unit/gie_self_tests.cpp --- test/unit/gie_self_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/gie_self_tests.cpp b/test/unit/gie_self_tests.cpp index 125fc96389..f0d4ea8600 100644 --- a/test/unit/gie_self_tests.cpp +++ b/test/unit/gie_self_tests.cpp @@ -446,7 +446,7 @@ TEST(gie, info_functions) { ASSERT_NEAR(0.34512432, proj_dmstor("19°46'27\"E", NULL), 1e-7); /* test ISO 8859-1, cp1252, et al. degree sign on DMS input */ - ASSERT_NEAR(0.34512432, proj_dmstor("19\26046'27\"E", NULL), 1e-7); + ASSERT_NEAR(0.34512432, proj_dmstor("19" "\260" "46'27\"E", NULL), 1e-7); /* test proj_derivatives_retrieve() and proj_factors_retrieve() */ P = proj_create(PJ_DEFAULT_CTX, "+proj=merc +ellps=WGS84");