From bfbfd825f5dfe012df83116dd6f4ba0f521515be Mon Sep 17 00:00:00 2001 From: Iagoba Apellaniz Date: Fri, 23 Apr 2021 14:17:38 +0200 Subject: [PATCH] make \cdot, \interpunct, and \cdotp equivalent (#25157) Co-authored-by: Steven G. Johnson Co-authored-by: Steven G. Johnson --- NEWS.md | 2 ++ doc/src/manual/variables.md | 10 +++++++--- src/flisp/julia_charmap.h | 2 ++ src/flisp/julia_extensions.c | 10 ++++++++++ src/julia-parser.scm | 6 +++--- test/syntax.jl | 5 ++++- 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 24be1feb2108d..8321cb599411a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,7 +14,9 @@ New language features Language changes ---------------- + * `macroexpand`, `@macroexpand`, and `@macroexpand1` no longer wrap errors in a `LoadError`. To reduce breakage, `@test_throws` has been modified so that many affected tests will still pass ([#38379]]. +* The middle dot `·` (`\cdotp` U+00b7) and the Greek interpunct `·` (U+0387) are now treated as equivalent to the dot operator `⋅` (`\cdot` U+22c5) ([#25157]). Compiler/Runtime improvements ----------------------------- diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md index e8e60a15d54c1..4461ee5c32afe 100644 --- a/doc/src/manual/variables.md +++ b/doc/src/manual/variables.md @@ -136,9 +136,13 @@ ERROR: syntax: unexpected "=" Some Unicode characters are considered to be equivalent in identifiers. Different ways of entering Unicode combining characters (e.g., accents) are treated as equivalent (specifically, Julia identifiers are [NFC](http://www.macchiato.com/unicode/nfc-faq)-normalized). -The Unicode characters `ɛ` (U+025B: Latin small letter open e) -and `µ` (U+00B5: micro sign) are treated as equivalent to the corresponding -Greek letters, because the former are easily accessible via some input methods. +Julia also includes a few non-standard equivalences for characters that are +visually similar and are easily entered by some input methods. 
The Unicode +characters `ɛ` (U+025B: Latin small letter open e) and `µ` (U+00B5: micro sign) +are treated as equivalent to the corresponding Greek letters. The middle dot +`·` (U+00B7) and the Greek +[interpunct](https://en.wikipedia.org/wiki/Interpunct) `·` (U+0387) are both +treated as the mathematical dot operator `⋅` (U+22C5). ## Stylistic Conventions diff --git a/src/flisp/julia_charmap.h b/src/flisp/julia_charmap.h index bed88a9ace4cd..7384df0c7f506 100644 --- a/src/flisp/julia_charmap.h +++ b/src/flisp/julia_charmap.h @@ -4,4 +4,6 @@ static const uint32_t charmap[][2] = { { 0x025B, 0x03B5 }, // latin small letter open e -> greek small letter epsilon { 0x00B5, 0x03BC }, // micro sign -> greek small letter mu + { 0x00B7, 0x22C5 }, // middot char -> dot operator (#25098) + { 0x0387, 0x22C5 }, // Greek interpunct -> dot operator (#25098) }; diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index e6ffcfcde131c..dbe94e1388069 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -351,6 +351,15 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg return symbol(fl_ctx, allascii ? 
str.buf : normalize(fl_ctx, str.buf)); } +/* convert a string to a symbol, first applying normalization */ +value_t fl_string2normsymbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) +{ + argcount(fl_ctx, "string->normsymbol", nargs, 1); + if (!fl_isstring(fl_ctx, args[0])) + type_error(fl_ctx, "string->normsymbol", "string", args[0]); + return symbol(fl_ctx, normalize(fl_ctx, (char*)cvalue_data(args[0]))); +} + static const builtinspec_t julia_flisp_func_info[] = { { "skip-ws", fl_skipws }, { "accum-julia-symbol", fl_accum_julia_symbol }, @@ -360,6 +369,7 @@ static const builtinspec_t julia_flisp_func_info[] = { { "op-suffix-char?", fl_julia_op_suffix_char }, { "strip-op-suffix", fl_julia_strip_op_suffix }, { "underscore-symbol?", fl_julia_underscore_symbolp }, + { "string->normsymbol", fl_string2normsymbol }, { NULL, NULL } }; diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 60cd047d12c76..c6510dcbd9536 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -21,7 +21,7 @@ (define prec-colon (append! '(: |..|) (add-dots '(… ⁝ ⋮ ⋱ ⋰ ⋯)))) (define prec-plus (append! '($) (add-dots '(+ - ¦ |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣)))) -(define prec-times (add-dots '(* / ⌿ ÷ % & ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟))) +(define prec-times (add-dots '(* / ⌿ ÷ % & · · ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟))) (define prec-rational (add-dots '(//))) (define prec-bitshift (add-dots '(<< >> >>>))) ;; `where` @@ -234,7 +234,7 @@ (if (and (eqv? c0 #\*) (eqv? (peek-char port) #\*)) (error "use \"x^y\" instead of \"x**y\" for exponentiation, and \"x...\" instead of \"**x\" for splatting.")) (if (or (eof-object? (peek-char port)) (not (op-or-sufchar? 
(peek-char port)))) - (symbol (string c0)) ; 1-char operator + (string->normsymbol (string c0)) ; 1-char operator (let ((str (let loop ((str (string c0)) (c (peek-char port)) (in-suffix? #f)) @@ -267,7 +267,7 @@ (loop newop (peek-char port) sufchar?)) str)) str)))))) - (string->symbol str)))) + (string->normsymbol str)))) (define (accum-digits c pred port _-digit-sep) (let loop ((str '()) diff --git a/test/syntax.jl b/test/syntax.jl index ba71d750254e7..fc79d355e9c11 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -819,7 +819,7 @@ let f = function (x; kw...) end # normalization of Unicode symbols (#19464) -let ε=1, μ=2, x=3, î=4 +let ε=1, μ=2, x=3, î=4, ⋅=5 # issue #5434 (mu vs micro): @test Meta.parse("\u00b5") === Meta.parse("\u03bc") @test µ == μ == 2 @@ -829,6 +829,9 @@ let ε=1, μ=2, x=3, î=4 # latin vs greek ε (#14751) @test Meta.parse("\u025B") === Meta.parse("\u03B5") @test ɛ == ε == 1 + # middot char · or · vs math dot operator ⋅ (#25098) + @test Meta.parse("\u00b7") === Meta.parse("\u0387") === Meta.parse("\u22c5") + @test (·) == (·) == (⋅) == 5 end # issue #8925