Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: allow operator suffixes — combining characters and primes #22089

Merged
merged 10 commits into from
Sep 20, 2017
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ New language features
* Local variables can be tested for being defined
using the new `@isdefined variable` macro ([#TBD]).

* Custom infix operators can now be defined by appending Unicode
combining marks, primes, and sub/superscripts to other operators.
For example, `+̂ₐ″` is parsed as an infix operator with the same
precedence as `+` ([#22089]).

Language changes
----------------

Expand Down
3 changes: 2 additions & 1 deletion doc/src/manual/variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ Operators like `+` are also valid identifiers, but are parsed specially. In some
can be used just like variables; for example `(+)` refers to the addition function, and `(+) = f`
will reassign it. Most of the Unicode infix operators (in category Sm), such as `⊕`, are parsed
as infix operators and are available for user-defined methods (e.g. you can use `const ⊗ = kron`
to define `⊗` as an infix Kronecker product).
to define `⊗` as an infix Kronecker product). Operators can also be suffixed with modifying marks,
primes, and sub/superscripts, e.g. `+̂ₐ″` is parsed as an infix operator with the same precedence as `+`.

The only explicitly disallowed names for variables are the names of built-in statements:

Expand Down
2 changes: 0 additions & 2 deletions src/flisp/flisp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2304,7 +2304,6 @@ static const builtinspec_t core_builtin_info[] = {

extern void builtins_init(fl_context_t *fl_ctx);
extern void comparehash_init(fl_context_t *fl_ctx);
extern void jl_charmap_init(fl_context_t *fl_ctx);

static void lisp_init(fl_context_t *fl_ctx, size_t initial_heapsize)
{
Expand Down Expand Up @@ -2337,7 +2336,6 @@ static void lisp_init(fl_context_t *fl_ctx, size_t initial_heapsize)
fl_ctx->consflags = bitvector_new(fl_ctx->heapsize/sizeof(cons_t), 1);
fl_print_init(fl_ctx);
comparehash_init(fl_ctx);
jl_charmap_init(fl_ctx);
fl_ctx->N_STACK = 262144;
fl_ctx->Stack = (value_t*)malloc(fl_ctx->N_STACK*sizeof(value_t));
CHECK_ALIGN8(fl_ctx->Stack);
Expand Down
2 changes: 1 addition & 1 deletion src/flisp/flisp.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ int fl_load_system_image_str(fl_context_t *fl_ctx, char* str, size_t len);
/* julia extensions */
JL_DLLEXPORT int jl_id_char(uint32_t wc);
JL_DLLEXPORT int jl_id_start_char(uint32_t wc);
JL_DLLEXPORT int jl_op_suffix_char(uint32_t wc);

struct _fl_context_t {
symbol_t *symtab;
Expand Down Expand Up @@ -406,7 +407,6 @@ struct _fl_context_t {
fltype_t *builtintype;

htable_t equal_eq_hashtable;
htable_t jl_charmap;

value_t tablesym;
fltype_t *tabletype;
Expand Down
101 changes: 77 additions & 24 deletions src/flisp/julia_extensions.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
extern "C" {
#endif

/* Instantiate a hash table flavor named `wcharhash` whose keys are Unicode
   code points stored directly in the pointer-sized key slot (call sites in
   this file cast with (void*)((uintptr_t)codepoint)). */
// two keys match exactly when their pointer-sized values are equal; ctx is unused
#define _equal_wchar_(x, y, ctx) ((x) == (y))
// hash a key by narrowing it back to 32 bits and feeding it to inthash; ctx is unused
#define _hash_wchar_(x, ctx) inthash((uint32_t) ((uintptr_t) (x)))
#include "htable.inc"
// presumably expands to the wcharhash_*_r functions (wcharhash_put_r and
// wcharhash_get_r are called in this file) — confirm against htable.inc
HTIMPL_R(wcharhash, _hash_wchar_, _equal_wchar_)

static int is_uws(uint32_t wc)
{
return (wc==9 || wc==10 || wc==11 || wc==12 || wc==13 || wc==32 ||
Expand Down Expand Up @@ -134,6 +139,28 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc)
return 0;
}

#include "julia_opsuffs.h"

// Predicate: may the code point `wc` follow an operator (e.g. +) and be
// parsed as part of that operator?
//
// Returns 1 for code points >= U+00A1 and <= U+10FFFF that are either in a
// Unicode mark category (Mn, Mc, Me) or listed in the `opsuffs` table
// (primes and sub/superscripts); returns 0 for everything else.
int jl_op_suffix_char(uint32_t wc)
{
    // Lookup set built from `opsuffs` on the first call and kept for the
    // lifetime of the process; a zero `size` means "not built yet".
    static htable_t jl_opsuffs;
    if (jl_opsuffs.size == 0) {
        const size_t suffix_count = sizeof(opsuffs) / (sizeof(uint32_t));
        size_t k;
        htable_t *table = htable_new(&jl_opsuffs, suffix_count);
        // code points are stored directly in the pointer-sized key slot
        assert(sizeof(uint32_t) <= sizeof(void*));
        for (k = 0; k < suffix_count; ++k) {
            // only key presence matters, so the stored value is NULL
            wcharhash_put_r(table, (void*)((uintptr_t)opsuffs[k]), NULL, NULL);
        }
    }

    // nothing below U+00A1 or beyond the Unicode range is a suffix
    if (wc < 0xA1 || wc > 0x10ffff)
        return 0;

    // every combining/enclosing mark is accepted without consulting the table
    switch (utf8proc_category((utf8proc_int32_t) wc)) {
    case UTF8PROC_CATEGORY_MN:
    case UTF8PROC_CATEGORY_MC:
    case UTF8PROC_CATEGORY_ME:
        return 1;
    default:
        break;
    }

    // otherwise fall back to the explicit table of primes and sub/superscripts
    void *hit = wcharhash_get_r(&jl_opsuffs, (void*)((uintptr_t)wc), NULL);
    return hit != HT_NOTFOUND;
}

value_t fl_julia_identifier_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
argcount(fl_ctx, "identifier-char?", nargs, 1);
Expand All @@ -152,33 +179,57 @@ value_t fl_julia_identifier_start_char(fl_context_t *fl_ctx, value_t *args, uint
return jl_id_start_char(wc) ? fl_ctx->T : fl_ctx->F;
}

#include "julia_charmap.h"
#define _equal_wchar_(x, y, ctx) ((x) == (y))
#define _hash_wchar_(x, ctx) inthash((uint32_t) ((uintptr_t) (x)))
#include "htable.inc"
HTIMPL_R(wcharhash, _hash_wchar_, _equal_wchar_)
// flisp builtin `op-suffix-char?`: takes exactly one argument, which must be
// a wchar cprim (otherwise a type error is signalled), and returns the
// context's T when jl_op_suffix_char accepts that code point, F otherwise.
value_t fl_julia_op_suffix_char(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
    argcount(fl_ctx, "op-suffix-char?", nargs, 1);

    int is_wchar = iscprim(args[0]) &&
                   ((cprim_t*)ptr(args[0]))->type == fl_ctx->wchartype;
    if (!is_wchar)
        type_error(fl_ctx, "op-suffix-char?", "wchar", args[0]);

    // unbox the code point carried by the wchar cprim
    uint32_t codepoint = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
    if (jl_op_suffix_char(codepoint))
        return fl_ctx->T;
    return fl_ctx->F;
}

// NOTE(review): this span appears to come from a rendered diff without +/-
// markers, so two definitions are interleaved: presumably the
// `jl_charmap_init` signature and the charmap-filling loop are the removed
// side, and `fl_julia_strip_op_suffix` is the added side — confirm against
// the actual file before editing.
void jl_charmap_init(fl_context_t *fl_ctx)
// flisp builtin `strip-op-suffix`: takes exactly one symbol argument and
// returns the symbol whose name is the argument's name truncated at the first
// character accepted by jl_op_suffix_char. Returns the argument itself when
// no such character occurs, and raises an "invalid operator" error when the
// very first character is a suffix character.
value_t fl_julia_strip_op_suffix(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
size_t charmap_len = sizeof(charmap) / (2*sizeof(uint32_t));
size_t i;
htable_t *h = htable_new(&fl_ctx->jl_charmap, charmap_len);
assert(sizeof(uint32_t) <= sizeof(void*));
for (i = 0; i < charmap_len; ++i) {
/* Store charmap in a hash table. Typecasting codepoints
directly to pointer keys works because pointers are at
least 32 bits on all Julia-supported systems, and because
we never map anything to U+0001 (since HT_NOTFOUND is (void*)1). */
assert((void*)(uintptr_t)charmap[i][1] != HT_NOTFOUND);
wcharhash_put_r(h, (void*)((uintptr_t)charmap[i][0]),
(void*)((uintptr_t)charmap[i][1]), (void*)fl_ctx);
argcount(fl_ctx, "strip-op-suffix", nargs, 1);
if (!issymbol(args[0]))
type_error(fl_ctx, "strip-op-suffix", "symbol", args[0]);
char *op = symbol_name(fl_ctx, args[0]);
// i is the byte offset of the character being examined; it stops at the
// first suffix character or at the terminating NUL
size_t i = 0;
while (op[i]) {
// j is a scratch offset handed to u8_nextchar (presumably advanced past the
// decoded character — confirm); i only catches up when that character is
// not a suffix character
size_t j = i;
if (jl_op_suffix_char(u8_nextchar(op, &j)))
break;
i = j;
}
if (!op[i]) return args[0]; // no suffix to strip
// a suffix character at offset 0 leaves no base operator to return
if (!i) lerror(fl_ctx, symbol(fl_ctx, "error"), "invalid operator");
// copy the first i bytes (the base operator) into a temporary NUL-terminated buffer
// NOTE(review): the malloc result is passed to strncpy without a NULL check
char *opnew = strncpy(malloc(i+1), op, i);
opnew[i] = 0;
value_t opnew_symbol = symbol(fl_ctx, opnew);
// the temporary buffer is released as soon as the symbol has been obtained
free(opnew);
return opnew_symbol;
}
// NOTE(review): this span appears to come from a rendered diff without +/-
// markers; presumably the `void *fl_ctx_` signature and the three lines that
// read `fl_ctx->jl_charmap` are the removed side, and the `void *ctx`
// signature with the function-local static table is the added side — confirm
// against the actual file before editing.
utf8proc_int32_t jl_charmap_map(utf8proc_int32_t c, void *fl_ctx_)

#include "julia_charmap.h"

// Code point mapping callback handed to utf8proc_decompose_custom by
// normalize(): returns the replacement recorded for `c` in `charmap`, or `c`
// itself when the table has no entry. In the static-table variant `ctx` is
// not read.
utf8proc_int32_t jl_charmap_map(utf8proc_int32_t c, void *ctx)
{
fl_context_t *fl_ctx = (fl_context_t *) fl_ctx_;
htable_t *h = &fl_ctx->jl_charmap;
void *v = wcharhash_get_r(h, (void*)((uintptr_t)c), (void*) fl_ctx);
// table built from `charmap` on the first call; a zero `size` means "not built yet"
static htable_t jl_charmap;
if (!jl_charmap.size) { // initialize hash table
// each charmap row is a pair of uint32_t: [0] is the key, [1] the mapped value
size_t i, charmap_len = sizeof(charmap) / (2*sizeof(uint32_t));
htable_t *h = htable_new(&jl_charmap, charmap_len);
assert(sizeof(uint32_t) <= sizeof(void*));
for (i = 0; i < charmap_len; ++i) {
/* Store charmap in a hash table. Typecasting codepoints
directly to pointer keys works because pointers are at
least 32 bits on all Julia-supported systems, and because
we never map anything to U+0001 (since HT_NOTFOUND is (void*)1). */
assert((void*)(uintptr_t)charmap[i][1] != HT_NOTFOUND);
wcharhash_put_r(h, (void*)((uintptr_t)charmap[i][0]),
(void*)((uintptr_t)charmap[i][1]), NULL);
}
}
void *v = wcharhash_get_r(&jl_charmap, (void*)((uintptr_t)c), NULL);
// HT_NOTFOUND means no mapping was recorded, so the input passes through unchanged
return v == HT_NOTFOUND ? c : (utf8proc_int32_t) ((uintptr_t) v);
}

Expand All @@ -191,7 +242,7 @@ static char *normalize(fl_context_t *fl_ctx, char *s)
ssize_t result;
size_t newlen;
result = utf8proc_decompose_custom((uint8_t*) s, 0, NULL, 0, (utf8proc_option_t)options,
jl_charmap_map, (void*) fl_ctx);
jl_charmap_map, NULL);
if (result < 0) goto error;
newlen = result * sizeof(int32_t) + 1;
if (newlen > fl_ctx->jlbuflen) {
Expand All @@ -200,7 +251,7 @@ static char *normalize(fl_context_t *fl_ctx, char *s)
if (!fl_ctx->jlbuf) lerror(fl_ctx, fl_ctx->OutOfMemoryError, "error allocating UTF8 buffer");
}
result = utf8proc_decompose_custom((uint8_t*)s,0, (int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options,
jl_charmap_map, (void*) fl_ctx);
jl_charmap_map, NULL);
if (result < 0) goto error;
result = utf8proc_reencode((int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options);
if (result < 0) goto error;
Expand Down Expand Up @@ -245,6 +296,8 @@ static const builtinspec_t julia_flisp_func_info[] = {
{ "accum-julia-symbol", fl_accum_julia_symbol },
{ "identifier-char?", fl_julia_identifier_char },
{ "identifier-start-char?", fl_julia_identifier_start_char },
{ "op-suffix-char?", fl_julia_op_suffix_char },
{ "strip-op-suffix", fl_julia_strip_op_suffix },
{ NULL, NULL }
};

Expand Down
127 changes: 127 additions & 0 deletions src/flisp/julia_opsuffs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/* Array of codepoints allowed as operator suffixes in Julia:
   primes and Latin/Greek/math super/subscripts.

   The table is sorted by code point and must contain no whitespace: any
   character listed here is glued onto a preceding operator by the parser.
   (An earlier revision of the generator string accidentally contained
   U+2009 THIN SPACE, which turned that space into an operator suffix.)

   produced by:

   for c in sort(unique(collect("₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ" * "′″‴‵‶‷⁗")))
       println("    ", repr(UInt32(c)), ", // ", c)
   end
*/

static const uint32_t opsuffs[] = {
    0x000000b2, // ²
    0x000000b3, // ³
    0x000000b9, // ¹
    0x000002b0, // ʰ
    0x000002b2, // ʲ
    0x000002b3, // ʳ
    0x000002b7, // ʷ
    0x000002b8, // ʸ
    0x000002e1, // ˡ
    0x000002e2, // ˢ
    0x000002e3, // ˣ
    0x00001d2c, // ᴬ
    0x00001d2e, // ᴮ
    0x00001d30, // ᴰ
    0x00001d31, // ᴱ
    0x00001d33, // ᴳ
    0x00001d34, // ᴴ
    0x00001d35, // ᴵ
    0x00001d36, // ᴶ
    0x00001d37, // ᴷ
    0x00001d38, // ᴸ
    0x00001d39, // ᴹ
    0x00001d3a, // ᴺ
    0x00001d3c, // ᴼ
    0x00001d3e, // ᴾ
    0x00001d3f, // ᴿ
    0x00001d40, // ᵀ
    0x00001d41, // ᵁ
    0x00001d42, // ᵂ
    0x00001d43, // ᵃ
    0x00001d47, // ᵇ
    0x00001d48, // ᵈ
    0x00001d49, // ᵉ
    0x00001d4d, // ᵍ
    0x00001d4f, // ᵏ
    0x00001d50, // ᵐ
    0x00001d52, // ᵒ
    0x00001d56, // ᵖ
    0x00001d57, // ᵗ
    0x00001d58, // ᵘ
    0x00001d5b, // ᵛ
    0x00001d5d, // ᵝ
    0x00001d5e, // ᵞ
    0x00001d5f, // ᵟ
    0x00001d60, // ᵠ
    0x00001d61, // ᵡ
    0x00001d62, // ᵢ
    0x00001d63, // ᵣ
    0x00001d64, // ᵤ
    0x00001d65, // ᵥ
    0x00001d66, // ᵦ
    0x00001d67, // ᵧ
    0x00001d68, // ᵨ
    0x00001d69, // ᵩ
    0x00001d6a, // ᵪ
    0x00001d9c, // ᶜ
    0x00001da0, // ᶠ
    0x00001da5, // ᶥ
    0x00001da6, // ᶦ
    0x00001dab, // ᶫ
    0x00001db0, // ᶰ
    0x00001db8, // ᶸ
    0x00001dbb, // ᶻ
    0x00001dbf, // ᶿ
    0x00002032, // ′
    0x00002033, // ″
    0x00002034, // ‴
    0x00002035, // ‵
    0x00002036, // ‶
    0x00002037, // ‷
    0x00002057, // ⁗
    0x00002070, // ⁰
    0x00002071, // ⁱ
    0x00002074, // ⁴
    0x00002075, // ⁵
    0x00002076, // ⁶
    0x00002077, // ⁷
    0x00002078, // ⁸
    0x00002079, // ⁹
    0x0000207a, // ⁺
    0x0000207b, // ⁻
    0x0000207c, // ⁼
    0x0000207d, // ⁽
    0x0000207e, // ⁾
    0x0000207f, // ⁿ
    0x00002080, // ₀
    0x00002081, // ₁
    0x00002082, // ₂
    0x00002083, // ₃
    0x00002084, // ₄
    0x00002085, // ₅
    0x00002086, // ₆
    0x00002087, // ₇
    0x00002088, // ₈
    0x00002089, // ₉
    0x0000208a, // ₊
    0x0000208b, // ₋
    0x0000208c, // ₌
    0x0000208d, // ₍
    0x0000208e, // ₎
    0x00002090, // ₐ
    0x00002091, // ₑ
    0x00002092, // ₒ
    0x00002093, // ₓ
    0x00002095, // ₕ
    0x00002096, // ₖ
    0x00002097, // ₗ
    0x00002098, // ₘ
    0x00002099, // ₙ
    0x0000209a, // ₚ
    0x0000209b, // ₛ
    0x0000209c, // ₜ
    0x00002c7c, // ⱼ
    0x00002c7d  // ⱽ
};
Loading