diff --git a/src/array.c b/src/array.c index d8ecf73c8dadf..47eb5e782a3be 100644 --- a/src/array.c +++ b/src/array.c @@ -514,32 +514,39 @@ JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a) return jl_pchar_to_string((const char*)jl_array_data(a), len); } -JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len) +JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) { + if (len == 0) + return jl_an_empty_string; size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size if (sz < len) // overflow jl_throw(jl_memory_exception); - if (len == 0) - return jl_an_empty_string; jl_task_t *ct = jl_current_task; - jl_value_t *s = jl_gc_alloc_(ct->ptls, sz, jl_string_type); // force inlining + jl_value_t *s; + jl_ptls_t ptls = ct->ptls; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass_align8(allocsz); + jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + s = jl_gc_pool_alloc(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + s = jl_gc_big_alloc(ptls, allocsz); + } + jl_set_typeof(s, jl_string_type); *(size_t*)s = len; - memcpy((char*)s + sizeof(size_t), str, len); - ((char*)s + sizeof(size_t))[len] = 0; + jl_string_data(s)[len] = 0; return s; } -JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) +JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len) { - size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size - if (sz < len) // overflow - jl_throw(jl_memory_exception); - if (len == 0) - return jl_an_empty_string; - jl_task_t *ct = jl_current_task; - jl_value_t *s = jl_gc_alloc_(ct->ptls, sz, jl_string_type); // force inlining - *(size_t*)s = len; - ((char*)s + sizeof(size_t))[len] = 0; + jl_value_t *s = jl_alloc_string(len); + if (len > 0) + memcpy(jl_string_data(s), str, len); return s; } diff --git a/src/gc.c b/src/gc.c index 5429510f08651..3734ec40d0795 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3286,9 +3286,6 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_thread_heap_t *heap = &ptls->heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { - assert((jl_gc_sizeclasses[i] < 16 && - jl_gc_sizeclasses[i] % sizeof(void*) == 0) || - (jl_gc_sizeclasses[i] % 16 == 0)); p[i].osize = jl_gc_sizeclasses[i]; p[i].freelist = NULL; p[i].newpages = NULL; diff --git a/src/julia_internal.h b/src/julia_internal.h index 2f14e3ad6832b..c9ff2716530eb 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -247,12 +247,15 @@ static const int jl_gc_sizeclasses[] = { 4, 8, 12, #endif - // 16 pools at 16-byte spacing - 16, 32, 48, 64, 80, 96, 112, 128, + // 16 pools at 8-byte spacing + // the 8-byte aligned pools are only used for Strings + 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, + // 8 pools at 16-byte spacing 144, 160, 176, 192, 208, 224, 240, 256, // the following tables are computed for maximum packing efficiency via the formula: - // sz=(div(2^14-8,rng)÷16)*16; hcat(sz, (2^14-8)÷sz, 2^14-(2^14-8)÷sz.*sz)' + // pg = 2^14 + // sz = (div.(pg-8, rng).÷16)*16; hcat(sz, (pg-8).÷sz, pg .- (pg-8).÷sz.*sz)' // rng = 60:-4:32 (8 pools) 272, 288, 304, 336, 368, 400, 448, 496, @@ -293,15 +296,14 @@ STATIC_INLINE int jl_gc_alignment(size_t sz) } JL_DLLEXPORT int jl_alignment(size_t sz); -// the following table is computed from jl_gc_sizeclasses via the formula: -// [searchsortedfirst(TABLE, i) for i = 0:16:table[end]] -static const uint8_t szclass_table[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40}; +// the following table is computed as: +// [searchsortedfirst(jl_gc_sizeclasses, i) - 1 for i = 0:16:jl_gc_sizeclasses[end]] +static const uint8_t szclass_table[] = {0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48}; static_assert(sizeof(szclass_table) == 128, ""); STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) { assert(sz <= 2032); - uint8_t klass = szclass_table[(sz + 15) / 16]; #ifdef _P64 if (sz <= 8) return 0; @@ -315,9 +317,25 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) return (sz >= 8 ? 2 : (sz >= 4 ? 1 : 0)); const int N = 2; #endif + uint8_t klass = szclass_table[(sz + 15) / 16]; return klass + N; } +STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) +{ + if (sz >= 16 && sz <= 152) { +#ifdef _P64 + const int N = 0; +#elif MAX_ALIGN == 8 + const int N = 1; +#else + const int N = 2; +#endif + return (sz + 7)/8 - 1 + N; + } + return jl_gc_szclass(sz); +} + #define JL_SMALL_BYTE_ALIGNMENT 16 #define JL_CACHE_BYTE_ALIGNMENT 64 // JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide diff --git a/src/julia_threads.h b/src/julia_threads.h index d0d70f88c79db..f10c9f538915d 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -141,11 +141,11 @@ typedef struct { // variables for allocating objects from pools #ifdef _P64 -# define JL_GC_N_POOLS 41 +# define JL_GC_N_POOLS 49 #elif MAX_ALIGN == 8 -# define JL_GC_N_POOLS 42 +# define JL_GC_N_POOLS 50 #else -# define JL_GC_N_POOLS 43 +# define JL_GC_N_POOLS 51 #endif jl_gc_pool_t norm_pools[JL_GC_N_POOLS];