Skip to content

Commit

Permalink
upd
Browse files Browse the repository at this point in the history
  • Loading branch information
wjr-z committed Feb 2, 2024
1 parent 8fa69d0 commit 5cc6e00
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 82 deletions.
72 changes: 43 additions & 29 deletions include/wjr/math/div.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,20 @@ fallback_div_qr_1_without_shift(T *dst, T &rem, const T *src, size_t n,
dst[n - 1] = 0;
}

if (WJR_UNLIKELY(n == 1)) {
rem = hi;
return;
}

n -= 1;

do {
lo = src[n - 1];
dst[n - 1] = div.divide(divisor, value, lo, hi);
if (WJR_UNLIKELY(n == 1)) {
break;
}

--n;
} while (WJR_LIKELY(n != 0));

do {
lo = src[n - 1];
dst[n - 1] = div.divide(divisor, value, lo, hi);
--n;
} while (WJR_LIKELY(n != 0));

} while (0);

rem = hi;
return;
Expand All @@ -67,14 +69,19 @@ fallback_div_qr_1_with_shift(T *dst, T &rem, const T *src, size_t n,
--n;
hi = rbp >> (64 - shift);

if (WJR_LIKELY(n != 0)) {
do {
if (WJR_UNLIKELY(n == 0)) {
break;
}

do {
lo = src[n - 1];
dst[n] = div.divide(divisor, value, shld(rbp, lo, shift), hi);
rbp = lo;
--n;
} while (WJR_LIKELY(n != 0));
}

} while (0);

dst[0] = div.divide(divisor, value, rbp << shift, hi);
rem = hi >> shift;
Expand Down Expand Up @@ -174,19 +181,20 @@ fallback_div_qr_2_without_shift(T *dst, T *rem, const T *src, size_t n,
dst[n - 2] = 0;
}

if (WJR_UNLIKELY(n == 2)) {
rem[0] = u1;
rem[1] = u2;
return;
}
do {
if (WJR_UNLIKELY(n == 2)) {
break;
}

n -= 2;
n -= 2;

do {
u0 = src[n - 1];
dst[n - 1] = div.divide(divisor0, divisor1, value, u0, u1, u2);
--n;
} while (WJR_LIKELY(n != 0));
do {
u0 = src[n - 1];
dst[n - 1] = div.divide(divisor0, divisor1, value, u0, u1, u2);
--n;
} while (WJR_LIKELY(n != 0));

} while (0);

rem[0] = u1;
rem[1] = u2;
Expand Down Expand Up @@ -215,17 +223,21 @@ fallback_div_qr_2_with_shift(T *dst, T *rem, const T *src, size_t n,

n -= 2;

if (WJR_LIKELY(n != 0)) {
do {
if (WJR_UNLIKELY(n == 0)) {
break;
}

do {
u0 = src[n - 1];
dst[n] = div.divide(divisor0, divisor1, value, shld(rbp, u0, shift), u1, u2);
rbp = u0;
--n;
} while (WJR_LIKELY(n != 0));
}

dst[0] = div.divide(divisor0, divisor1, value, rbp << shift, u1, u2);
} while (0);

dst[0] = div.divide(divisor0, divisor1, value, rbp << shift, u1, u2);
rem[0] = shrd(u1, u2, shift);
rem[1] = u2 >> shift;
return;
Expand All @@ -252,10 +264,10 @@ WJR_INTRINSIC_CONSTEXPR20 void div_qr_2(T *dst, T *rem, const T *src, size_t n,
return fallback_div_qr_2(dst, rem, src, n, div3by2_divider<T>(d[0], d[1]));
}

// reference : gmp
// reference : GMP
template <typename T>
WJR_NOINLINE WJR_CONSTEXPR20 void schoolbook_div_qr_s(T *dst, T *src, size_t n, T *div,
size_t m, T dinv) {
WJR_NOINLINE WJR_CONSTEXPR20 T sb_div_qr_s(T *dst, T *src, size_t n, T *div, size_t m,
T dinv) {
using divider = div3by2_divider<T>;
constexpr T mask = std::numeric_limits<T>::max();

Expand Down Expand Up @@ -313,6 +325,8 @@ WJR_NOINLINE WJR_CONSTEXPR20 void schoolbook_div_qr_s(T *dst, T *src, size_t n,
}

src[1] = n1;

return qh;
}

template <typename T>
Expand Down
107 changes: 54 additions & 53 deletions include/wjr/stack_allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,51 +50,21 @@ class stack_alloc {
template <size_t cache, size_t bufsize>
class __stack_alloc {

// Header object for one chunk of stack-allocator storage. The usable bytes are
// the trailing `buffer` array that follows the header, and every request is
// forwarded to the embedded `basic_stack_alloc`.
// NOTE(review): instances appear to be created with malloc + placement-new by
// __malloc_node(), which reserves sizeof(__stack_alloc_node) + size bytes —
// confirm no other construction path exists.
class __stack_alloc_node {
public:
__stack_alloc_node() = delete;
// `size` is the number of bytes available in the trailing buffer; m_alloc is
// handed the range [buffer, buffer + size).
constexpr __stack_alloc_node(size_t size) : m_alloc(buffer, buffer + size) {}

// Non-copyable: m_alloc holds pointers into this object's own trailing buffer.
__stack_alloc_node(const __stack_alloc_node &) = delete;
__stack_alloc_node &operator=(const __stack_alloc_node &) = delete;
~__stack_alloc_node() = default;

// Forwards an n-byte request to m_alloc and returns whatever it returns.
WJR_NODISCARD constexpr void *allocate(size_t n) {
return m_alloc.allocate(n);
}

// Forwards the release of `old` (n bytes) to m_alloc.
WJR_INTRINSIC_CONSTEXPR void deallocate(void *old,
WJR_MAYBE_UNUSED size_t n) {
m_alloc.deallocate(old, n);
}

// Distance from the start of the buffer to m_alloc.ptr()
// (presumably the number of bytes currently in use — confirm against basic_stack_alloc).
WJR_INTRINSIC_CONSTEXPR size_t size() const { return m_alloc.ptr() - buffer; }
// Distance from m_alloc.ptr() to m_alloc.end() (presumably the bytes still free).
WJR_INTRINSIC_CONSTEXPR size_t rest() const {
return m_alloc.end() - m_alloc.ptr();
}
// Distance from the start of the buffer to m_alloc.end().
WJR_INTRINSIC_CONSTEXPR size_t capacity() const {
return m_alloc.end() - buffer;
}

private:
basic_stack_alloc m_alloc;
// NOTE(review): an array of unknown bound as the last member (flexible array
// member) is a compiler extension, not standard C++.
alignas(alignment) char buffer[];
};

/// Appends one more backing buffer to m_stk (cold path of allocate()).
///
/// The first buffer (m_idx == 0) is `cache` bytes; every later buffer is the
/// capacity of the current last buffer plus one third of it.
/// The new record starts with ptr == buffer, i.e. nothing allocated from it.
WJR_COLD WJR_CONSTEXPR20 void __malloc_node() {
    size_t capacity = 0;

    if (WJR_UNLIKELY(m_idx == 0)) {
        capacity = cache;
    } else {
        const auto &last = m_stk.back();
        capacity = static_cast<size_t>(last.end - last.buffer);
        capacity += capacity / 3;
    }

    auto *buffer = static_cast<char *>(malloc(capacity));
    // malloc may fail; without this check the arithmetic below would be done on
    // a null pointer. malloc(0) is allowed to return null, so tolerate that.
    WJR_ASSERT(buffer != nullptr || capacity == 0);

    m_stk.push_back(alloc_node{buffer, buffer, buffer + capacity});
}

public:
Expand All @@ -106,31 +76,62 @@ class stack_alloc {
~__stack_alloc() = default;

/// Returns `n` bytes carved from the current buffer by advancing m_ptr.
///
/// When fewer than `n` bytes remain between m_ptr and m_end, the allocator
/// moves on to the next buffer in m_stk, creating it via __malloc_node() if
/// none exists yet, and serves the request from the start of that buffer.
WJR_NODISCARD WJR_CONSTEXPR20 void *allocate(size_t n) {
    // m_end - m_ptr is never negative; the cast only makes the unsigned
    // comparison with `n` explicit.
    if (WJR_UNLIKELY(static_cast<size_t>(m_end - m_ptr) < n)) {
        // Record how far the buffer being left was filled so deallocate() can
        // resume from there when it steps back to it.
        if (WJR_LIKELY(m_idx != -1ull)) {
            m_stk[m_idx].ptr = m_ptr;
        }

        ++m_idx;
        if (WJR_UNLIKELY(m_idx == m_stk.size())) {
            __malloc_node();
        }

        const auto &node = m_stk[m_idx];
        m_buffer = node.buffer;
        m_ptr = m_buffer;
        m_end = node.end;

        // NOTE(review): nothing here verifies that the buffer just selected
        // holds at least `n` bytes; a request larger than that buffer would
        // run past m_end. Confirm callers bound `n`.
    }

    void *const ret = m_ptr;
    m_ptr += n;
    return ret;
}

/// Releases the allocation that starts at `ptr` by rewinding m_ptr to it.
///
/// If that empties the current buffer, the allocator steps back to the
/// previous buffer and restores the fill position saved for it by allocate().
/// When no previous buffer exists, the current-buffer pointers are reset to
/// null. `n` is accepted for interface symmetry with allocate() and is unused.
WJR_CONSTEXPR20 void deallocate(void *ptr, size_t n) {
    (void)n;

    m_ptr = static_cast<char *>(ptr);

    if (WJR_LIKELY(m_ptr != m_buffer)) {
        return;
    }

    // The current buffer is empty again: fall back to the previous one.
    --m_idx;
    if (WJR_UNLIKELY(m_idx == -1ull)) {
        m_buffer = m_ptr = m_end = nullptr;
        return;
    }

    // Once more than `bufsize` buffers sit unused beyond the current one,
    // give the last one back.
    if (WJR_UNLIKELY(m_stk.size() - m_idx >= bufsize + 2)) {
        // The buffer was obtained with malloc() in __malloc_node(), so it must
        // be released with free(); `delete` on it is undefined behaviour.
        free(m_stk.back().buffer);
        m_stk.pop_back();
    }

    const auto &node = m_stk[m_idx];
    m_buffer = node.buffer;
    m_ptr = node.ptr;
    m_end = node.end;
}

private:
size_t idx = -1ull;
std::vector<__stack_alloc_node *> stk;
// Bookkeeping record for one backing buffer stored in m_stk.
struct alloc_node {
// First byte of the buffer.
char *buffer;
// Fill position saved by allocate() when it moves on to a later buffer and
// restored by deallocate() when it steps back to this one.
char *ptr;
// One past the last byte of the buffer (buffer + capacity).
char *end;
};

char *m_buffer = nullptr;
char *m_ptr = nullptr;
char *m_end = nullptr;

size_t m_idx = -1ull;
std::vector<alloc_node> m_stk;
};

public:
Expand Down

0 comments on commit 5cc6e00

Please sign in to comment.