diff --git a/newton-raphson-div.png b/newton-raphson-div.png new file mode 100644 index 00000000..a62ba7a6 Binary files /dev/null and b/newton-raphson-div.png differ diff --git a/newton-raphson-sqrt.png b/newton-raphson-sqrt.png new file mode 100644 index 00000000..dcc4948b Binary files /dev/null and b/newton-raphson-sqrt.png differ diff --git a/src/num.c b/src/num.c index e45aa62d..631d59e0 100644 --- a/src/num.c +++ b/src/num.c @@ -1631,6 +1631,430 @@ bc_num_divExtend(BcNum* restrict a, BcNum* restrict b, BcBigDig divisor) bc_num_shiftLeft(b, pow); } + +// a, b are nonnegative integers +static ssize_t +__basic_cmp( + const BcDig* a, size_t alen, + const BcDig* b, size_t blen) +{ + size_t i; + + if(alen > blen) + { + return 1; + } + if(alen < blen) + { + return -1; + } + for(i = alen-1; i < alen; i--) + { + if(a[i] > b[i]) + { + return 1; + } + if(a[i] < b[i]) + { + return -1; + } + } + return 0; +} + +// a has enough space. +static void +__basic_inc(BcDig* restrict a, size_t* restrict palen) +{ + size_t i, alen = *palen; + for(i = 0; i < alen && a[i] == BC_BASE_POW-1; i++) + { + a[i] = 0; + } + if(i < alen) + { + a[i]++; + } + else + { + a[i++] = 1; + *palen = i; + } +} + +// a > 0 +static void +__basic_dec(BcDig* restrict a, size_t* restrict palen) +{ + size_t i, alen = *palen; + + assert(*palen > 0); + + for(i = 0; i < alen && a[i] <= 0; i++) + { + a[i] = BC_BASE_POW-1; + } + a[i]--; + for(i = alen-1; i < alen && a[i] <= 0; i--) + { + ; + } + *palen = i+1; +} + +static void +__basic_fit(BcDig* restrict a, size_t* restrict palen) +{ + size_t i, alen = *palen; + for(i = alen-1; i < alen && a[i] <= 0; i--) + { + ; + } + *palen = i+1; +} + +// a, b are integers and 0 <= b <= a +static void +__basic_sub( + BcDig* restrict a, + size_t* restrict palen, + const BcDig* restrict b, + size_t blen) +{ + BcDig d, carry = 0; + size_t alen = *palen, i; + + assert(__basic_cmp(a, *palen, b, blen) >= 0); + + for(i = 0; i < blen; i++) + { + d = carry + a[i] - b[i]; + if(d >= 0) + { + a[i] = d; + carry = 0; + } + else + { + a[i] = BC_BASE_POW + d; + carry = -1; + } + } + for(; i < alen && carry < 0; i++) + { + if(a[i] > 0) + { + a[i]--; + carry = 0; + } + else + { + a[i] = BC_BASE_POW - 1; + carry = -1; + } + } + for(i = alen-1; i < alen && a[i] <= 0; i--) + { + ; + } + *palen = i+1; +} + +// 0 < blen < alen +static void +__basic_div( + BcDig* restrict a, size_t alen, + const BcDig* restrict b, size_t blen, + BcDig* restrict c, size_t* restrict pclen) +{ + BcNum temp; + BcDig *aa, *t; + size_t i, j, aalen, tlen, diff; + + assert(0 < blen && blen < alen); + + bc_num_init(&temp, blen+1); + t = temp.num; + + aalen = blen; + diff = alen - blen; + for(i = diff; i <= diff; i--) + { + BcDig res_dig = 0; + aa = a+i; + while(1) + { + BcDig dig; + int cmp = 0; + + if(aalen > blen) + { + cmp = 1; + dig = (BcDig)( ( ((BcBigDig)aa[blen])*BC_BASE_POW + aa[blen-1] ) / (b[blen-1]+1) ); + } + else if(aalen == blen) + { + dig = aa[blen-1] / ( b[blen-1]+1 ); + if(dig <= 0) + { + dig = 1; + } + for(j = blen-1; j < blen; j--) + { + if(aa[j] > b[j]) + { + cmp = 1; + break; + } + if(aa[j] < b[j]) + { + cmp = -1; + break; + } + } + } + else + { + dig = 0; + cmp = -1; + } + if(cmp < 0) + { + break; + } + + // aa >= dig * b + { + BcBigDig d, carry; + tlen = blen; + carry = 0; + for(j = 0; j < tlen; j++) + { + d = ((BcBigDig)b[j])*((BcBigDig)dig) + carry; + t[j] = d % BC_BASE_POW; + carry = d / BC_BASE_POW; + } + if(carry > 0) + { + t[tlen++] = (BcDig)carry; + } + } + __basic_sub(aa, &aalen, t, tlen); + res_dig += dig; + } + aalen++; + c[i] = res_dig; + } + bc_num_free(&temp); + for(i = diff; i <= diff && c[i] <= 0; i--) + { + ; + } + *pclen = i+1; +} + + + + +/** + * Newton-Raphson division + * @param a The first operand. Positive integer. + * @param b The second operand. Positive integer. + * @param c The return parameter. Initially 0 + */ + +/* + +1 < b < a < R^L +a/b == a*(1/b) + +Newton-Raphson mehtod to get 1/b + +y = 1/x - b +y' = -1/x^2 +X := X - ( 1/X - b ) / ( -1/(X^2) ) = X + ( X - b*X^2 ) = X*(2-b*X) +X := X * ( 2 - b*X ) + +Y = X*R^L +X = Y*R^(-L) + +Y*R^(-L) := Y*R^(-L) * ( 2 - b*Y*R^(-L) ) +Y := Y * ( 2*R^L - b*Y ) * R^(-L) + +---- + +Proof of correctness. (TeX) +(newton-raphson-div.png) + +*/ + +#if 1 + #define NEWTON_RAPHSON_MIN_A_LEN 10000 + #define NEWTON_RAPHSON_MIN_AB_DIFF 1000 + #define MAX_BASIC_DIV_B_LEN 50 +#else // for test + #define NEWTON_RAPHSON_MIN_A_LEN 1000 + #define NEWTON_RAPHSON_MIN_AB_DIFF 100 + #define MAX_BASIC_DIV_B_LEN 50 +#endif + +// len(b) >= NEWTON_RAPHSON_MIN_AB_DIFF. Long enough +static void +__newton_raphson_div(BcNum* restrict a, const BcNum* restrict b, BcNum* restrict c) +{ + BcNum y1, y2, tnum; + //BcNum aa = { .num = a->num, .len = a->len, .cap = a->cap, .rdx = a->rdx, .scale = a->scale }; + BcNum aa = *a, bb = *b, cc = *c, shell; + size_t i, j, prec; + + __basic_fit(aa.num, &aa.len); + __basic_fit(bb.num, &bb.len); + + if(aa.len <= bb.len) + { + __basic_div(aa.num, aa.len, bb.num, bb.len, cc.num, &cc.len); + return; + } + + // Now, 2 <= len(b) < len(a) + + prec = aa.len; + + BC_SIG_LOCK; + + bc_num_init(&y1, prec+1); + bc_num_init(&y2, 2*prec+1); + bc_num_init(&tnum, prec+1); + + BC_SETJMP_LOCKED(vm, err); + + BC_SIG_UNLOCK; + + // tnum <-- 1 * R^prec + memset(tnum.num, 0, prec*sizeof(BcDig)); + tnum.num[prec] = 1; + tnum.len = prec+1; + + // Initial estimation of (1/b)*R^prec + if(bb.len <= MAX_BASIC_DIV_B_LEN) + { + __basic_div(tnum.num, tnum.len, bb.num, bb.len, y1.num, &y1.len); + } + else + { + BcDig tb[MAX_BASIC_DIV_B_LEN+1]; + size_t tblen, trunc_len = bb.len - MAX_BASIC_DIV_B_LEN; + + tblen = MAX_BASIC_DIV_B_LEN; + memcpy(tb, bb.num + trunc_len, tblen*sizeof(BcDig)); + for(i = 0; i < trunc_len && bb.num[i] <= 0; i++) { ; } + if(i < trunc_len) + { + __basic_inc(tb, &tblen); + } + __basic_div(tnum.num, tnum.len, tb, tblen, y1.num, &y1.len); + for(i = trunc_len, j = 0; i < y1.len; i++, j++) + { + y1.num[j] = y1.num[i]; + } + y1.len = j; + } + // ( len(b) < len(a) ) ==> y1 > 0 + + while(1) + { + // tnum <-- b*y1; + bc_num_zero(&tnum); + bc_num_k(&bb, &y1, &tnum); + __basic_fit(tnum.num, &tnum.len); + + // tnum = 2*R^prec - b*y1; + for(i = 0; i < tnum.len && 0 == tnum.num[i]; i++) + { + ; + } + if(i < prec) + { + tnum.num[i] = BC_BASE_POW - tnum.num[i]; + i++; + for(; i < tnum.len; i++) + { + tnum.num[i] = BC_BASE_POW-1 - tnum.num[i]; + } + for(; i < prec; i++) + { + tnum.num[i] = BC_BASE_POW-1; + } + } + tnum.num[prec] = 1; + tnum.len = prec+1; + + // y2 <-- y1*( 2*R^prec - b*y1 ) + bc_num_zero(&y2); + bc_num_k(&y1, &tnum, &y2); + __basic_fit(y2.num, &y2.len); + + // shell <-- y1*( 2*R^prec - b*y1 ) * R^(-prec) + shell = y2; + shell.num += prec; + if(shell.len > prec) + { + shell.len -= prec; + } + else + { + shell.len = 0; + } + shell.cap -= prec; + + // new approx. <= prev approx --> break + if(__basic_cmp(shell.num, shell.len, y1.num, y1.len) <= 0) + { + break; + } + + // y1 <-- y1*( 2*R^prec - b*y1 ) * R^(-prec) + memcpy(y1.num, shell.num, shell.len*sizeof(BcDig)); + y1.len = shell.len; + } + + // y2 <-- a * { approx. (1/b)*R^prec } + // y2 := { approx. (a/b) * R^prec } + bc_num_zero(&y2); + bc_num_k(&aa, &y1, &y2); + __basic_fit(y2.num, &y2.len); + + // cc <-- { approx. (a/b) * R^prec } * R^(-prec) + // cc := ( approx. a/b } + cc.len = y2.len-prec; + memcpy(cc.num, y2.num + prec, cc.len*sizeof(BcDig)); + + // y1 <-- b * ( approx. a/b } + // y1 := { approx. a } + bc_num_zero(&y1); + bc_num_k(&bb, &cc, &y1); + __basic_fit(y1.num, &y1.len); + + // aa -= { approx. a } + __basic_sub(aa.num, &aa.len, y1.num, y1.len); + + // At most once... + while(__basic_cmp(aa.num, aa.len, bb.num, bb.len) >= 0) + { + // aa -= b + __basic_sub(aa.num, &aa.len, bb.num, bb.len); + + // cc += 1 + __basic_inc(cc.num, &cc.len); + } +err: + BC_SIG_MAYLOCK; + bc_num_free(&tnum); + bc_num_free(&y2); + bc_num_free(&y1); + BC_LONGJMP_CONT(vm); +} + /** * Actually does division. This is a rewrite of my original code by Stefan Esser * from FreeBSD. @@ -1751,67 +2175,75 @@ bc_num_d_long(BcNum* restrict a, BcNum* restrict b, BcNum* restrict c, BC_SIG_UNLOCK; - // This is the actual division loop. - for (i = realend - 1; i < realend && i >= rdx && BC_NUM_NONZERO(a); --i) + if( (a->len - rdx) < NEWTON_RAPHSON_MIN_A_LEN || len < NEWTON_RAPHSON_MIN_AB_DIFF || (a->len - rdx) < len + NEWTON_RAPHSON_MIN_AB_DIFF ) { - ssize_t cmp; - BcDig* n; - BcBigDig result; + // This is the actual division loop. + for (i = realend - 1; i < realend && i >= rdx && BC_NUM_NONZERO(a); --i) + { + ssize_t cmp; + BcDig* n; + BcBigDig result; - n = a->num + i; - assert(n >= a->num); - result = 0; + n = a->num + i; + assert(n >= a->num); + result = 0; - cmp = bc_num_divCmp(n, b, len); + cmp = bc_num_divCmp(n, b, len); - // This is true if n is greater than b, which means that division can - // proceed, so this inner loop is the part that implements one instance - // of the division. - while (cmp >= 0) - { - BcBigDig n1, dividend, quotient; - - // These should be named obviously enough. Just imagine that it's a - // division of one limb. Because that's what it is. - n1 = (BcBigDig) n[len]; - dividend = n1 * BC_BASE_POW + (BcBigDig) n[len - 1]; - quotient = (dividend / divisor); - - // If this is true, then we can just subtract. Remember: setting - // quotient to 1 is not bad because we already know that n is - // greater than b. - if (quotient <= 1) - { - quotient = 1; - bc_num_subArrays(n, b->num, len); - } - else + // This is true if n is greater than b, which means that division can + // proceed, so this inner loop is the part that implements one instance + // of the division. + while (cmp >= 0) { - assert(quotient <= BC_BASE_POW); + BcBigDig n1, dividend, quotient; + + // These should be named obviously enough. Just imagine that it's a + // division of one limb. Because that's what it is. + n1 = (BcBigDig) n[len]; + dividend = n1 * BC_BASE_POW + (BcBigDig) n[len - 1]; + quotient = (dividend / divisor); + + // If this is true, then we can just subtract. Remember: setting + // quotient to 1 is not bad because we already know that n is + // greater than b. + if (quotient <= 1) + { + quotient = 1; + bc_num_subArrays(n, b->num, len); + } + else + { + assert(quotient <= BC_BASE_POW); - // We need to multiply and subtract for a quotient above 1. - bc_num_mulArray(b, (BcBigDig) quotient, &cpb); - bc_num_subArrays(n, cpb.num, cpb.len); - } + // We need to multiply and subtract for a quotient above 1. + bc_num_mulArray(b, (BcBigDig) quotient, &cpb); + bc_num_subArrays(n, cpb.num, cpb.len); + } - // The result is the *real* quotient, by the way, but it might take - // multiple trips around this loop to get it. - result += quotient; - assert(result <= BC_BASE_POW); + // The result is the *real* quotient, by the way, but it might take + // multiple trips around this loop to get it. + result += quotient; + assert(result <= BC_BASE_POW); - // And here's why it might take multiple trips: n might *still* be - // greater than b. So we have to loop again. That's what this is - // setting up for: the condition of the while loop. - if (realnonzero) cmp = bc_num_divCmp(n, b, len); - else cmp = -1; - } + // And here's why it might take multiple trips: n might *still* be + // greater than b. So we have to loop again. That's what this is + // setting up for: the condition of the while loop. + if (realnonzero) cmp = bc_num_divCmp(n, b, len); + else cmp = -1; + } - assert(result < BC_BASE_POW); + assert(result < BC_BASE_POW); - // Store the actual limb quotient. - c->num[i] = (BcDig) result; + // Store the actual limb quotient. + c->num[i] = (BcDig) result; + } + } + else + { + BcNum ta = { a->num+rdx, 0, 0, a->len - rdx, a->cap - rdx }; + BcNum tc = { c->num+rdx, 0, 0, c->len - rdx, c->cap - rdx }; + __newton_raphson_div(&ta, b, &tc); } - err: BC_SIG_MAYLOCK; bc_num_free(&cpb); @@ -4061,6 +4493,10 @@ bc_num_rshift(BcNum* a, BcNum* b, BcNum* c, size_t scale) } #endif // BC_ENABLE_EXTRA_MATH +#define NEWTON_RAPHSON_SQRT + +#ifndef NEWTON_RAPHSON_SQRT + void bc_num_sqrt(BcNum* restrict a, BcNum* restrict b, size_t scale) { @@ -4214,6 +4650,351 @@ bc_num_sqrt(BcNum* restrict a, BcNum* restrict b, size_t scale) BC_LONGJMP_CONT(vm); } +#else + +static BcBigDig +__basic_sqrt(BcBigDig n) +{ + BcBigDig u = 0, v = BC_BASE_POW-1, m, d; + + while(u <= v) + { + m = (u+v)/2; + d = m*m; + if(d < n) + { + u = m+1; // u >= 1 + } + else if(d > n) + { + v = m-1; // d >= 1 ==> m >= 1 ==> v >= 0 + } + else + { + return m; + } + } + return v; +} + +/* +s <-- max( {global scale}, scale(a) ) +b^2 <= a*R^(2s) < (b+1)^2 +( b*R^(-s) )^2 <= a < ( (b+1)*R^(-s) )^2 +*/ + +/* + + 1. Calculate 1/sqrt(a) with y = x^(-2) - a + 2. Calculate sqrt(a) from a*(1/sqrt(a)) + + Newton-Raphson iteration for y = x^(-2) - a + x <- x*(3-a*x^2)/2 + + divide by 2 : Multiply by RADIX/2 and shift right by 1. + + No division needed. + +X := X * ( 3 - b*X*X ) + +Y = X*R^L +X = Y*R^(-L) + +Y*R^(-L) := Y*R^(-L) * ( 3 - a*Y*Y*R^(-2*L) ) +Y := Y * ( 3*R^L - a*Y*Y*R^(-L) ) * R^(-L) + + +Proof of correctness +(newton-raphson-sqrt.png) + +*/ + +// a >= 1 +// Get floor( sqrt( a * R^ashift ) ) +static void +__newton_raphson_sqrt(BcNum* const restrict a, size_t ashift, BcNum* restrict b) +{ + BcNum y1, y2, tnum, shell; + size_t i, prec; + + prec = bc_vm_growSize(a->len, ashift); + // now prec >= 1 + if(prec&1) // odd + { + // make positive even + prec = bc_vm_growSize(prec, 1); + } + // new prec is positive even. + // and a*R^ashift < R^prec + + BC_SIG_LOCK; + + bc_num_init(&y1, bc_vm_growSize(bc_vm_growSize(prec, prec), 1)); // prepare 2*prec + 1 digits. + bc_num_init(&y2, 2*prec+1); // already checked overflow. + bc_num_init(&tnum, 2*prec+1); // already checked overflow. + + BC_SETJMP_LOCKED(vm, err); + + BC_SIG_UNLOCK; + + // Let A = a * R^ashift + // + // R^(prec/2) / R^prec is good to be an initial estimation of sqrt(1/A) + // Proof) + // 0 < R^(prec/2) / R^prec = sqrt( 1 / R^prec ) < sqrt(1/A) ( why? A = a*R^ashift < R^prec ) + // + // If y1 = R^(prec/2) then R^prec <= A*y1*y1 < R^(2*prec) ( So sliding down A*y1*y1 by prec digits possible ) + // Proof) + // A*y1^2 = A*R^prec + // R^prec <= A*R^prec < R^prec * R^prec = R^(2*prec) + // + { + size_t half_prec = prec/2; + for(i = 0; i < half_prec; i++) + { + y1.num[i] = 0; + } + y1.num[half_prec] = 1; + y1.len = half_prec+1; + } + + while(1) + { + // temp <-- a * y1 + bc_num_zero(&tnum); + bc_num_k(a, &y1, &tnum); + __basic_fit(tnum.num, &tnum.len); + + // y2 <-- a * y1 * y1 + bc_num_zero(&y2); + bc_num_k(&tnum, &y1, &y2); + __basic_fit(y2.num, &y2.len); + + // y2 <-- ( a * y1 * y1 ) / R^(prec-ashift) + // y2 <-- a*R^ashift * y1*y1 * R^(-prec) + if(y2.len > prec - ashift) + { + y2.len -= prec - ashift; + memmove(y2.num, y2.num + (prec - ashift), y2.len*sizeof(BcDig)); + } + else + { + y2.len = 0; + } + + // tnum <-- 3*R^prec - a*R^ashift * y1*y1 * R^(-prec) + for(i = 0; i < y2.len && 0 == y2.num[i]; i++) { ; } + if(i > 0) + { + memset(tnum.num, 0, i*sizeof(BcDig)); + } + if(i < prec) + { + tnum.num[i] = BC_BASE_POW - y2.num[i]; + for(i++; i < y2.len; i++) + { + tnum.num[i] = BC_BASE_POW-1 - y2.num[i]; + } + for(; i < prec; i++) + { + tnum.num[i] = BC_BASE_POW-1; + } + } + tnum.num[prec] = 2; + tnum.len = prec + 1; + + // y2 <-- y1 * ( 3*R^prec - a*R^ashift * y1*y1 * R^(-prec) ) + bc_num_zero(&y2); + bc_num_k(&y1, &tnum, &y2); + __basic_fit(y2.num, &y2.len); + + // y2 <-- y2 / 2 + // y2 <-- y1 * ( 3*R^prec - a*R^ashift * y1*y1 * R^(-prec) ) / 2 + // x/2 = ( x*(BC_BASE_POW/2) ) / BC_BASE_POW ( multiply by BC_BASE_POW/2 and slide down by 1 digit ) + { + BcBigDig ddig; + ddig = ( ((BcBigDig)y2.num[0]) * (BC_BASE_POW/2) ) / BC_BASE_POW; + for(i = 1; i < y2.len; i++) + { + ddig = ( (BcBigDig)y2.num[i] )*(BC_BASE_POW/2) + ddig; + y2.num[i-1] = (BcDig)(ddig % BC_BASE_POW); + ddig /= BC_BASE_POW; + } + if(ddig > 0) + { + y2.num[i-1] = (BcDig)ddig; + } + else + { + y2.len--; + } + } + + // shell <-- y1 * ( 3*R^prec - a*R^ashift * y1*y1 * R^(-prec) ) * R^(-prec) + shell = y2; + shell.num += prec; + if(shell.len > prec) + { + shell.len -= prec; + } + else + { + shell.len = 0; + } + shell.cap -= prec; + + // new approx. <= prev approx --> break + if(__basic_cmp(shell.num, shell.len, y1.num, y1.len) <= 0) + { + break; + } + // y1 <-- new approx. + memcpy(y1.num, shell.num, shell.len*sizeof(BcDig)); + y1.len = shell.len; + } + + // tnum <-- a * y1 / R^(prec-ashift) + // a * x1*R^prec / R^(prec-ashift) + // a*R^ashift * x1 --> approx. of sqrt(a*R^ashift) + bc_num_zero(&tnum); + bc_num_k(a, &y1, &tnum); + __basic_fit(tnum.num, &tnum.len); + if(tnum.len > prec - ashift) + { + b->len = tnum.len - (prec - ashift); + memcpy(b->num, tnum.num + (prec - ashift), b->len*sizeof(BcDig)); + } + else + { + b->len = 0; + } + + // check b+1 + while(1) + { + int cmp = 0; + + __basic_inc(b->num, &b->len); + + bc_num_zero(&tnum); + bc_num_k(b, b, &tnum); + __basic_fit(tnum.num, &tnum.len); + if(tnum.len > a->len + ashift) + { + cmp = 1; + } + else if(tnum.len == a->len + ashift) + { + cmp = __basic_cmp(tnum.num + ashift, tnum.len - ashift, a->num, a->len); + if(0 == cmp) + { + for(i = 0; i < ashift && tnum.num[i] == 0; i++) { ; } + if(i < ashift) + { + cmp = 1; + } + } + } + else + { + cmp = -1; + } + if(0 == cmp) + { + break; + } + if(cmp > 0) + { + __basic_dec(b->num, &b->len); + break; + } + } +err: + BC_SIG_MAYLOCK; + bc_num_free(&tnum); + bc_num_free(&y2); + bc_num_free(&y1); + BC_LONGJMP_CONT(vm); +} + +void +bc_num_sqrt(BcNum* restrict a, BcNum* restrict b, size_t scale) +{ + BcNum aa = *a, y1, y2; + size_t alen, ashift, rdx, realscale, prec; + +#if BC_ENABLE_LIBRARY + BcVm* vm = bcl_getspecific(); +#endif // BC_ENABLE_LIBRARY + + assert(a != NULL && b != NULL && a != b); + + if (BC_ERR(BC_NUM_NEG(a))) bc_err(BC_ERR_MATH_NEGATIVE); + + realscale = BC_MAX(a->scale, scale); + rdx = BC_NUM_RDX(realscale); + + // ashift = 2*rdx - BC_NUM_RDX_VAL(a); + ashift = rdx - BC_NUM_RDX_VAL(a); + ashift = bc_vm_growSize(ashift, rdx); + + aa.scale = 0; + aa.rdx = 0; + __basic_fit(aa.num, &aa.len); + + // Let's calculate floor( sqrt( 1/aa ) * R^(ashift) ) ) * R^(-rdx) + + BC_SIG_LOCK; + + bc_num_init(b, bc_vm_growSize((aa.len/2)+1, rdx)); + + BC_SETJMP_LOCKED(vm, err); + + BC_SIG_UNLOCK; + + // Easy case. + if (BC_NUM_ZERO(&aa)) + { + bc_num_setToZero(b, realscale); + return; + } + + // Another easy case. + if (BC_NUM_ONE(&aa)) + { + bc_num_one(b); + bc_num_extend(b, realscale); + return; + } + + if(ashift <= 0 && aa.len <= 2) + { + // aa.len >= 1 + BcBigDig n = (BcBigDig)aa.num[0]; + if(2 == aa.len) + { + n += ((BcBigDig)aa.num[1])*BC_BASE_POW; + } + b->num[0] = (BcDig)__basic_sqrt(n); + b->len = 1; + } + else + { + __newton_raphson_sqrt(&aa, ashift, b); + } + BC_NUM_RDX_SET(b, rdx); + b->scale = rdx*BC_BASE_DIGS; + if(b->scale > realscale) + { + bc_num_truncate(b, b->scale - realscale); + } +err: + BC_SIG_MAYLOCK; + BC_LONGJMP_CONT(vm); +} + +#endif + void bc_num_divmod(BcNum* a, BcNum* b, BcNum* c, BcNum* d, size_t scale) {