* bignum.c: (bigsub_core): Use bary_sub.
(bary_sub): Returns a borrow flag. Use bary_subb. (bary_subb): New function for actually calculating subtraction with borrow. (bary_sub_one): New function. (bigadd_core): Use bary_add. (bary_add): Returns a carry flag. Use bary_addc. (bary_addc): New function for actually calculating addition with carry. (bary_add_one): New function. (bary_muladd_1xN): Extracted from bary_mul_normal. (bigmul1_normal): Removed. (bary_mul_karatsuba): New function. (bary_mul1): Invoke rb_thread_check_ints after bary_mul_normal. (bary_mul): Remove most and least significant zeros before actual multiplication. Use bary_sq_fast, bary_mul_balance, bary_mul_karatsuba and bigmul1_toom3 as bigmul0. (bigmul1_balance): Removed. (bigmul1_karatsuba): Removed. (bigsqr_fast): Removed. (bary_sparse_p): Extracted from big_sparse_p. (big_sparse_p): Removed. (bigmul0): Use bary_mul. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@41821 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
8f50a21064
commit
8dbf566094
26
ChangeLog
26
ChangeLog
@ -1,3 +1,29 @@
|
||||
Sun Jul 7 14:41:57 2013 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* bignum.c: (bigsub_core): Use bary_sub.
|
||||
(bary_sub): Returns a borrow flag. Use bary_subb.
|
||||
(bary_subb): New function for actually calculating subtraction with
|
||||
borrow.
|
||||
(bary_sub_one): New function.
|
||||
(bigadd_core): Use bary_add.
|
||||
(bary_add): Returns a carry flag. Use bary_addc.
|
||||
(bary_addc): New function for actually calculating addition with
|
||||
carry.
|
||||
(bary_add_one): New function.
|
||||
(bary_muladd_1xN): Extracted from bary_mul_normal.
|
||||
(bigmul1_normal): Removed.
|
||||
(bary_mul_karatsuba): New function.
|
||||
(bary_mul1): Invoke rb_thread_check_ints after bary_mul_normal.
|
||||
(bary_mul): Remove most and least significant zeros before actual
|
||||
multiplication. Use bary_sq_fast, bary_mul_balance,
|
||||
bary_mul_karatsuba and bigmul1_toom3 as bigmul0.
|
||||
(bigmul1_balance): Removed.
|
||||
(bigmul1_karatsuba): Removed.
|
||||
(bigsqr_fast): Removed.
|
||||
(bary_sparse_p): Extracted from big_sparse_p.
|
||||
(big_sparse_p): Removed.
|
||||
(bigmul0): Use bary_mul.
|
||||
|
||||
Sun Jul 7 11:54:33 2013 Kouhei Sutou <kou@cozmixng.org>
|
||||
|
||||
* NEWS: Add REXML::Text#<< related updates.
|
||||
|
615
bignum.c
615
bignum.c
@ -101,14 +101,18 @@ static void bary_small_rshift(BDIGIT *zds, BDIGIT *xds, long n, int shift, int s
|
||||
static void bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags);
|
||||
static void bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl);
|
||||
static void bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl);
|
||||
static void bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
|
||||
static int bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
|
||||
static int bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow);
|
||||
static void bary_divmod(BDIGIT *qds, size_t nq, BDIGIT *rds, size_t nr, BDIGIT *xds, size_t nx, BDIGIT *yds, size_t ny);
|
||||
static void bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
|
||||
static int bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
|
||||
static int bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry);
|
||||
static int bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags);
|
||||
static int bary_2comp(BDIGIT *ds, size_t n);
|
||||
|
||||
static VALUE bigsqr_fast(VALUE x);
|
||||
static void bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn);
|
||||
static inline int bary_sparse_p(BDIGIT *ds, size_t n);
|
||||
static VALUE bigmul0(VALUE x, VALUE y);
|
||||
static VALUE bigmul1_toom3(VALUE x, VALUE y);
|
||||
|
||||
#define BIGNUM_DEBUG 0
|
||||
#if BIGNUM_DEBUG
|
||||
@ -3459,38 +3463,58 @@ rb_big_neg(VALUE x)
|
||||
static void
|
||||
bigsub_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
|
||||
{
|
||||
BDIGIT_DBL_SIGNED num;
|
||||
long i;
|
||||
bary_sub(zds, zn, xds, xn, yds, yn);
|
||||
}
|
||||
|
||||
for (i = 0, num = 0; i < yn; i++) {
|
||||
static int
|
||||
bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow)
|
||||
{
|
||||
BDIGIT_DBL_SIGNED num;
|
||||
size_t i;
|
||||
|
||||
assert(yn <= xn);
|
||||
assert(xn <= zn);
|
||||
|
||||
num = borrow ? -1 : 0;
|
||||
for (i = 0; i < yn; i++) {
|
||||
num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i];
|
||||
zds[i] = BIGLO(num);
|
||||
num = BIGDN(num);
|
||||
}
|
||||
while (num && i < xn) {
|
||||
for (; i < xn; i++) {
|
||||
if (num == 0) goto num_is_zero;
|
||||
num += xds[i];
|
||||
zds[i++] = BIGLO(num);
|
||||
zds[i] = BIGLO(num);
|
||||
num = BIGDN(num);
|
||||
}
|
||||
if (num == 0) goto num_is_zero;
|
||||
for (; i < zn; i++) {
|
||||
zds[i] = BDIGMAX;
|
||||
}
|
||||
return 1;
|
||||
|
||||
num_is_zero:
|
||||
if (xds == zds && xn == zn)
|
||||
return;
|
||||
while (i < xn) {
|
||||
return 0;
|
||||
for (; i < xn; i++) {
|
||||
zds[i] = xds[i];
|
||||
i++;
|
||||
}
|
||||
assert(i <= zn);
|
||||
while (i < zn) {
|
||||
zds[i++] = 0;
|
||||
for (; i < zn; i++) {
|
||||
zds[i] = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
|
||||
{
|
||||
assert(yn <= xn);
|
||||
assert(xn <= zn);
|
||||
return bary_subb(zds, zn, xds, xn, yds, yn, 0);
|
||||
}
|
||||
|
||||
bigsub_core(xds, xn, yds, yn, zds, zn);
|
||||
static int
|
||||
bary_sub_one(BDIGIT *zds, size_t zn)
|
||||
{
|
||||
return bary_subb(zds, zn, zds, zn, NULL, 0, 1);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
@ -3712,8 +3736,17 @@ bigadd_int(VALUE x, long y)
|
||||
static void
|
||||
bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
|
||||
{
|
||||
BDIGIT_DBL num = 0;
|
||||
long i;
|
||||
bary_add(zds, zn, xds, xn, yds, yn);
|
||||
}
|
||||
|
||||
static int
|
||||
bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry)
|
||||
{
|
||||
BDIGIT_DBL num;
|
||||
size_t i;
|
||||
|
||||
assert(xn <= zn);
|
||||
assert(yn <= zn);
|
||||
|
||||
if (xn > yn) {
|
||||
BDIGIT *tds;
|
||||
@ -3721,32 +3754,47 @@ bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
|
||||
i = xn; xn = yn; yn = i;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (i < xn) {
|
||||
num = carry ? 1 : 0;
|
||||
for (i = 0; i < xn; i++) {
|
||||
num += (BDIGIT_DBL)xds[i] + yds[i];
|
||||
zds[i++] = BIGLO(num);
|
||||
zds[i] = BIGLO(num);
|
||||
num = BIGDN(num);
|
||||
}
|
||||
while (num && i < yn) {
|
||||
for (; i < yn; i++) {
|
||||
if (num == 0) goto num_is_zero;
|
||||
num += yds[i];
|
||||
zds[i++] = BIGLO(num);
|
||||
zds[i] = BIGLO(num);
|
||||
num = BIGDN(num);
|
||||
}
|
||||
while (i < yn) {
|
||||
for (; i < zn; i++) {
|
||||
if (num == 0) goto num_is_zero;
|
||||
zds[i] = BIGLO(num);
|
||||
num = BIGDN(num);
|
||||
}
|
||||
return num != 0;
|
||||
|
||||
num_is_zero:
|
||||
if (yds == zds && yn == zn)
|
||||
return 0;
|
||||
for (; i < yn; i++) {
|
||||
zds[i] = yds[i];
|
||||
i++;
|
||||
}
|
||||
if (num) zds[i++] = (BDIGIT)num;
|
||||
assert(i <= zn);
|
||||
while (i < zn) {
|
||||
zds[i++] = 0;
|
||||
for (; i < zn; i++) {
|
||||
zds[i] = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
|
||||
{
|
||||
bigadd_core(xds, xn, yds, yn, zds, zn);
|
||||
return bary_addc(zds, zn, xds, xn, yds, yn, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
bary_add_one(BDIGIT *zds, size_t zn)
|
||||
{
|
||||
return bary_addc(zds, zn, NULL, 0, zds, zn, 1);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
@ -3871,65 +3919,55 @@ bary_mul_single(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT y)
|
||||
zds[1] = (BDIGIT)BIGDN(n);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
bigmul1_single(VALUE x, VALUE y)
|
||||
static int
|
||||
bary_muladd_1xN(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT *yds, size_t yl)
|
||||
{
|
||||
VALUE z = bignew(2, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
|
||||
BDIGIT *xds, *yds, *zds;
|
||||
BDIGIT_DBL n;
|
||||
BDIGIT_DBL dd;
|
||||
size_t j;
|
||||
|
||||
xds = BDIGITS(x);
|
||||
yds = BDIGITS(y);
|
||||
zds = BDIGITS(z);
|
||||
assert(zl > yl);
|
||||
|
||||
bary_mul_single(zds, 2, xds[0], yds[0]);
|
||||
if (x == 0)
|
||||
return 0;
|
||||
dd = x;
|
||||
n = 0;
|
||||
for (j = 0; j < yl; j++) {
|
||||
BDIGIT_DBL ee = n + dd * yds[j];
|
||||
if (ee) {
|
||||
n = zds[j] + ee;
|
||||
zds[j] = BIGLO(n);
|
||||
n = BIGDN(n);
|
||||
}
|
||||
else {
|
||||
n = 0;
|
||||
}
|
||||
|
||||
return z;
|
||||
}
|
||||
for (; j < zl; j++) {
|
||||
if (n == 0)
|
||||
break;
|
||||
n += zds[j];
|
||||
zds[j] = BIGLO(n);
|
||||
n = BIGDN(n);
|
||||
}
|
||||
return n != 0;
|
||||
}
|
||||
|
||||
static void
|
||||
bary_mul_normal(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
|
||||
{
|
||||
size_t i;
|
||||
size_t j = zl;
|
||||
BDIGIT_DBL n = 0;
|
||||
|
||||
assert(xl + yl <= zl);
|
||||
|
||||
while (j--) zds[j] = 0;
|
||||
MEMZERO(zds, BDIGIT, zl);
|
||||
for (i = 0; i < xl; i++) {
|
||||
BDIGIT_DBL dd;
|
||||
dd = xds[i];
|
||||
if (dd == 0) continue;
|
||||
n = 0;
|
||||
for (j = 0; j < yl; j++) {
|
||||
BDIGIT_DBL ee = n + dd * yds[j];
|
||||
n = zds[i + j] + ee;
|
||||
if (ee) zds[i + j] = BIGLO(n);
|
||||
n = BIGDN(n);
|
||||
}
|
||||
if (n) {
|
||||
zds[i + j] = (BDIGIT)n;
|
||||
}
|
||||
bary_muladd_1xN(zds+i, zl-i, xds[i], yds, yl);
|
||||
}
|
||||
}
|
||||
|
||||
static VALUE
|
||||
bigmul1_normal(VALUE x, VALUE y)
|
||||
{
|
||||
size_t xl = RBIGNUM_LEN(x), yl = RBIGNUM_LEN(y), zl = xl + yl;
|
||||
VALUE z = bignew(zl, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
|
||||
BDIGIT *xds, *yds, *zds;
|
||||
|
||||
xds = BDIGITS(x);
|
||||
yds = BDIGITS(y);
|
||||
zds = BDIGITS(z);
|
||||
|
||||
bary_mul_normal(zds, zl, xds, xl, yds, yl);
|
||||
|
||||
rb_thread_check_ints();
|
||||
return z;
|
||||
}
|
||||
|
||||
/* balancing multiplication by slicing larger argument */
|
||||
static void
|
||||
bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
|
||||
{
|
||||
@ -3961,6 +3999,168 @@ bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, si
|
||||
ALLOCV_END(work);
|
||||
}
|
||||
|
||||
/* multiplication by karatsuba method */
|
||||
static void
|
||||
bary_mul_karatsuba(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
|
||||
{
|
||||
VALUE work = 0;
|
||||
BDIGIT *wds;
|
||||
size_t wl;
|
||||
|
||||
size_t n;
|
||||
int sub_p, borrow, carry1, carry2, carry3;
|
||||
|
||||
int odd_x = 0;
|
||||
int odd_y = 0;
|
||||
|
||||
BDIGIT *xds0, *xds1, *yds0, *yds1, *zds0, *zds1, *zds2, *zds3;
|
||||
|
||||
assert(xl + yl <= zl);
|
||||
assert(xl <= yl);
|
||||
assert(yl < 2 * xl);
|
||||
|
||||
if (yl & 1) {
|
||||
odd_y = 1;
|
||||
yl--;
|
||||
if (yl < xl) {
|
||||
odd_x = 1;
|
||||
xl--;
|
||||
}
|
||||
}
|
||||
|
||||
n = yl / 2;
|
||||
|
||||
assert(n < xl);
|
||||
|
||||
wl = n;
|
||||
wds = ALLOCV_N(BDIGIT, work, wl);
|
||||
|
||||
/* Karatsuba algorithm:
|
||||
*
|
||||
* x = x0 + r*x1
|
||||
* y = y0 + r*y1
|
||||
* z = x*y
|
||||
* = (x0 + r*x1) * (y0 + r*y1)
|
||||
* = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1
|
||||
* = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1
|
||||
* = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1
|
||||
*/
|
||||
|
||||
xds0 = xds;
|
||||
xds1 = xds + n;
|
||||
yds0 = yds;
|
||||
yds1 = yds + n;
|
||||
zds0 = zds;
|
||||
zds1 = zds + n;
|
||||
zds2 = zds + 2*n;
|
||||
zds3 = zds + 3*n;
|
||||
|
||||
sub_p = 1;
|
||||
|
||||
/* zds0:? zds1:? zds2:? zds3:? wds:? */
|
||||
|
||||
if (bary_sub(zds0, n, xds, n, xds+n, xl-n)) {
|
||||
bary_2comp(zds0, n);
|
||||
sub_p = !sub_p;
|
||||
}
|
||||
|
||||
/* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */
|
||||
|
||||
if (bary_sub(wds, n, yds, n, yds+n, n)) {
|
||||
bary_2comp(wds, n);
|
||||
sub_p = !sub_p;
|
||||
}
|
||||
|
||||
/* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */
|
||||
|
||||
bary_mul(zds1, 2*n, zds0, n, wds, n);
|
||||
|
||||
/* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
|
||||
|
||||
borrow = 0;
|
||||
if (sub_p) {
|
||||
borrow = !bary_2comp(zds1, 2*n);
|
||||
}
|
||||
/* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
|
||||
|
||||
MEMCPY(wds, zds1, BDIGIT, n);
|
||||
|
||||
/* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
|
||||
|
||||
bary_mul(zds0, 2*n, xds0, n, yds0, n);
|
||||
|
||||
/* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
|
||||
|
||||
carry1 = bary_add(wds, n, wds, n, zds0, n);
|
||||
carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1);
|
||||
|
||||
/* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
|
||||
|
||||
carry2 = bary_add(zds1, n, zds1, n, wds, n);
|
||||
|
||||
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
|
||||
|
||||
MEMCPY(wds, zds2, BDIGIT, n);
|
||||
|
||||
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
|
||||
|
||||
bary_mul(zds2, zl-2*n, xds1, xl-n, yds1, n);
|
||||
|
||||
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
|
||||
|
||||
carry3 = bary_add(zds1, n, zds1, n, zds2, n);
|
||||
|
||||
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
|
||||
|
||||
carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zl ? n : zl-3*n), carry3);
|
||||
|
||||
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
|
||||
|
||||
bary_add(zds2, zl-2*n, zds2, zl-2*n, wds, n);
|
||||
|
||||
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */
|
||||
|
||||
if (carry2)
|
||||
bary_add_one(zds2, zl-2*n);
|
||||
|
||||
if (borrow && carry1)
|
||||
borrow = carry1 = 0;
|
||||
if (borrow && carry3)
|
||||
borrow = carry3 = 0;
|
||||
|
||||
if (borrow)
|
||||
bary_sub_one(zds3, zl-3*n);
|
||||
else if (carry1 || carry3) {
|
||||
BDIGIT c = carry1 + carry3;
|
||||
bary_add(zds3, zl-3*n, zds3, zl-3*n, &c, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
if (SIZEOF_BDIGITS * zl <= 16) {
|
||||
uint128_t z, x, y;
|
||||
ssize_t i;
|
||||
for (x = 0, i = xl-1; 0 <= i; i--) { x <<= SIZEOF_BDIGITS*CHAR_BIT; x |= xds[i]; }
|
||||
for (y = 0, i = yl-1; 0 <= i; i--) { y <<= SIZEOF_BDIGITS*CHAR_BIT; y |= yds[i]; }
|
||||
for (z = 0, i = zl-1; 0 <= i; i--) { z <<= SIZEOF_BDIGITS*CHAR_BIT; z |= zds[i]; }
|
||||
assert(z == x * y);
|
||||
}
|
||||
*/
|
||||
|
||||
if (odd_x && odd_y) {
|
||||
bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
|
||||
bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl+1);
|
||||
}
|
||||
else if (odd_x) {
|
||||
bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl);
|
||||
}
|
||||
else if (odd_y) {
|
||||
bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
|
||||
}
|
||||
|
||||
if (work)
|
||||
ALLOCV_END(work);
|
||||
}
|
||||
|
||||
static void
|
||||
bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
|
||||
{
|
||||
@ -3975,6 +4175,7 @@ bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl
|
||||
else {
|
||||
l = xl + yl;
|
||||
bary_mul_normal(zds, zl, xds, xl, yds, yl);
|
||||
rb_thread_check_ints();
|
||||
}
|
||||
MEMZERO(zds + l, BDIGIT, zl - l);
|
||||
}
|
||||
@ -3982,45 +4183,92 @@ bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl
|
||||
static void
|
||||
bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
|
||||
{
|
||||
size_t nlsz; /* number of least significant zero BDIGITs */
|
||||
|
||||
assert(xl + yl <= zl);
|
||||
|
||||
if ((xl < yl ? xl : yl) < KARATSUBA_MUL_DIGITS)
|
||||
bary_mul1(zds, zl, xds, xl, yds, yl);
|
||||
else {
|
||||
while (0 < xl && xds[xl-1] == 0)
|
||||
xl--;
|
||||
while (0 < yl && yds[yl-1] == 0)
|
||||
yl--;
|
||||
|
||||
nlsz = 0;
|
||||
while (0 < xl && xds[0] == 0) {
|
||||
xds++;
|
||||
xl--;
|
||||
nlsz++;
|
||||
}
|
||||
while (0 < yl && yds[0] == 0) {
|
||||
yds++;
|
||||
yl--;
|
||||
nlsz++;
|
||||
}
|
||||
if (nlsz) {
|
||||
MEMZERO(zds, BDIGIT, nlsz);
|
||||
zds += nlsz;
|
||||
zl -= nlsz;
|
||||
}
|
||||
|
||||
/* make sure that y is longer than x */
|
||||
if (xl > yl) {
|
||||
BDIGIT *tds;
|
||||
size_t tl;
|
||||
tds = xds; xds = yds; yds = tds;
|
||||
tl = xl; xl = yl; yl = tl;
|
||||
}
|
||||
assert(xl <= yl);
|
||||
|
||||
if (xl == 0) {
|
||||
MEMZERO(zds, BDIGIT, zl);
|
||||
return;
|
||||
}
|
||||
|
||||
/* normal multiplication when x is small */
|
||||
if (xl < KARATSUBA_MUL_DIGITS) {
|
||||
normal:
|
||||
if (xds == yds && xl == yl)
|
||||
bary_sq_fast(zds, zl, xds, xl);
|
||||
else
|
||||
bary_mul1(zds, zl, xds, xl, yds, yl);
|
||||
return;
|
||||
}
|
||||
|
||||
/* normal multiplication when x or y is a sparse bignum */
|
||||
if (bary_sparse_p(xds, xl)) goto normal;
|
||||
if (bary_sparse_p(yds, yl)) {
|
||||
bary_mul1(zds, zl, yds, yl, xds, xl);
|
||||
return;
|
||||
}
|
||||
|
||||
/* balance multiplication by slicing y when x is much smaller than y */
|
||||
if (2 * xl <= yl) {
|
||||
bary_mul_balance(zds, zl, xds, xl, yds, yl);
|
||||
return;
|
||||
}
|
||||
|
||||
if (xl < TOOM3_MUL_DIGITS) {
|
||||
/* multiplication by karatsuba method */
|
||||
bary_mul_karatsuba(zds, zl, xds, xl, yds, yl);
|
||||
return;
|
||||
}
|
||||
|
||||
if (3*xl <= 2*(yl + 2)) {
|
||||
bary_mul_balance(zds, zl, xds, xl, yds, yl);
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
VALUE x, y, z;
|
||||
x = bignew(xl, 1);
|
||||
MEMCPY(BDIGITS(x), xds, BDIGIT, xl);
|
||||
y = bignew(yl, 1);
|
||||
MEMCPY(BDIGITS(y), yds, BDIGIT, yl);
|
||||
z = bigtrunc(bigmul0(x, y));
|
||||
z = bigtrunc(bigmul1_toom3(x, y));
|
||||
MEMCPY(zds, BDIGITS(z), BDIGIT, RBIGNUM_LEN(z));
|
||||
MEMZERO(zds + RBIGNUM_LEN(z), BDIGIT, zl - RBIGNUM_LEN(z));
|
||||
}
|
||||
}
|
||||
|
||||
/* balancing multiplication by slicing larger argument */
|
||||
static VALUE
|
||||
bigmul1_balance(VALUE x, VALUE y)
|
||||
{
|
||||
VALUE z;
|
||||
long xn, yn, zn;
|
||||
BDIGIT *xds, *yds, *zds;
|
||||
|
||||
xn = RBIGNUM_LEN(x);
|
||||
yn = RBIGNUM_LEN(y);
|
||||
assert(2 * xn <= yn || 3 * xn <= 2*(yn+2));
|
||||
|
||||
zn = xn + yn;
|
||||
z = bignew(zn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
|
||||
|
||||
xds = BDIGITS(x);
|
||||
yds = BDIGITS(y);
|
||||
zds = BDIGITS(z);
|
||||
|
||||
bary_mul_balance(zds, zn, xds, xn, yds, yn);
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
/* split a bignum into high and low bignums */
|
||||
static void
|
||||
@ -4054,102 +4302,6 @@ big_split(VALUE v, long n, volatile VALUE *ph, volatile VALUE *pl)
|
||||
*ph = h;
|
||||
}
|
||||
|
||||
/* multiplication by karatsuba method */
|
||||
static VALUE
|
||||
bigmul1_karatsuba(VALUE x, VALUE y)
|
||||
{
|
||||
long i, n, xn, yn, t1n, t2n;
|
||||
VALUE xh, xl, yh, yl, z, t1, t2, t3;
|
||||
BDIGIT *zds;
|
||||
|
||||
xn = RBIGNUM_LEN(x);
|
||||
yn = RBIGNUM_LEN(y);
|
||||
n = yn / 2;
|
||||
big_split(x, n, &xh, &xl);
|
||||
if (x == y) {
|
||||
yh = xh; yl = xl;
|
||||
}
|
||||
else big_split(y, n, &yh, &yl);
|
||||
|
||||
/* x = xh * b + xl
|
||||
* y = yh * b + yl
|
||||
*
|
||||
* Karatsuba method:
|
||||
* x * y = z2 * b^2 + z1 * b + z0
|
||||
* where
|
||||
* z2 = xh * yh
|
||||
* z0 = xl * yl
|
||||
* z1 = (xh + xl) * (yh + yl) - z2 - z0
|
||||
*
|
||||
* ref: http://en.wikipedia.org/wiki/Karatsuba_algorithm
|
||||
*/
|
||||
|
||||
/* allocate a result bignum */
|
||||
z = bignew(xn + yn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
|
||||
zds = BDIGITS(z);
|
||||
|
||||
/* t1 <- xh * yh */
|
||||
t1 = bigmul0(xh, yh);
|
||||
t1n = big_real_len(t1);
|
||||
|
||||
/* copy t1 into high bytes of the result (z2) */
|
||||
MEMCPY(zds + 2 * n, BDIGITS(t1), BDIGIT, t1n);
|
||||
for (i = 2 * n + t1n; i < xn + yn; i++) zds[i] = 0;
|
||||
|
||||
if (!BIGZEROP(xl) && !BIGZEROP(yl)) {
|
||||
/* t2 <- xl * yl */
|
||||
t2 = bigmul0(xl, yl);
|
||||
t2n = big_real_len(t2);
|
||||
|
||||
/* copy t2 into low bytes of the result (z0) */
|
||||
MEMCPY(zds, BDIGITS(t2), BDIGIT, t2n);
|
||||
for (i = t2n; i < 2 * n; i++) zds[i] = 0;
|
||||
}
|
||||
else {
|
||||
t2 = Qundef;
|
||||
t2n = 0;
|
||||
|
||||
/* copy 0 into low bytes of the result (z0) */
|
||||
for (i = 0; i < 2 * n; i++) zds[i] = 0;
|
||||
}
|
||||
|
||||
/* xh <- xh + xl */
|
||||
if (RBIGNUM_LEN(xl) > RBIGNUM_LEN(xh)) {
|
||||
t3 = xl; xl = xh; xh = t3;
|
||||
}
|
||||
/* xh has a margin for carry */
|
||||
bigadd_core(BDIGITS(xh), RBIGNUM_LEN(xh),
|
||||
BDIGITS(xl), RBIGNUM_LEN(xl),
|
||||
BDIGITS(xh), RBIGNUM_LEN(xh));
|
||||
|
||||
/* yh <- yh + yl */
|
||||
if (x != y) {
|
||||
if (RBIGNUM_LEN(yl) > RBIGNUM_LEN(yh)) {
|
||||
t3 = yl; yl = yh; yh = t3;
|
||||
}
|
||||
/* yh has a margin for carry */
|
||||
bigadd_core(BDIGITS(yh), RBIGNUM_LEN(yh),
|
||||
BDIGITS(yl), RBIGNUM_LEN(yl),
|
||||
BDIGITS(yh), RBIGNUM_LEN(yh));
|
||||
}
|
||||
else yh = xh;
|
||||
|
||||
/* t3 <- xh * yh */
|
||||
t3 = bigmul0(xh, yh);
|
||||
|
||||
i = xn + yn - n;
|
||||
/* subtract t1 from t3 */
|
||||
bigsub_core(BDIGITS(t3), big_real_len(t3), BDIGITS(t1), t1n, BDIGITS(t3), big_real_len(t3));
|
||||
|
||||
/* subtract t2 from t3; t3 is now the middle term of the product */
|
||||
if (t2 != Qundef) bigsub_core(BDIGITS(t3), big_real_len(t3), BDIGITS(t2), t2n, BDIGITS(t3), big_real_len(t3));
|
||||
|
||||
/* add t3 to middle bytes of the result (z1) */
|
||||
bigadd_core(zds + n, i, BDIGITS(t3), big_real_len(t3), zds + n, i);
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static void
|
||||
biglsh_bang(BDIGIT *xds, long xn, unsigned long shift)
|
||||
{
|
||||
@ -4421,70 +4573,41 @@ bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn)
|
||||
}
|
||||
}
|
||||
|
||||
static VALUE
|
||||
bigsqr_fast(VALUE x)
|
||||
{
|
||||
long xn = RBIGNUM_LEN(x);
|
||||
VALUE z = bignew(2 * xn, 1);
|
||||
BDIGIT *xds = BDIGITS(x), *zds = BDIGITS(z);
|
||||
|
||||
bary_sq_fast(zds, RBIGNUM_LEN(z), xds, xn);
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
/* determine whether a bignum is sparse or not by random sampling */
|
||||
static inline VALUE
|
||||
big_sparse_p(VALUE x)
|
||||
static inline int
|
||||
bary_sparse_p(BDIGIT *ds, size_t n)
|
||||
{
|
||||
long c = 0, n = RBIGNUM_LEN(x);
|
||||
long c = 0;
|
||||
|
||||
if ( BDIGITS(x)[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
|
||||
if (c <= 1 && BDIGITS(x)[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
|
||||
if (c <= 1 && BDIGITS(x)[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
|
||||
if ( ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
|
||||
if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
|
||||
if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
|
||||
|
||||
return (c <= 1) ? Qtrue : Qfalse;
|
||||
return (c <= 1) ? 1 : 0;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
bigmul0(VALUE x, VALUE y)
|
||||
{
|
||||
long xn, yn;
|
||||
long xn, yn, zn;
|
||||
VALUE z;
|
||||
BDIGIT *xds, *yds, *zds;
|
||||
|
||||
xn = RBIGNUM_LEN(x);
|
||||
yn = RBIGNUM_LEN(y);
|
||||
zn = xn + yn;
|
||||
|
||||
/* make sure that y is longer than x */
|
||||
if (xn > yn) {
|
||||
VALUE t;
|
||||
long tn;
|
||||
t = x; x = y; y = t;
|
||||
tn = xn; xn = yn; yn = tn;
|
||||
}
|
||||
assert(xn <= yn);
|
||||
z = bignew(zn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
|
||||
|
||||
/* normal multiplication when x is small */
|
||||
if (xn < KARATSUBA_MUL_DIGITS) {
|
||||
normal:
|
||||
if (x == y) return bigsqr_fast(x);
|
||||
if (xn == 1 && yn == 1) return bigmul1_single(x, y);
|
||||
return bigmul1_normal(x, y);
|
||||
}
|
||||
xds = BDIGITS(x);
|
||||
yds = BDIGITS(y);
|
||||
zds = BDIGITS(z);
|
||||
|
||||
/* normal multiplication when x or y is a sparse bignum */
|
||||
if (big_sparse_p(x)) goto normal;
|
||||
if (big_sparse_p(y)) return bigmul1_normal(y, x);
|
||||
bary_mul(zds, zn, xds, xn, yds, yn);
|
||||
|
||||
/* balance multiplication by slicing y when x is much smaller than y */
|
||||
if (2 * xn <= yn) return bigmul1_balance(x, y);
|
||||
|
||||
if (xn < TOOM3_MUL_DIGITS) {
|
||||
/* multiplication by karatsuba method */
|
||||
return bigmul1_karatsuba(x, y);
|
||||
}
|
||||
else if (3*xn <= 2*(yn + 2))
|
||||
return bigmul1_balance(x, y);
|
||||
return bigmul1_toom3(x, y);
|
||||
RB_GC_GUARD(x);
|
||||
RB_GC_GUARD(y);
|
||||
return z;
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user