* bignum.c: (bigsub_core): Use bary_sub.

(bary_sub): Returns a borrow flag.  Use bary_subb.
  (bary_subb): New function for actually calculating subtraction with
  borrow.
  (bary_sub_one): New function.
  (bigadd_core): Use bary_add.
  (bary_add): Returns a carry flag.  Use bary_addc.
  (bary_addc): New function for actually calculating addition with
  carry.
  (bary_add_one): New function.
  (bary_muladd_1xN): Extracted from bary_mul_normal.
  (bigmul1_normal): Removed.
  (bary_mul_karatsuba): New function.
  (bary_mul1): Invoke rb_thread_check_ints after bary_mul_normal.
  (bary_mul): Remove most and least significant zeros before actual
  multiplication.  Use bary_sq_fast, bary_mul_balance,
  bary_mul_karatsuba and bigmul1_toom3 as bigmul0.
  (bigmul1_balance): Removed.
  (bigmul1_karatsuba): Removed.
  (bigsqr_fast): Removed.
  (bary_sparse_p): Extracted from big_sparse_p.
  (big_sparse_p): Removed.
  (bigmul0): Use bary_mul.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@41821 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2013-07-07 06:03:52 +00:00
parent 8f50a21064
commit 8dbf566094
2 changed files with 395 additions and 246 deletions

View File

@ -1,3 +1,29 @@
Sun Jul 7 14:41:57 2013 Tanaka Akira <akr@fsij.org>
* bignum.c: (bigsub_core): Use bary_sub.
(bary_sub): Returns a borrow flag. Use bary_subb.
(bary_subb): New function for actually calculating subtraction with
borrow.
(bary_sub_one): New function.
(bigadd_core): Use bary_add.
(bary_add): Returns a carry flag. Use bary_addc.
(bary_addc): New function for actually calculating addition with
carry.
(bary_add_one): New function.
(bary_muladd_1xN): Extracted from bary_mul_normal.
(bigmul1_normal): Removed.
(bary_mul_karatsuba): New function.
(bary_mul1): Invoke rb_thread_check_ints after bary_mul_normal.
(bary_mul): Remove most and least significant zeros before actual
multiplication. Use bary_sq_fast, bary_mul_balance,
bary_mul_karatsuba and bigmul1_toom3 as bigmul0.
(bigmul1_balance): Removed.
(bigmul1_karatsuba): Removed.
(bigsqr_fast): Removed.
(bary_sparse_p): Extracted from big_sparse_p.
(big_sparse_p): Removed.
(bigmul0): Use bary_mul.
Sun Jul 7 11:54:33 2013 Kouhei Sutou <kou@cozmixng.org>
* NEWS: Add REXML::Text#<< related updates.

615
bignum.c
View File

@ -101,14 +101,18 @@ static void bary_small_rshift(BDIGIT *zds, BDIGIT *xds, long n, int shift, int s
static void bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags);
static void bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl);
static void bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl);
static void bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
static int bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
static int bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow);
static void bary_divmod(BDIGIT *qds, size_t nq, BDIGIT *rds, size_t nr, BDIGIT *xds, size_t nx, BDIGIT *yds, size_t ny);
static void bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
static int bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
static int bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry);
static int bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags);
static int bary_2comp(BDIGIT *ds, size_t n);
static VALUE bigsqr_fast(VALUE x);
static void bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn);
static inline int bary_sparse_p(BDIGIT *ds, size_t n);
static VALUE bigmul0(VALUE x, VALUE y);
static VALUE bigmul1_toom3(VALUE x, VALUE y);
#define BIGNUM_DEBUG 0
#if BIGNUM_DEBUG
@ -3459,38 +3463,58 @@ rb_big_neg(VALUE x)
static void
bigsub_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
{
BDIGIT_DBL_SIGNED num;
long i;
bary_sub(zds, zn, xds, xn, yds, yn);
}
for (i = 0, num = 0; i < yn; i++) {
static int
bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow)
{
BDIGIT_DBL_SIGNED num;
size_t i;
assert(yn <= xn);
assert(xn <= zn);
num = borrow ? -1 : 0;
for (i = 0; i < yn; i++) {
num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i];
zds[i] = BIGLO(num);
num = BIGDN(num);
}
while (num && i < xn) {
for (; i < xn; i++) {
if (num == 0) goto num_is_zero;
num += xds[i];
zds[i++] = BIGLO(num);
zds[i] = BIGLO(num);
num = BIGDN(num);
}
if (num == 0) goto num_is_zero;
for (; i < zn; i++) {
zds[i] = BDIGMAX;
}
return 1;
num_is_zero:
if (xds == zds && xn == zn)
return;
while (i < xn) {
return 0;
for (; i < xn; i++) {
zds[i] = xds[i];
i++;
}
assert(i <= zn);
while (i < zn) {
zds[i++] = 0;
for (; i < zn; i++) {
zds[i] = 0;
}
return 0;
}
static void
static int
bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
{
assert(yn <= xn);
assert(xn <= zn);
return bary_subb(zds, zn, xds, xn, yds, yn, 0);
}
bigsub_core(xds, xn, yds, yn, zds, zn);
static int
bary_sub_one(BDIGIT *zds, size_t zn)
{
return bary_subb(zds, zn, zds, zn, NULL, 0, 1);
}
static VALUE
@ -3712,8 +3736,17 @@ bigadd_int(VALUE x, long y)
static void
bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
{
BDIGIT_DBL num = 0;
long i;
bary_add(zds, zn, xds, xn, yds, yn);
}
static int
bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry)
{
BDIGIT_DBL num;
size_t i;
assert(xn <= zn);
assert(yn <= zn);
if (xn > yn) {
BDIGIT *tds;
@ -3721,32 +3754,47 @@ bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
i = xn; xn = yn; yn = i;
}
i = 0;
while (i < xn) {
num = carry ? 1 : 0;
for (i = 0; i < xn; i++) {
num += (BDIGIT_DBL)xds[i] + yds[i];
zds[i++] = BIGLO(num);
zds[i] = BIGLO(num);
num = BIGDN(num);
}
while (num && i < yn) {
for (; i < yn; i++) {
if (num == 0) goto num_is_zero;
num += yds[i];
zds[i++] = BIGLO(num);
zds[i] = BIGLO(num);
num = BIGDN(num);
}
while (i < yn) {
for (; i < zn; i++) {
if (num == 0) goto num_is_zero;
zds[i] = BIGLO(num);
num = BIGDN(num);
}
return num != 0;
num_is_zero:
if (yds == zds && yn == zn)
return 0;
for (; i < yn; i++) {
zds[i] = yds[i];
i++;
}
if (num) zds[i++] = (BDIGIT)num;
assert(i <= zn);
while (i < zn) {
zds[i++] = 0;
for (; i < zn; i++) {
zds[i] = 0;
}
return 0;
}
static void
static int
bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
{
bigadd_core(xds, xn, yds, yn, zds, zn);
return bary_addc(zds, zn, xds, xn, yds, yn, 0);
}
static int
bary_add_one(BDIGIT *zds, size_t zn)
{
return bary_addc(zds, zn, NULL, 0, zds, zn, 1);
}
static VALUE
@ -3871,65 +3919,55 @@ bary_mul_single(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT y)
zds[1] = (BDIGIT)BIGDN(n);
}
static VALUE
bigmul1_single(VALUE x, VALUE y)
static int
bary_muladd_1xN(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT *yds, size_t yl)
{
VALUE z = bignew(2, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
BDIGIT *xds, *yds, *zds;
BDIGIT_DBL n;
BDIGIT_DBL dd;
size_t j;
xds = BDIGITS(x);
yds = BDIGITS(y);
zds = BDIGITS(z);
assert(zl > yl);
bary_mul_single(zds, 2, xds[0], yds[0]);
if (x == 0)
return 0;
dd = x;
n = 0;
for (j = 0; j < yl; j++) {
BDIGIT_DBL ee = n + dd * yds[j];
if (ee) {
n = zds[j] + ee;
zds[j] = BIGLO(n);
n = BIGDN(n);
}
else {
n = 0;
}
return z;
}
for (; j < zl; j++) {
if (n == 0)
break;
n += zds[j];
zds[j] = BIGLO(n);
n = BIGDN(n);
}
return n != 0;
}
static void
bary_mul_normal(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
{
size_t i;
size_t j = zl;
BDIGIT_DBL n = 0;
assert(xl + yl <= zl);
while (j--) zds[j] = 0;
MEMZERO(zds, BDIGIT, zl);
for (i = 0; i < xl; i++) {
BDIGIT_DBL dd;
dd = xds[i];
if (dd == 0) continue;
n = 0;
for (j = 0; j < yl; j++) {
BDIGIT_DBL ee = n + dd * yds[j];
n = zds[i + j] + ee;
if (ee) zds[i + j] = BIGLO(n);
n = BIGDN(n);
}
if (n) {
zds[i + j] = (BDIGIT)n;
}
bary_muladd_1xN(zds+i, zl-i, xds[i], yds, yl);
}
}
static VALUE
bigmul1_normal(VALUE x, VALUE y)
{
size_t xl = RBIGNUM_LEN(x), yl = RBIGNUM_LEN(y), zl = xl + yl;
VALUE z = bignew(zl, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
BDIGIT *xds, *yds, *zds;
xds = BDIGITS(x);
yds = BDIGITS(y);
zds = BDIGITS(z);
bary_mul_normal(zds, zl, xds, xl, yds, yl);
rb_thread_check_ints();
return z;
}
/* balancing multiplication by slicing larger argument */
static void
bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
{
@ -3961,6 +3999,168 @@ bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, si
ALLOCV_END(work);
}
/* multiplication by karatsuba method */
static void
bary_mul_karatsuba(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
{
VALUE work = 0;
BDIGIT *wds;
size_t wl;
size_t n;
int sub_p, borrow, carry1, carry2, carry3;
int odd_x = 0;
int odd_y = 0;
BDIGIT *xds0, *xds1, *yds0, *yds1, *zds0, *zds1, *zds2, *zds3;
assert(xl + yl <= zl);
assert(xl <= yl);
assert(yl < 2 * xl);
if (yl & 1) {
odd_y = 1;
yl--;
if (yl < xl) {
odd_x = 1;
xl--;
}
}
n = yl / 2;
assert(n < xl);
wl = n;
wds = ALLOCV_N(BDIGIT, work, wl);
/* Karatsuba algorithm:
*
* x = x0 + r*x1
* y = y0 + r*y1
* z = x*y
* = (x0 + r*x1) * (y0 + r*y1)
* = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1
* = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1
* = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1
*/
xds0 = xds;
xds1 = xds + n;
yds0 = yds;
yds1 = yds + n;
zds0 = zds;
zds1 = zds + n;
zds2 = zds + 2*n;
zds3 = zds + 3*n;
sub_p = 1;
/* zds0:? zds1:? zds2:? zds3:? wds:? */
if (bary_sub(zds0, n, xds, n, xds+n, xl-n)) {
bary_2comp(zds0, n);
sub_p = !sub_p;
}
/* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */
if (bary_sub(wds, n, yds, n, yds+n, n)) {
bary_2comp(wds, n);
sub_p = !sub_p;
}
/* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */
bary_mul(zds1, 2*n, zds0, n, wds, n);
/* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
borrow = 0;
if (sub_p) {
borrow = !bary_2comp(zds1, 2*n);
}
/* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
MEMCPY(wds, zds1, BDIGIT, n);
/* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
bary_mul(zds0, 2*n, xds0, n, yds0, n);
/* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
carry1 = bary_add(wds, n, wds, n, zds0, n);
carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1);
/* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
carry2 = bary_add(zds1, n, zds1, n, wds, n);
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
MEMCPY(wds, zds2, BDIGIT, n);
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
bary_mul(zds2, zl-2*n, xds1, xl-n, yds1, n);
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
carry3 = bary_add(zds1, n, zds1, n, zds2, n);
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zl ? n : zl-3*n), carry3);
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
bary_add(zds2, zl-2*n, zds2, zl-2*n, wds, n);
/* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */
if (carry2)
bary_add_one(zds2, zl-2*n);
if (borrow && carry1)
borrow = carry1 = 0;
if (borrow && carry3)
borrow = carry3 = 0;
if (borrow)
bary_sub_one(zds3, zl-3*n);
else if (carry1 || carry3) {
BDIGIT c = carry1 + carry3;
bary_add(zds3, zl-3*n, zds3, zl-3*n, &c, 1);
}
/*
if (SIZEOF_BDIGITS * zl <= 16) {
uint128_t z, x, y;
ssize_t i;
for (x = 0, i = xl-1; 0 <= i; i--) { x <<= SIZEOF_BDIGITS*CHAR_BIT; x |= xds[i]; }
for (y = 0, i = yl-1; 0 <= i; i--) { y <<= SIZEOF_BDIGITS*CHAR_BIT; y |= yds[i]; }
for (z = 0, i = zl-1; 0 <= i; i--) { z <<= SIZEOF_BDIGITS*CHAR_BIT; z |= zds[i]; }
assert(z == x * y);
}
*/
if (odd_x && odd_y) {
bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl+1);
}
else if (odd_x) {
bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl);
}
else if (odd_y) {
bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
}
if (work)
ALLOCV_END(work);
}
static void
bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
{
@ -3975,6 +4175,7 @@ bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl
else {
l = xl + yl;
bary_mul_normal(zds, zl, xds, xl, yds, yl);
rb_thread_check_ints();
}
MEMZERO(zds + l, BDIGIT, zl - l);
}
@ -3982,45 +4183,92 @@ bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl
static void
bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
{
size_t nlsz; /* number of least significant zero BDIGITs */
assert(xl + yl <= zl);
if ((xl < yl ? xl : yl) < KARATSUBA_MUL_DIGITS)
bary_mul1(zds, zl, xds, xl, yds, yl);
else {
while (0 < xl && xds[xl-1] == 0)
xl--;
while (0 < yl && yds[yl-1] == 0)
yl--;
nlsz = 0;
while (0 < xl && xds[0] == 0) {
xds++;
xl--;
nlsz++;
}
while (0 < yl && yds[0] == 0) {
yds++;
yl--;
nlsz++;
}
if (nlsz) {
MEMZERO(zds, BDIGIT, nlsz);
zds += nlsz;
zl -= nlsz;
}
/* make sure that y is longer than x */
if (xl > yl) {
BDIGIT *tds;
size_t tl;
tds = xds; xds = yds; yds = tds;
tl = xl; xl = yl; yl = tl;
}
assert(xl <= yl);
if (xl == 0) {
MEMZERO(zds, BDIGIT, zl);
return;
}
/* normal multiplication when x is small */
if (xl < KARATSUBA_MUL_DIGITS) {
normal:
if (xds == yds && xl == yl)
bary_sq_fast(zds, zl, xds, xl);
else
bary_mul1(zds, zl, xds, xl, yds, yl);
return;
}
/* normal multiplication when x or y is a sparse bignum */
if (bary_sparse_p(xds, xl)) goto normal;
if (bary_sparse_p(yds, yl)) {
bary_mul1(zds, zl, yds, yl, xds, xl);
return;
}
/* balance multiplication by slicing y when x is much smaller than y */
if (2 * xl <= yl) {
bary_mul_balance(zds, zl, xds, xl, yds, yl);
return;
}
if (xl < TOOM3_MUL_DIGITS) {
/* multiplication by karatsuba method */
bary_mul_karatsuba(zds, zl, xds, xl, yds, yl);
return;
}
if (3*xl <= 2*(yl + 2)) {
bary_mul_balance(zds, zl, xds, xl, yds, yl);
return;
}
{
VALUE x, y, z;
x = bignew(xl, 1);
MEMCPY(BDIGITS(x), xds, BDIGIT, xl);
y = bignew(yl, 1);
MEMCPY(BDIGITS(y), yds, BDIGIT, yl);
z = bigtrunc(bigmul0(x, y));
z = bigtrunc(bigmul1_toom3(x, y));
MEMCPY(zds, BDIGITS(z), BDIGIT, RBIGNUM_LEN(z));
MEMZERO(zds + RBIGNUM_LEN(z), BDIGIT, zl - RBIGNUM_LEN(z));
}
}
/* balancing multiplication by slicing larger argument */
static VALUE
bigmul1_balance(VALUE x, VALUE y)
{
VALUE z;
long xn, yn, zn;
BDIGIT *xds, *yds, *zds;
xn = RBIGNUM_LEN(x);
yn = RBIGNUM_LEN(y);
assert(2 * xn <= yn || 3 * xn <= 2*(yn+2));
zn = xn + yn;
z = bignew(zn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
xds = BDIGITS(x);
yds = BDIGITS(y);
zds = BDIGITS(z);
bary_mul_balance(zds, zn, xds, xn, yds, yn);
return z;
}
/* split a bignum into high and low bignums */
static void
@ -4054,102 +4302,6 @@ big_split(VALUE v, long n, volatile VALUE *ph, volatile VALUE *pl)
*ph = h;
}
/* multiplication by karatsuba method */
static VALUE
bigmul1_karatsuba(VALUE x, VALUE y)
{
long i, n, xn, yn, t1n, t2n;
VALUE xh, xl, yh, yl, z, t1, t2, t3;
BDIGIT *zds;
xn = RBIGNUM_LEN(x);
yn = RBIGNUM_LEN(y);
n = yn / 2;
big_split(x, n, &xh, &xl);
if (x == y) {
yh = xh; yl = xl;
}
else big_split(y, n, &yh, &yl);
/* x = xh * b + xl
* y = yh * b + yl
*
* Karatsuba method:
* x * y = z2 * b^2 + z1 * b + z0
* where
* z2 = xh * yh
* z0 = xl * yl
* z1 = (xh + xl) * (yh + yl) - z2 - z0
*
* ref: http://en.wikipedia.org/wiki/Karatsuba_algorithm
*/
/* allocate a result bignum */
z = bignew(xn + yn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
zds = BDIGITS(z);
/* t1 <- xh * yh */
t1 = bigmul0(xh, yh);
t1n = big_real_len(t1);
/* copy t1 into high bytes of the result (z2) */
MEMCPY(zds + 2 * n, BDIGITS(t1), BDIGIT, t1n);
for (i = 2 * n + t1n; i < xn + yn; i++) zds[i] = 0;
if (!BIGZEROP(xl) && !BIGZEROP(yl)) {
/* t2 <- xl * yl */
t2 = bigmul0(xl, yl);
t2n = big_real_len(t2);
/* copy t2 into low bytes of the result (z0) */
MEMCPY(zds, BDIGITS(t2), BDIGIT, t2n);
for (i = t2n; i < 2 * n; i++) zds[i] = 0;
}
else {
t2 = Qundef;
t2n = 0;
/* copy 0 into low bytes of the result (z0) */
for (i = 0; i < 2 * n; i++) zds[i] = 0;
}
/* xh <- xh + xl */
if (RBIGNUM_LEN(xl) > RBIGNUM_LEN(xh)) {
t3 = xl; xl = xh; xh = t3;
}
/* xh has a margin for carry */
bigadd_core(BDIGITS(xh), RBIGNUM_LEN(xh),
BDIGITS(xl), RBIGNUM_LEN(xl),
BDIGITS(xh), RBIGNUM_LEN(xh));
/* yh <- yh + yl */
if (x != y) {
if (RBIGNUM_LEN(yl) > RBIGNUM_LEN(yh)) {
t3 = yl; yl = yh; yh = t3;
}
/* yh has a margin for carry */
bigadd_core(BDIGITS(yh), RBIGNUM_LEN(yh),
BDIGITS(yl), RBIGNUM_LEN(yl),
BDIGITS(yh), RBIGNUM_LEN(yh));
}
else yh = xh;
/* t3 <- xh * yh */
t3 = bigmul0(xh, yh);
i = xn + yn - n;
/* subtract t1 from t3 */
bigsub_core(BDIGITS(t3), big_real_len(t3), BDIGITS(t1), t1n, BDIGITS(t3), big_real_len(t3));
/* subtract t2 from t3; t3 is now the middle term of the product */
if (t2 != Qundef) bigsub_core(BDIGITS(t3), big_real_len(t3), BDIGITS(t2), t2n, BDIGITS(t3), big_real_len(t3));
/* add t3 to middle bytes of the result (z1) */
bigadd_core(zds + n, i, BDIGITS(t3), big_real_len(t3), zds + n, i);
return z;
}
static void
biglsh_bang(BDIGIT *xds, long xn, unsigned long shift)
{
@ -4421,70 +4573,41 @@ bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn)
}
}
static VALUE
bigsqr_fast(VALUE x)
{
long xn = RBIGNUM_LEN(x);
VALUE z = bignew(2 * xn, 1);
BDIGIT *xds = BDIGITS(x), *zds = BDIGITS(z);
bary_sq_fast(zds, RBIGNUM_LEN(z), xds, xn);
return z;
}
/* determine whether a bignum is sparse or not by random sampling */
static inline VALUE
big_sparse_p(VALUE x)
static inline int
bary_sparse_p(BDIGIT *ds, size_t n)
{
long c = 0, n = RBIGNUM_LEN(x);
long c = 0;
if ( BDIGITS(x)[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
if (c <= 1 && BDIGITS(x)[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
if (c <= 1 && BDIGITS(x)[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
if ( ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++;
return (c <= 1) ? Qtrue : Qfalse;
return (c <= 1) ? 1 : 0;
}
static VALUE
bigmul0(VALUE x, VALUE y)
{
long xn, yn;
long xn, yn, zn;
VALUE z;
BDIGIT *xds, *yds, *zds;
xn = RBIGNUM_LEN(x);
yn = RBIGNUM_LEN(y);
zn = xn + yn;
/* make sure that y is longer than x */
if (xn > yn) {
VALUE t;
long tn;
t = x; x = y; y = t;
tn = xn; xn = yn; yn = tn;
}
assert(xn <= yn);
z = bignew(zn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
/* normal multiplication when x is small */
if (xn < KARATSUBA_MUL_DIGITS) {
normal:
if (x == y) return bigsqr_fast(x);
if (xn == 1 && yn == 1) return bigmul1_single(x, y);
return bigmul1_normal(x, y);
}
xds = BDIGITS(x);
yds = BDIGITS(y);
zds = BDIGITS(z);
/* normal multiplication when x or y is a sparse bignum */
if (big_sparse_p(x)) goto normal;
if (big_sparse_p(y)) return bigmul1_normal(y, x);
bary_mul(zds, zn, xds, xn, yds, yn);
/* balance multiplication by slicing y when x is much smaller than y */
if (2 * xn <= yn) return bigmul1_balance(x, y);
if (xn < TOOM3_MUL_DIGITS) {
/* multiplication by karatsuba method */
return bigmul1_karatsuba(x, y);
}
else if (3*xn <= 2*(yn + 2))
return bigmul1_balance(x, y);
return bigmul1_toom3(x, y);
RB_GC_GUARD(x);
RB_GC_GUARD(y);
return z;
}
/*