/* Retro68/gcc/libgcc/config/libbid/bid64_div.c */

/* Copyright (C) 2007-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/*****************************************************************************
 *    BID64 divide
 *****************************************************************************
 *
 *  Algorithm description:
 *
 *  if(coefficient_x<coefficient_y)
 *    p = number_digits(coefficient_y) - number_digits(coefficient_x)
 *    A = coefficient_x*10^p
 *    B = coefficient_y
 *    CA= A*10^(15+j), j=0 for A>=B, 1 otherwise
 *    Q = 0
 *  else
 *    get Q=(int)(coefficient_x/coefficient_y)
 *        (based on double precision divide)
 *    check for exact divide case
 *    Let R = coefficient_x - Q*coefficient_y
 *    Let m=16-number_digits(Q)
 *    CA=R*10^m, Q=Q*10^m
 *    B = coefficient_y
 *  endif
 *    if (CA<2^64)
 *      Q += CA/B  (64-bit unsigned divide)
 *    else
 *      get final Q using double precision divide, followed by 3 integer
 *          iterations
 *    if exact result, eliminate trailing zeros
 *    check for underflow
 *    round coefficient to nearest
 *
 ****************************************************************************/

#include "bid_internal.h"
#include "bid_div_macros.h"
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
#include <fenv.h>

#define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
#endif

extern UINT32 convert_table[5][128][2];
extern SINT8 factors[][2];
extern UINT8 packed_10000_zeros[];


#if DECIMAL_CALL_BY_REFERENCE

void
bid64_div (UINT64 * pres, UINT64 * px,
	   UINT64 *
	   py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	   _EXC_INFO_PARAM) {
  UINT64 x, y;
#else

UINT64
bid64_div (UINT64 x,
	   UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
	   _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
#endif
  UINT128 CA, CT;
  UINT64 sign_x, sign_y, coefficient_x, coefficient_y, A, B, QX, PD;
  UINT64 A2, Q, Q2, B2, B4, B5, R, T, DU, res;
  UINT64 valid_x, valid_y;
  SINT64 D;
  int_double t_scale, tempq, temp_b;
  int_float tempx, tempy;
  double da, db, dq, da_h, da_l;
  int exponent_x, exponent_y, bin_expon_cx;
  int diff_expon, ed1, ed2, bin_index;
  int rmode, amount;
  int nzeros, i, j, k, d5;
  UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
  fexcept_t binaryflags = 0;
#endif

#if DECIMAL_CALL_BY_REFERENCE
#if !DECIMAL_GLOBAL_ROUNDING
  _IDEC_round rnd_mode = *prnd_mode;
#endif
  x = *px;
  y = *py;
#endif

  valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
  valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);

  // unpack arguments, check for NaN or Infinity
  if (!valid_x) {
    // x is Inf. or NaN
#ifdef SET_STATUS_FLAGS
    if ((y & SNAN_MASK64) == SNAN_MASK64)	// y is sNaN
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif

    // test if x is NaN
    if ((x & NAN_MASK64) == NAN_MASK64) {
#ifdef SET_STATUS_FLAGS
      if ((x & SNAN_MASK64) == SNAN_MASK64)	// sNaN
	__set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      BID_RETURN (coefficient_x & QUIET_MASK64);
    }
    // x is Infinity?
    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      // check if y is Inf or NaN
      if ((y & INFINITY_MASK64) == INFINITY_MASK64) {
	// y==Inf, return NaN
	if ((y & NAN_MASK64) == INFINITY_MASK64) {	// Inf/Inf
#ifdef SET_STATUS_FLAGS
	  __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
	  BID_RETURN (NAN_MASK64);
	}
      } else {
	// otherwise return +/-Inf
	BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
		    INFINITY_MASK64);
      }
    }
    // x==0
    if (((y & INFINITY_MASK64) != INFINITY_MASK64)
	&& !(coefficient_y)) {
      // y==0 , return NaN
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      BID_RETURN (NAN_MASK64);
    }
    if (((y & INFINITY_MASK64) != INFINITY_MASK64)) {
      if ((y & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64)
	exponent_y = ((UINT32) (y >> 51)) & 0x3ff;
      else
	exponent_y = ((UINT32) (y >> 53)) & 0x3ff;
      sign_y = y & 0x8000000000000000ull;

      exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
      if (exponent_x > DECIMAL_MAX_EXPON_64)
	exponent_x = DECIMAL_MAX_EXPON_64;
      else if (exponent_x < 0)
	exponent_x = 0;
      BID_RETURN ((sign_x ^ sign_y) | (((UINT64) exponent_x) << 53));
    }

  }
  if (!valid_y) {
    // y is Inf. or NaN

    // test if y is NaN
    if ((y & NAN_MASK64) == NAN_MASK64) {
#ifdef SET_STATUS_FLAGS
      if ((y & SNAN_MASK64) == SNAN_MASK64)	// sNaN
	__set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      BID_RETURN (coefficient_y & QUIET_MASK64);
    }
    // y is Infinity?
    if ((y & INFINITY_MASK64) == INFINITY_MASK64) {
      // return +/-0
      BID_RETURN (((x ^ y) & 0x8000000000000000ull));
    }
    // y is 0
#ifdef SET_STATUS_FLAGS
    __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
#endif
    BID_RETURN ((sign_x ^ sign_y) | INFINITY_MASK64);
  }
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
  (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
  diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;

  if (coefficient_x < coefficient_y) {
    // get number of decimal digits for c_x, c_y

    //--- get number of bits in the coefficients of x and y ---
    tempx.d = (float) coefficient_x;
    tempy.d = (float) coefficient_y;
    bin_index = (tempy.i - tempx.i) >> 23;

    A = coefficient_x * power10_index_binexp[bin_index];
    B = coefficient_y;

    temp_b.d = (double) B;

    // compare A, B
    DU = (A - B) >> 63;
    ed1 = 15 + (int) DU;
    ed2 = estimate_decimal_digits[bin_index] + ed1;
    T = power10_table_128[ed1].w[0];
    __mul_64x64_to_128 (CA, A, T);

    Q = 0;
    diff_expon = diff_expon - ed2;

    // adjust double precision db, to ensure that later A/B - (int)(da/db) > -1
    if (coefficient_y < 0x0020000000000000ull) {
      temp_b.i += 1;
      db = temp_b.d;
    } else
      db = (double) (B + 2 + (B & 1));

  } else {
    // get c_x/c_y

    //  set last bit before conversion to DP
    A2 = coefficient_x | 1;
    da = (double) A2;

    db = (double) coefficient_y;

    tempq.d = da / db;
    Q = (UINT64) tempq.d;

    R = coefficient_x - coefficient_y * Q;

    // will use to get number of dec. digits of Q
    bin_expon_cx = (tempq.i >> 52) - 0x3ff;

    // R<0 ?
    D = ((SINT64) R) >> 63;
    Q += D;
    R += (coefficient_y & D);

    // exact result ?
    if (((SINT64) R) <= 0) {
      // can have R==-1 for coeff_y==1
      res =
	get_BID64 (sign_x ^ sign_y, diff_expon, (Q + R), rnd_mode,
		   pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
      (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
      BID_RETURN (res);
    }
    // get decimal digits of Q
    DU = power10_index_binexp[bin_expon_cx] - Q - 1;
    DU >>= 63;

    ed2 = 16 - estimate_decimal_digits[bin_expon_cx] - (int) DU;

    T = power10_table_128[ed2].w[0];
    __mul_64x64_to_128 (CA, R, T);
    B = coefficient_y;

    Q *= power10_table_128[ed2].w[0];
    diff_expon -= ed2;

  }

  if (!CA.w[1]) {
    Q2 = CA.w[0] / B;
    B2 = B + B;
    B4 = B2 + B2;
    R = CA.w[0] - Q2 * B;
    Q += Q2;
  } else {

    // 2^64
    t_scale.i = 0x43f0000000000000ull;
    // convert CA to DP
    da_h = CA.w[1];
    da_l = CA.w[0];
    da = da_h * t_scale.d + da_l;

    // quotient
    dq = da / db;
    Q2 = (UINT64) dq;

    // get w[0] remainder
    R = CA.w[0] - Q2 * B;

    // R<0 ?
    D = ((SINT64) R) >> 63;
    Q2 += D;
    R += (B & D);

    // now R<6*B

    // quick divide

    // 4*B
    B2 = B + B;
    B4 = B2 + B2;

    R = R - B4;
    // R<0 ?
    D = ((SINT64) R) >> 63;
    // restore R if negative
    R += (B4 & D);
    Q2 += ((~D) & 4);

    R = R - B2;
    // R<0 ?
    D = ((SINT64) R) >> 63;
    // restore R if negative
    R += (B2 & D);
    Q2 += ((~D) & 2);

    R = R - B;
    // R<0 ?
    D = ((SINT64) R) >> 63;
    // restore R if negative
    R += (B & D);
    Q2 += ((~D) & 1);

    Q += Q2;
  }

#ifdef SET_STATUS_FLAGS
  if (R) {
    // set status flags
    __set_status_flags (pfpsf, INEXACT_EXCEPTION);
  }
#ifndef LEAVE_TRAILING_ZEROS
  else
#endif
#else
#ifndef LEAVE_TRAILING_ZEROS
  if (!R)
#endif
#endif
#ifndef LEAVE_TRAILING_ZEROS
  {
    // eliminate trailing zeros

    // check whether CX, CY are short
    if ((coefficient_x <= 1024) && (coefficient_y <= 1024)) {
      i = (int) coefficient_y - 1;
      j = (int) coefficient_x - 1;
      // difference in powers of 2 factors for Y and X
      nzeros = ed2 - factors[i][0] + factors[j][0];
      // difference in powers of 5 factors
      d5 = ed2 - factors[i][1] + factors[j][1];
      if (d5 < nzeros)
	nzeros = d5;

      __mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]);

      // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
      amount = short_recip_scale[nzeros];
      Q = CT.w[1] >> amount;

      diff_expon += nzeros;
    } else {
      tdigit[0] = Q & 0x3ffffff;
      tdigit[1] = 0;
      QX = Q >> 26;
      QX32 = QX;
      nzeros = 0;

      for (j = 0; QX32; j++, QX32 >>= 7) {
	k = (QX32 & 127);
	tdigit[0] += convert_table[j][k][0];
	tdigit[1] += convert_table[j][k][1];
	if (tdigit[0] >= 100000000) {
	  tdigit[0] -= 100000000;
	  tdigit[1]++;
	}
      }

      digit = tdigit[0];
      if (!digit && !tdigit[1])
	nzeros += 16;
      else {
	if (!digit) {
	  nzeros += 8;
	  digit = tdigit[1];
	}
	// decompose digit
	PD = (UINT64) digit *0x068DB8BBull;
	digit_h = (UINT32) (PD >> 40);
	digit_low = digit - digit_h * 10000;

	if (!digit_low)
	  nzeros += 4;
	else
	  digit_h = digit_low;

	if (!(digit_h & 1))
	  nzeros +=
	    3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
			  (digit_h & 7));
      }

      if (nzeros) {
	__mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]);

	// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
	amount = short_recip_scale[nzeros];
	Q = CT.w[1] >> amount;
      }
      diff_expon += nzeros;

    }
    if (diff_expon >= 0) {
      res =
	fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q,
				 rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
      (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
      BID_RETURN (res);
    }
  }
#endif

  if (diff_expon >= 0) {
#ifdef IEEE_ROUND_NEAREST
    // round to nearest code
    // R*10
    R += R;
    R = (R << 2) + R;
    B5 = B4 + B;

    // compare 10*R to 5*B
    R = B5 - R;
    // correction for (R==0 && (Q&1))
    R -= (Q & 1);
    // R<0 ?
    D = ((UINT64) R) >> 63;
    Q += D;
#else
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
    // round to nearest code
    // R*10
    R += R;
    R = (R << 2) + R;
    B5 = B4 + B;

    // compare 10*R to 5*B
    R = B5 - R;
    // correction for (R==0 && (Q&1))
    R -= (Q & 1);
    // R<0 ?
    D = ((UINT64) R) >> 63;
    Q += D;
#else
    rmode = rnd_mode;
    if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
      rmode = 3 - rmode;
    switch (rmode) {
    case 0:	// round to nearest code
    case ROUNDING_TIES_AWAY:
      // R*10
      R += R;
      R = (R << 2) + R;
      B5 = B4 + B;
      // compare 10*R to 5*B
      R = B5 - R;
      // correction for (R==0 && (Q&1))
      R -= ((Q | (rmode >> 2)) & 1);
      // R<0 ?
      D = ((UINT64) R) >> 63;
      Q += D;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      break;
    default:	// rounding up
      Q++;
      break;
    }
#endif
#endif

    res =
      fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q, rnd_mode,
			       pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);
  } else {
    // UF occurs

#ifdef SET_STATUS_FLAGS
    if ((diff_expon + 16 < 0)) {
      // set status flags
      __set_status_flags (pfpsf, INEXACT_EXCEPTION);
    }
#endif
    rmode = rnd_mode;
    res =
      get_BID64_UF (sign_x ^ sign_y, diff_expon, Q, R, rmode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);

  }
}


TYPE0_FUNCTION_ARGTYPE1_ARG128 (UINT64, bid64dq_div, UINT64, x, y)
     UINT256 CA4 =
       { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp;
UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res;
int_float fx, fy, f64;
UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
  digits_q, amount;
int nzeros, i, j, k, d5, done = 0;
unsigned rmode;
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
fexcept_t binaryflags = 0;
#endif

valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y);

	// unpack arguments, check for NaN or Infinity
CX.w[1] = 0;
if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) {
#ifdef SET_STATUS_FLAGS
    if (((y.w[1] & SNAN_MASK64) == SNAN_MASK64) ||	// y is sNaN
		((x & SNAN_MASK64) == SNAN_MASK64))
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
  // test if x is NaN
  if (((x) & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
    res = CX.w[0];
    BID_RETURN (res & QUIET_MASK64);
  }
  // x is Infinity?
  if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) {
    // check if y is Inf.
    if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull))
      // return NaN
    {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      res = 0x7c00000000000000ull;
      BID_RETURN (res);
    }
	if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
    // otherwise return +/-Inf
    res =
      (((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
    BID_RETURN (res);
	}
  }
  // x is 0
  if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) {
    if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      // x=y=0, return NaN
      res = 0x7c00000000000000ull;
      BID_RETURN (res);
    }
    // return 0
    res = ((x) ^ y.w[1]) & 0x8000000000000000ull;
    exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128;
    if (exponent_x > DECIMAL_MAX_EXPON_64)
      exponent_x = DECIMAL_MAX_EXPON_64;
    else if (exponent_x < 0)
      exponent_x = 0;
    res |= (((UINT64) exponent_x) << 53);
    BID_RETURN (res);
  }
}
exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS);
if (!valid_y) {
  // y is Inf. or NaN

  // test if y is NaN
  if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
#ifdef SET_STATUS_FLAGS
    if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)	// sNaN
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull);
    Tmp.w[0] = CY.w[0];
    TP128 = reciprocals10_128[18];
    __mul_128x128_high (Qh, Tmp, TP128);
    amount = recip_scale[18];
    __shr_128 (Tmp, Qh, amount);
    res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
    BID_RETURN (res);
  }
  // y is Infinity?
  if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
    // return +/-0
    res = sign_x ^ sign_y;
    BID_RETURN (res);
  }
  // y is 0, return +/-Inf
  res =
    (((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
#ifdef SET_STATUS_FLAGS
  __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
#endif
  BID_RETURN (res);
}
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;

if (__unsigned_compare_gt_128 (CY, CX)) {
  // CX < CY

  // 2^64
  f64.i = 0x5f800000;

  // fx ~ CX,   fy ~ CY
  fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
  fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
  // expon_cy - expon_cx
  bin_index = (fy.i - fx.i) >> 23;

  if (CX.w[1]) {
    T = power10_index_binexp_128[bin_index].w[0];
    __mul_64x128_short (CA, T, CX);
  } else {
    T128 = power10_index_binexp_128[bin_index];
    __mul_64x128_short (CA, CX.w[0], T128);
  }

  ed2 = 15;
  if (__unsigned_compare_gt_128 (CY, CA))
    ed2++;

  T128 = power10_table_128[ed2];
  __mul_128x128_to_256 (CA4, CA, T128);

  ed2 += estimate_decimal_digits[bin_index];
  CQ.w[0] = CQ.w[1] = 0;
  diff_expon = diff_expon - ed2;

} else {
  // get CQ = CX/CY
  __div_128_by_128 (&CQ, &CR, CX, CY);

  // get number of decimal digits in CQ
  // 2^64
  f64.i = 0x5f800000;
  fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
  // binary expon. of CQ
  bin_expon = (fx.i - 0x3f800000) >> 23;

  digits_q = estimate_decimal_digits[bin_expon];
  TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
  TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
  if (__unsigned_compare_ge_128 (CQ, TP128))
    digits_q++;

  if (digits_q <= 16) {
    if (!CR.w[1] && !CR.w[0]) {
      res = get_BID64 (sign_x ^ sign_y, diff_expon,
		       CQ.w[0], rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
      (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
      BID_RETURN (res);
    }

    ed2 = 16 - digits_q;
    T128.w[0] = power10_table_128[ed2].w[0];
    __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
    diff_expon = diff_expon - ed2;
    CQ.w[0] *= T128.w[0];
  } else {
    ed2 = digits_q - 16;
    diff_expon += ed2;
    T128 = reciprocals10_128[ed2];
    __mul_128x128_to_256 (P256, CQ, T128);
    amount = recip_scale[ed2];
    CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
    CQ.w[1] = 0;

    __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));

    __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
    QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];

    CA4.w[1] = CX.w[1] - QB256.w[1];
    CA4.w[0] = CX.w[0] - QB256.w[0];
    if (CX.w[0] < QB256.w[0])
      CA4.w[1]--;
    if (CR.w[0] || CR.w[1])
      CA4.w[0] |= 1;
    done = 1;

  }

}
if (!done) {
  __div_256_by_128 (&CQ, &CA4, CY);
}


#ifdef SET_STATUS_FLAGS
  if (CA4.w[0] || CA4.w[1]) {
    // set status flags
    __set_status_flags (pfpsf, INEXACT_EXCEPTION);
  }
#ifndef LEAVE_TRAILING_ZEROS
  else
#endif
#else
#ifndef LEAVE_TRAILING_ZEROS
  if (!CA4.w[0] && !CA4.w[1])
#endif
#endif
#ifndef LEAVE_TRAILING_ZEROS
    // check whether result is exact
  {
    // check whether CX, CY are short
    if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
      i = (int) CY.w[0] - 1;
      j = (int) CX.w[0] - 1;
      // difference in powers of 2 factors for Y and X
      nzeros = ed2 - factors[i][0] + factors[j][0];
      // difference in powers of 5 factors
      d5 = ed2 - factors[i][1] + factors[j][1];
      if (d5 < nzeros)
	nzeros = d5;
      // get P*(2^M[extra_digits])/10^extra_digits
      __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[nzeros];
      __shr_128_long (CQ, Qh, amount);

      diff_expon += nzeros;
    } else {
      // decompose Q as Qh*10^17 + Ql
      Q_low = CQ.w[0];

      {
	tdigit[0] = Q_low & 0x3ffffff;
	tdigit[1] = 0;
	QX = Q_low >> 26;
	QX32 = QX;
	nzeros = 0;

	for (j = 0; QX32; j++, QX32 >>= 7) {
	  k = (QX32 & 127);
	  tdigit[0] += convert_table[j][k][0];
	  tdigit[1] += convert_table[j][k][1];
	  if (tdigit[0] >= 100000000) {
	    tdigit[0] -= 100000000;
	    tdigit[1]++;
	  }
	}

	if (tdigit[1] >= 100000000) {
	  tdigit[1] -= 100000000;
	  if (tdigit[1] >= 100000000)
	    tdigit[1] -= 100000000;
	}

	digit = tdigit[0];
	if (!digit && !tdigit[1])
	  nzeros += 16;
	else {
	  if (!digit) {
	    nzeros += 8;
	    digit = tdigit[1];
	  }
	  // decompose digit
	  PD = (UINT64) digit *0x068DB8BBull;
	  digit_h = (UINT32) (PD >> 40);
	  digit_low = digit - digit_h * 10000;

	  if (!digit_low)
	    nzeros += 4;
	  else
	    digit_h = digit_low;

	  if (!(digit_h & 1))
	    nzeros +=
	      3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
			    (digit_h & 7));
	}

	if (nzeros) {
	  // get P*(2^M[extra_digits])/10^extra_digits
	  __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]);

	  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
	  amount = recip_scale[nzeros];
	  __shr_128 (CQ, Qh, amount);
	}
	diff_expon += nzeros;

      }
    }
	if(diff_expon>=0){
    res =
      fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
			       rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);
	}
  }
#endif

  if (diff_expon >= 0) {
#ifdef IEEE_ROUND_NEAREST
  // rounding
  // 2*CA4 - CY
  CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
  CA4r.w[0] = CA4.w[0] + CA4.w[0];
  __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
  CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;

  D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
  carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);

  CQ.w[0] += carry64;
#else
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
  // rounding
  // 2*CA4 - CY
  CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
  CA4r.w[0] = CA4.w[0] + CA4.w[0];
  __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
  CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;

  D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
  carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;

  CQ.w[0] += carry64;
  if (CQ.w[0] < carry64)
    CQ.w[1]++;
#else
  rmode = rnd_mode;
  if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
  switch (rmode) {
  case ROUNDING_TO_NEAREST:	// round to nearest code
    // rounding
    // 2*CA4 - CY
    CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
    CA4r.w[0] = CA4.w[0] + CA4.w[0];
    __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
    CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
    D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
    carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
    CQ.w[0] += carry64;
    if (CQ.w[0] < carry64)
      CQ.w[1]++;
    break;
  case ROUNDING_TIES_AWAY:
    // rounding
    // 2*CA4 - CY
    CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
    CA4r.w[0] = CA4.w[0] + CA4.w[0];
    __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
    CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
    D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
    carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
    CQ.w[0] += carry64;
    if (CQ.w[0] < carry64)
      CQ.w[1]++;
    break;
  case ROUNDING_DOWN:
  case ROUNDING_TO_ZERO:
    break;
  default:	// rounding up
    CQ.w[0]++;
    if (!CQ.w[0])
      CQ.w[1]++;
    break;
  }
#endif
#endif

    res =
      fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
			       pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);
  } else {
    // UF occurs

#ifdef SET_STATUS_FLAGS
    if ((diff_expon + 16 < 0)) {
      // set status flags
      __set_status_flags (pfpsf, INEXACT_EXCEPTION);
    }
#endif
    rmode = rnd_mode;
    res =
      get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);

  }

}


//#define LEAVE_TRAILING_ZEROS

TYPE0_FUNCTION_ARG128_ARGTYPE2 (UINT64, bid64qd_div, x, UINT64, y)

     UINT256 CA4 =
       { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp;
UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, PD, res, valid_y;
int_float fx, fy, f64;
UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
  digits_q, amount;
int nzeros, i, j, k, d5, done = 0;
unsigned rmode;
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
fexcept_t binaryflags = 0;
#endif

valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], (y));

	// unpack arguments, check for NaN or Infinity
if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
  // test if x is NaN
  if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
#ifdef SET_STATUS_FLAGS
    if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull ||	// sNaN
	(y & 0x7e00000000000000ull) == 0x7e00000000000000ull)
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
      Tmp.w[0] = CX.w[0];
      TP128 = reciprocals10_128[18];
      __mul_128x128_high (Qh, Tmp, TP128);
      amount = recip_scale[18];
      __shr_128 (Tmp, Qh, amount);
      res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
    BID_RETURN (res);
  }
  // x is Infinity?
  if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
    // check if y is Inf.
    if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull))
      // return NaN
    {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      res = 0x7c00000000000000ull;
      BID_RETURN (res);
    }
	if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
    // otherwise return +/-Inf
    res =
      ((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull;
    BID_RETURN (res);
	}
  }
  // x is 0
  if (((y & INFINITY_MASK64) != INFINITY_MASK64) &&
      !(CY.w[0])) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    // x=y=0, return NaN
    res = 0x7c00000000000000ull;
    BID_RETURN (res);
  }
  // return 0
  if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)) {
	  if (!CY.w[0]) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      res = 0x7c00000000000000ull;
      BID_RETURN (res);
	  }
    exponent_x =
      exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 +
      (DECIMAL_EXPONENT_BIAS << 1);
    if (exponent_x > DECIMAL_MAX_EXPON_64)
      exponent_x = DECIMAL_MAX_EXPON_64;
    else if (exponent_x < 0)
      exponent_x = 0;
    res = (sign_x ^ sign_y) | (((UINT64) exponent_x) << 53);
    BID_RETURN (res);
  }
}
CY.w[1] = 0;
if (!valid_y) {
  // y is Inf. or NaN

  // test if y is NaN
  if ((y & NAN_MASK64) == NAN_MASK64) {
#ifdef SET_STATUS_FLAGS
    if ((y & SNAN_MASK64) == SNAN_MASK64)	// sNaN
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    BID_RETURN (CY.w[0] & QUIET_MASK64);
  }
  // y is Infinity?
  if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) {
    // return +/-0
    res = sign_x ^ sign_y;
    BID_RETURN (res);
  }
  // y is 0, return +/-Inf
  res =
    ((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull;
#ifdef SET_STATUS_FLAGS
  __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
#endif
  BID_RETURN (res);
}
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
diff_expon =
  exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 +
  (DECIMAL_EXPONENT_BIAS << 1);

if (__unsigned_compare_gt_128 (CY, CX)) {
  // CX < CY

  // 2^64
  f64.i = 0x5f800000;

  // fx ~ CX,   fy ~ CY
  fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
  fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
  // expon_cy - expon_cx
  bin_index = (fy.i - fx.i) >> 23;

  if (CX.w[1]) {
    T = power10_index_binexp_128[bin_index].w[0];
    __mul_64x128_short (CA, T, CX);
  } else {
    T128 = power10_index_binexp_128[bin_index];
    __mul_64x128_short (CA, CX.w[0], T128);
  }

  ed2 = 15;
  if (__unsigned_compare_gt_128 (CY, CA))
    ed2++;

  T128 = power10_table_128[ed2];
  __mul_128x128_to_256 (CA4, CA, T128);

  ed2 += estimate_decimal_digits[bin_index];
  CQ.w[0] = CQ.w[1] = 0;
  diff_expon = diff_expon - ed2;

} else {
  // get CQ = CX/CY
  __div_128_by_128 (&CQ, &CR, CX, CY);

  // get number of decimal digits in CQ
  // 2^64
  f64.i = 0x5f800000;
  fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
  // binary expon. of CQ
  bin_expon = (fx.i - 0x3f800000) >> 23;

  digits_q = estimate_decimal_digits[bin_expon];
  TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
  TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
  if (__unsigned_compare_ge_128 (CQ, TP128))
    digits_q++;

  if (digits_q <= 16) {
    if (!CR.w[1] && !CR.w[0]) {
      res = get_BID64 (sign_x ^ sign_y, diff_expon,
		       CQ.w[0], rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
      (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
      BID_RETURN (res);
    }

    ed2 = 16 - digits_q;
    T128.w[0] = power10_table_128[ed2].w[0];
    __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
    diff_expon = diff_expon - ed2;
    CQ.w[0] *= T128.w[0];
  } else {
    ed2 = digits_q - 16;
    diff_expon += ed2;
    T128 = reciprocals10_128[ed2];
    __mul_128x128_to_256 (P256, CQ, T128);
    amount = recip_scale[ed2];
    CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
    CQ.w[1] = 0;

    __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));

    __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
    QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];

    CA4.w[1] = CX.w[1] - QB256.w[1];
    CA4.w[0] = CX.w[0] - QB256.w[0];
    if (CX.w[0] < QB256.w[0])
      CA4.w[1]--;
    if (CR.w[0] || CR.w[1])
      CA4.w[0] |= 1;
    done = 1;
	if(CA4.w[1]|CA4.w[0]) {
    __mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY);
	}

  }

}

if (!done) {
  __div_256_by_128 (&CQ, &CA4, CY);
}

#ifdef SET_STATUS_FLAGS
  if (CA4.w[0] || CA4.w[1]) {
    // set status flags
    __set_status_flags (pfpsf, INEXACT_EXCEPTION);
  }
#ifndef LEAVE_TRAILING_ZEROS
  else
#endif
#else
#ifndef LEAVE_TRAILING_ZEROS
  if (!CA4.w[0] && !CA4.w[1])
#endif
#endif
#ifndef LEAVE_TRAILING_ZEROS
    // check whether result is exact
  {
	  if(!done) {
    // check whether CX, CY are short
    if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
      i = (int) CY.w[0] - 1;
      j = (int) CX.w[0] - 1;
      // difference in powers of 2 factors for Y and X
      nzeros = ed2 - factors[i][0] + factors[j][0];
      // difference in powers of 5 factors
      d5 = ed2 - factors[i][1] + factors[j][1];
      if (d5 < nzeros)
		nzeros = d5;
      // get P*(2^M[extra_digits])/10^extra_digits
      __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]);
      //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[nzeros];
      __shr_128_long (CQ, Qh, amount);

      diff_expon += nzeros;
    } else {
      // decompose Q as Qh*10^17 + Ql
      //T128 = reciprocals10_128[17];
      Q_low = CQ.w[0];

      {
	tdigit[0] = Q_low & 0x3ffffff;
	tdigit[1] = 0;
	QX = Q_low >> 26;
	QX32 = QX;
	nzeros = 0;

	for (j = 0; QX32; j++, QX32 >>= 7) {
	  k = (QX32 & 127);
	  tdigit[0] += convert_table[j][k][0];
	  tdigit[1] += convert_table[j][k][1];
	  if (tdigit[0] >= 100000000) {
	    tdigit[0] -= 100000000;
	    tdigit[1]++;
	  }
	}

	if (tdigit[1] >= 100000000) {
	  tdigit[1] -= 100000000;
	  if (tdigit[1] >= 100000000)
	    tdigit[1] -= 100000000;
	}

	digit = tdigit[0];
	if (!digit && !tdigit[1])
	  nzeros += 16;
	else {
	  if (!digit) {
	    nzeros += 8;
	    digit = tdigit[1];
	  }
	  // decompose digit
	  PD = (UINT64) digit *0x068DB8BBull;
	  digit_h = (UINT32) (PD >> 40);
	  digit_low = digit - digit_h * 10000;

	  if (!digit_low)
	    nzeros += 4;
	  else
	    digit_h = digit_low;

	  if (!(digit_h & 1))
	    nzeros +=
	      3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
			    (digit_h & 7));
	}

	if (nzeros) {
	  // get P*(2^M[extra_digits])/10^extra_digits
	  __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]);

	  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
	  amount = recip_scale[nzeros];
	  __shr_128 (CQ, Qh, amount);
	}
	diff_expon += nzeros;

      }
    }
	  }
	if(diff_expon>=0){
    res =
      fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
			       rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);
	}
  }
#endif

  if (diff_expon >= 0) {
#ifdef IEEE_ROUND_NEAREST
  // rounding
  // 2*CA4 - CY
  CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
  CA4r.w[0] = CA4.w[0] + CA4.w[0];
  __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
  CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;

  D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
  carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);

  CQ.w[0] += carry64;
  //if(CQ.w[0]<carry64)
  //CQ.w[1] ++;
#else
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
  // rounding
  // 2*CA4 - CY
  CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
  CA4r.w[0] = CA4.w[0] + CA4.w[0];
  __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
  CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;

  D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
  carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;

  CQ.w[0] += carry64;
  if (CQ.w[0] < carry64)
    CQ.w[1]++;
#else
  rmode = rnd_mode;
  if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
  switch (rmode) {
  case ROUNDING_TO_NEAREST:	// round to nearest code
    // rounding
    // 2*CA4 - CY
    CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
    CA4r.w[0] = CA4.w[0] + CA4.w[0];
    __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
    CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
    D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
    carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
    CQ.w[0] += carry64;
    if (CQ.w[0] < carry64)
      CQ.w[1]++;
    break;
  case ROUNDING_TIES_AWAY:
    // rounding
    // 2*CA4 - CY
    CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
    CA4r.w[0] = CA4.w[0] + CA4.w[0];
    __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
    CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
    D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
    carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
    CQ.w[0] += carry64;
    if (CQ.w[0] < carry64)
      CQ.w[1]++;
    break;
  case ROUNDING_DOWN:
  case ROUNDING_TO_ZERO:
    break;
  default:	// rounding up
    CQ.w[0]++;
    if (!CQ.w[0])
      CQ.w[1]++;
    break;
  }
#endif
#endif


    res =
      fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
			       pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);
  } else {
    // UF occurs

#ifdef SET_STATUS_FLAGS
    if ((diff_expon + 16 < 0)) {
      // set status flags
      __set_status_flags (pfpsf, INEXACT_EXCEPTION);
    }
#endif
    rmode = rnd_mode;
    res =
      get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);

  }

}

//#define LEAVE_TRAILING_ZEROS

// Lookup tables defined in another translation unit; bid64qq_div uses them
// when stripping trailing decimal zeros from an exact quotient.

// convert_table[j][k][0..1]: pair of values added into a two-"digit"
// accumulator (each digit reduced modulo 10^8 by the caller) for the j-th
// 7-bit group, with value k, of the quotient bits above bit 26.
extern UINT32 convert_table[5][128][2];
// factors[n - 1][0] and factors[n - 1][1]: read by the caller as the power-of-2
// and power-of-5 factor counts of a small integer n (caller restricts n to
// at most 1024).
extern SINT8 factors[][2];
// Indexed as packed_10000_zeros[v >> 3] >> (v & 7) and masked with 3 by the
// caller; presumably encodes the trailing-zero count of values v < 10000
// -- TODO confirm against the table definition.
extern UINT8 packed_10000_zeros[];


//UINT64* bid64_div128x128(UINT64 res, UINT128 *px, UINT128 *py, unsigned rnd_mode, unsigned *pfpsf)

/*
 * bid64qq_div: divide the 128-bit BID operand x by the 128-bit BID operand y
 * and produce a 64-bit BID result (res is a UINT64).
 *
 * The function header is generated by TYPE0_FUNCTION_ARG128_ARG128, which is
 * defined elsewhere; rnd_mode and pfpsf are used below without a local
 * declaration, so they are presumably supplied by that macro (rounding mode
 * and pointer to the status-flag word) -- TODO confirm against the macro.
 *
 * Outline of the visible logic:
 *   1. Unpack both operands; handle NaN, Infinity and zero operands and
 *      return early (raising INVALID / ZERO_DIVIDE when SET_STATUS_FLAGS
 *      is defined).
 *   2. Form the biased result exponent diff_expon.
 *   3. If CX < CY, scale CX up by powers of ten and start with quotient 0;
 *      otherwise do an integer divide and either scale the remainder
 *      (quotient has <= 16 digits) or truncate the quotient to 16 digits
 *      (quotient has > 16 digits, "done" path).
 *   4. Unless "done", finish the quotient with __div_256_by_128.
 *   5. Non-zero remainder: raise INEXACT.  Zero remainder: strip trailing
 *      zeros (unless LEAVE_TRAILING_ZEROS) and return if no underflow.
 *   6. Round according to the rounding mode and pack, or hand the value to
 *      get_BID64_UF when diff_expon is negative.
 */
TYPE0_FUNCTION_ARG128_ARG128 (UINT64, bid64qq_div, x, y)
     // CA4 holds the scaled dividend going into __div_256_by_128 and is read
     // afterwards as the remainder (only w[0]/w[1] are examined below).
     UINT256 CA4 =
       { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp;
UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res;
int_float fx, fy, f64;
UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
  digits_q, amount;
// done is set to 1 when the quotient has already been finalized without
// needing the 256-by-128 divide.
int nzeros, i, j, k, d5, done = 0;
unsigned rmode;
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
fexcept_t binaryflags = 0;
#endif

// y is unpacked first; valid_y is only examined after x has been classified
valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y);

	// unpack arguments, check for NaN or Infinity
if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
  // test if x is NaN
  if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
#ifdef SET_STATUS_FLAGS
    if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull ||	// sNaN
	(y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      // Reduce the payload of x with the reciprocal-multiply entry 18
      // (presumably a division by 10^18 so it fits the 64-bit format), then
      // combine it with the top six bits of CX.w[1].
      Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
      Tmp.w[0] = CX.w[0];
      TP128 = reciprocals10_128[18];
      __mul_128x128_high (Qh, Tmp, TP128);
      amount = recip_scale[18];
      __shr_128 (Tmp, Qh, amount);
      res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
    BID_RETURN (res);
  }
  // x is Infinity?
  if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
    // check if y is Inf.
    if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull))
      // Inf/Inf: return NaN
    {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      res = 0x7c00000000000000ull;
      BID_RETURN (res);
    }
    // If y is NaN, fall through so the (!valid_y) block below handles it.
	if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
    // otherwise return +/-Inf
    res =
      ((x.w[1] ^ y.
	w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
    BID_RETURN (res);
	}
  }
  // x is 0
  // (skipped when y is Inf or NaN; those cases are handled by the
  // (!valid_y) block below)
  if (((y.w[1] & 0x7800000000000000ull) != 0x7800000000000000ull)) {
  if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
    // x=y=0, return NaN
    res = 0x7c00000000000000ull;
    BID_RETURN (res);
  }
  // return 0
  // signed zero whose biased exponent is clamped to [0, DECIMAL_MAX_EXPON_64]
  res = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull;
  exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
  if (exponent_x > DECIMAL_MAX_EXPON_64)
    exponent_x = DECIMAL_MAX_EXPON_64;
  else if (exponent_x < 0)
    exponent_x = 0;
  res |= (((UINT64) exponent_x) << 53);
  BID_RETURN (res);
  }
}
if (!valid_y) {
  // y is Inf. or NaN
  // (or zero: that case is reached at the end of this block)

  // test if y is NaN
  if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
#ifdef SET_STATUS_FLAGS
    if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)	// sNaN
      __set_status_flags (pfpsf, INVALID_EXCEPTION);
#endif
      // same payload reduction as for a NaN in x, applied to y
      Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull);
      Tmp.w[0] = CY.w[0];
      TP128 = reciprocals10_128[18];
      __mul_128x128_high (Qh, Tmp, TP128);
      amount = recip_scale[18];
      __shr_128 (Tmp, Qh, amount);
      res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
    BID_RETURN (res);
  }
  // y is Infinity?
  if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
    // return +/-0
    res = sign_x ^ sign_y;
    BID_RETURN (res);
  }
  // y is 0, return +/-Inf
  res =
    ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
#ifdef SET_STATUS_FLAGS
  __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
#endif
  BID_RETURN (res);
}
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
// save the binary floating-point exception flags; they are restored with
// fesetexceptflag before every return on the paths below
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
// biased exponent of the quotient before any coefficient scaling
diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;

if (__unsigned_compare_gt_128 (CY, CX)) {
  // CX < CY

  // 2^64
  f64.i = 0x5f800000;

  // fx ~ CX,   fy ~ CY
  fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
  fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
  // expon_cy - expon_cx
  // (difference of the float bit patterns, shifted past the 23 mantissa bits)
  bin_index = (fy.i - fx.i) >> 23;

  // CA = CX * (power of ten selected by bin_index); the 64-bit operand of the
  // multiply is whichever factor fits in one word
  if (CX.w[1]) {
    T = power10_index_binexp_128[bin_index].w[0];
    __mul_64x128_short (CA, T, CX);
  } else {
    T128 = power10_index_binexp_128[bin_index];
    __mul_64x128_short (CA, CX.w[0], T128);
  }

  // scale by 10^15, or by 10^16 when the pre-scaled CA is still below CY
  ed2 = 15;
  if (__unsigned_compare_gt_128 (CY, CA))
    ed2++;

  T128 = power10_table_128[ed2];
  __mul_128x128_to_256 (CA4, CA, T128);

  // total decimal scaling applied to CX; quotient starts at zero
  ed2 += estimate_decimal_digits[bin_index];
  CQ.w[0] = CQ.w[1] = 0;
  diff_expon = diff_expon - ed2;

} else {
  // get CQ = CX/CY
  // (CR receives the remainder)
  __div_128_by_128 (&CQ, &CR, CX, CY);

  // get number of decimal digits in CQ
  // 2^64
  f64.i = 0x5f800000;
  fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
  // binary expon. of CQ
  bin_expon = (fx.i - 0x3f800000) >> 23;

  // table estimate, corrected upward by one if CQ reaches the threshold
  digits_q = estimate_decimal_digits[bin_expon];
  TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
  TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
  if (__unsigned_compare_ge_128 (CQ, TP128))
    digits_q++;

  if (digits_q <= 16) {
    // exact integer quotient that already fits: pack and return directly
    if (!CR.w[1] && !CR.w[0]) {
      res = get_BID64 (sign_x ^ sign_y, diff_expon,
		       CQ.w[0], rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
      (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
      BID_RETURN (res);
    }

    // scale both the remainder (into CA4) and the partial quotient by
    // 10^(16 - digits_q) so that the final quotient can reach 16 digits
    ed2 = 16 - digits_q;
    T128.w[0] = power10_table_128[ed2].w[0];
    __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
    diff_expon = diff_expon - ed2;
    CQ.w[0] *= T128.w[0];
  } else {
    // quotient has more than 16 digits: drop the lowest ed2 digits using a
    // reciprocal multiply followed by a right shift
    ed2 = digits_q - 16;
    diff_expon += ed2;
    T128 = reciprocals10_128[ed2];
    __mul_128x128_to_256 (P256, CQ, T128);
    amount = recip_scale[ed2];
    CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
    CQ.w[1] = 0;

    // CQ2 = truncated quotient * 10^ed2
    __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));

    // low 128 bits of CQ2 * CY
    __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
    QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];

    // CA4 = CX - CQ2 * CY (128-bit subtract with manual borrow)
    CA4.w[1] = CX.w[1] - QB256.w[1];
    CA4.w[0] = CX.w[0] - QB256.w[0];
    if (CX.w[0] < QB256.w[0])
      CA4.w[1]--;
    // make the remainder non-zero if the integer divide itself was inexact
    if (CR.w[0] || CR.w[1])
      CA4.w[0] |= 1;
    done = 1;
    // scale CY by 10^ed2 so the rounding step compares the remainder against
    // the matching divisor
	if(CA4.w[1]|CA4.w[0]) {
    __mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY);
	}
  }

}

// Finish the quotient from the scaled dividend in CA4.  CA4 is read below as
// the remainder and CQ as the quotient; presumably the helper accumulates
// into CQ, since CQ was pre-loaded above -- TODO confirm in its definition.
if (!done) {
  __div_256_by_128 (&CQ, &CA4, CY);
}


// Preprocessor layout of the next section:
//  - SET_STATUS_FLAGS defined: a non-zero remainder raises INEXACT and the
//    exact-result block below becomes the "else" of that test.
//  - SET_STATUS_FLAGS undefined: the exact-result block is guarded by an
//    explicit zero-remainder test instead.
//  - LEAVE_TRAILING_ZEROS defined: the exact-result block is compiled out.
#ifdef SET_STATUS_FLAGS
  if (CA4.w[0] || CA4.w[1]) {
    // set status flags
    __set_status_flags (pfpsf, INEXACT_EXCEPTION);
  }
#ifndef LEAVE_TRAILING_ZEROS
  else
#endif
#else
#ifndef LEAVE_TRAILING_ZEROS
  if (!CA4.w[0] && !CA4.w[1])
#endif
#endif
#ifndef LEAVE_TRAILING_ZEROS
    // check whether result is exact
  {
	  // trailing zeros are only removed when the quotient came from the
	  // 256-by-128 divide, not from the truncation ("done") path
	  if(!done) {
    // check whether CX, CY are short
    if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
      // factors[] is indexed by value - 1
      i = (int) CY.w[0] - 1;
      j = (int) CX.w[0] - 1;
      // difference in powers of 2 factors for Y and X
      nzeros = ed2 - factors[i][0] + factors[j][0];
      // difference in powers of 5 factors
      d5 = ed2 - factors[i][1] + factors[j][1];
      // the number of removable zeros is the smaller of the two counts
      if (d5 < nzeros)
	nzeros = d5;
      // get P*(2^M[extra_digits])/10^extra_digits
      __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]);
      //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[nzeros];
      __shr_128_long (CQ, Qh, amount);

      diff_expon += nzeros;
    } else {
      // decompose Q as Qh*10^17 + Ql
      //T128 = reciprocals10_128[17];
      Q_low = CQ.w[0];

      {
	// Convert the low word of the quotient into two base-10^8 "digits":
	// tdigit[0] is the low digit, tdigit[1] the next one.  The low 26 bits
	// seed tdigit[0]; the remaining bits are folded in 7 at a time.
	tdigit[0] = Q_low & 0x3ffffff;
	tdigit[1] = 0;
	QX = Q_low >> 26;
	QX32 = QX;
	nzeros = 0;

	for (j = 0; QX32; j++, QX32 >>= 7) {
	  k = (QX32 & 127);
	  tdigit[0] += convert_table[j][k][0];
	  tdigit[1] += convert_table[j][k][1];
	  // propagate the carry out of the low base-10^8 digit
	  if (tdigit[0] >= 100000000) {
	    tdigit[0] -= 100000000;
	    tdigit[1]++;
	  }
	}

	// reduce the upper digit below 10^8 (at most two subtractions)
	if (tdigit[1] >= 100000000) {
	  tdigit[1] -= 100000000;
	  if (tdigit[1] >= 100000000)
	    tdigit[1] -= 100000000;
	}

	// count trailing decimal zeros: 16 if both digits are zero, otherwise
	// 8 for a zero low digit plus the zeros of the first non-zero digit
	digit = tdigit[0];
	if (!digit && !tdigit[1])
	  nzeros += 16;
	else {
	  if (!digit) {
	    nzeros += 8;
	    digit = tdigit[1];
	  }
	  // decompose digit
	  // digit_h = digit / 10000 via multiply-and-shift, digit_low = digit % 10000
	  PD = (UINT64) digit *0x068DB8BBull;
	  digit_h = (UINT32) (PD >> 40);
	  digit_low = digit - digit_h * 10000;

	  // continue with the lowest non-zero 4-digit group
	  if (!digit_low)
	    nzeros += 4;
	  else
	    digit_h = digit_low;

	  // an odd value has no trailing zero; otherwise look up the count
	  if (!(digit_h & 1))
	    nzeros +=
	      3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
			    (digit_h & 7));
	}

	if (nzeros) {
	  // get P*(2^M[extra_digits])/10^extra_digits
	  __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]);

	  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
	  amount = recip_scale[nzeros];
	  __shr_128 (CQ, Qh, amount);
	}
	diff_expon += nzeros;

      }
    }
	  }
	// exact result with a non-negative exponent: no rounding needed, pack it
	// (a negative exponent falls through to the underflow branch below)
	if(diff_expon>=0){
    res =
      fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
			       rnd_mode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);
	}
  }
#endif

  if(diff_expon>=0) {

  // Round the quotient using the remainder in CA4.  In the nearest modes
  // CA4r = 2*CA4 - CY: a negative value means the remainder is below half
  // the divisor, zero means an exact tie.
#ifdef IEEE_ROUND_NEAREST
  // rounding
  // 2*CA4 - CY
  CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
  CA4r.w[0] = CA4.w[0] + CA4.w[0];
  __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
  CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;

  // D = 1 when above the tie; on a tie the low bit of CQ decides (ties to even)
  D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
  carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);

  CQ.w[0] += carry64;
  //if(CQ.w[0]<carry64)
  //CQ.w[1] ++;
#else
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
  // rounding
  // 2*CA4 - CY
  CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
  CA4r.w[0] = CA4.w[0] + CA4.w[0];
  __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
  CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;

  // D = 1 on an exact tie; increment whenever 2*CA4 >= CY
  D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
  carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;

  CQ.w[0] += carry64;
  if (CQ.w[0] < carry64)
    CQ.w[1]++;
#else
  rmode = rnd_mode;
  // for a negative result, swap rounding modes 1 and 2 (presumably the two
  // directed modes ROUNDING_DOWN / rounding up) so the magnitude is rounded
  // in the correct direction
  if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
  switch (rmode) {
  case ROUNDING_TO_NEAREST:	// round to nearest code
    // rounding
    // 2*CA4 - CY
    CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
    CA4r.w[0] = CA4.w[0] + CA4.w[0];
    __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
    CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
    // increment when above the tie, or on a tie when CQ is odd
    D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
    carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
    CQ.w[0] += carry64;
    if (CQ.w[0] < carry64)
      CQ.w[1]++;
    break;
  case ROUNDING_TIES_AWAY:
    // rounding
    // 2*CA4 - CY
    CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
    CA4r.w[0] = CA4.w[0] + CA4.w[0];
    __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
    CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
    // increment whenever 2*CA4 >= CY
    D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
    carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
    CQ.w[0] += carry64;
    if (CQ.w[0] < carry64)
      CQ.w[1]++;
    break;
  case ROUNDING_DOWN:
  case ROUNDING_TO_ZERO:
    // truncation: keep the quotient as computed
    break;
  default:	// rounding up
    CQ.w[0]++;
    if (!CQ.w[0])
      CQ.w[1]++;
    break;
  }
#endif
#endif


    // pack sign, exponent and rounded coefficient; the helper's name
    // suggests it also checks for overflow
    res =
      fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
			       pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);
  } else {
    // UF occurs

#ifdef SET_STATUS_FLAGS
    // exponent more than 16 below zero: INEXACT is raised here regardless of
    // the remainder
    if ((diff_expon + 16 < 0)) {
      // set status flags
      __set_status_flags (pfpsf, INEXACT_EXCEPTION);
    }
#endif
    rmode = rnd_mode;
    // get_BID64_UF receives the unrounded quotient plus a non-zero value
    // whenever the remainder is non-zero
    res =
      get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
#ifdef UNCHANGED_BINARY_STATUS_FLAGS
    (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
#endif
    BID_RETURN (res);

  }

}