doc/divide_by_constants_codegen_reference.c - third_party/github.com/ridiculousfish/libdivide - Git at Google

 /*
   Reference implementations of computing and using the "magic number" approach to dividing
   by constants, including codegen instructions. The unsigned division incorporates the
   "round down" optimization per ridiculous_fish.

   This is free and unencumbered software. Any copyright is dedicated to the Public Domain.
 */

 #include <limits.h> //for CHAR_BIT
 #include <assert.h>


 /* Types used in the computations below. These can be redefined to the types appropriate
    for the desired division type (i.e. uint can be defined as unsigned long long).

    Note that the uint type is used in compute_signed_magic_info, so the uint type must
    not be smaller than the sint type.

 */
 typedef unsigned int uint;
 typedef signed int sint;


 /* Computes "magic info" for performing signed division by a fixed integer D.
    The type 'sint' is assumed to be defined as a signed integer type large enough
    to hold both the dividend and the divisor.
    Here >> is arithmetic (signed) shift, and >>> is logical shift.

    To emit code for n/d, rounding towards zero, use the following sequence:

      m = compute_signed_magic_info(D)
      emit("result = (m.multiplier * n) >> SINT_BITS");
      if d > 0 and m.multiplier < 0: emit("result += n")
      if d < 0 and m.multiplier > 0: emit("result -= n")
      if m.post_shift > 0: emit("result >>= m.shift")
      emit("result += (result < 0)")

   The shifts by SINT_BITS may be "free" if the high half of the full multiply
   is put in a separate register.

   The final add can of course be implemented via the sign bit, e.g.
       result += (result >>> (SINT_BITS - 1))
    or
       result -= (result >> (SINT_BITS - 1))

    This code is heavily indebted to Hacker's Delight by Henry Warren.
    See http://www.hackersdelight.org/HDcode/magic.c.txt
    Used with permission from http://www.hackersdelight.org/permissions.htm
  */

 struct magics_info {
     sint multiplier; // the "magic number" multiplier
     unsigned shift; // shift for the dividend after multiplying
 };
 struct magics_info compute_signed_magic_info(sint D);


 /* Computes "magic info" for performing unsigned division by a fixed positive integer D.
    The type 'uint' is assumed to be defined as an unsigned integer type large enough
    to hold both the dividend and the divisor. num_bits can be set appropriately if n is
    known to be smaller than the largest uint; if this is not known then pass
    (sizeof(uint) * CHAR_BIT) for num_bits.

    Assume we have a hardware register of width UINT_BITS, a known constant D which is
    not zero and not a power of 2, and a variable n of width num_bits (which may be
    up to UINT_BITS). To emit code for n/d, use one of the two following sequences
    (here >>> refers to a logical bitshift):

      m = compute_unsigned_magic_info(D, num_bits)
      if m.pre_shift > 0: emit("n >>>= m.pre_shift")
      if m.increment: emit("n = saturated_increment(n)")
      emit("result = (m.multiplier * n) >>> UINT_BITS")
      if m.post_shift > 0: emit("result >>>= m.post_shift")

    or

      m = compute_unsigned_magic_info(D, num_bits)
      if m.pre_shift > 0: emit("n >>>= m.pre_shift")
      emit("result = m.multiplier * n")
      if m.increment: emit("result = result + m.multiplier")
      emit("result >>>= UINT_BITS")
      if m.post_shift > 0: emit("result >>>= m.post_shift")

   The shifts by UINT_BITS may be "free" if the high half of the full multiply
   is put in a separate register.

   saturated_increment(n) means "increment n unless it would wrap to 0," i.e.
     if n == (1 << UINT_BITS)-1: result = n
     else: result = n+1
   A common way to implement this is with the carry bit. For example, on x86:
      add 1
      sbb 0

   Some invariants:
    1: At least one of pre_shift and increment is zero
    2: multiplier is never zero

    This code incorporates the "round down" optimization per ridiculous_fish.
  */

 struct magicu_info {
     uint multiplier; // the "magic number" multiplier
     unsigned pre_shift; // shift for the dividend before multiplying
     unsigned post_shift; //shift for the dividend after multiplying
     int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
 };
 struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits);


 /* Implementations follow */

 struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {

     //The numerator must fit in a uint
     assert(num_bits > 0 && num_bits <= sizeof(uint) * CHAR_BIT);

     // D must be larger than zero and not a power of 2
     assert(D & (D-1));

     // The eventual result
     struct magicu_info result;

     // Bits in a uint
     const unsigned UINT_BITS = sizeof(uint) * CHAR_BIT;

     // The extra shift implicit in the difference between UINT_BITS and num_bits
     const unsigned extra_shift = UINT_BITS - num_bits;

     // The initial power of 2 is one less than the first one that can possibly work
     const uint initial_power_of_2 = (uint)1 << (UINT_BITS-1);

     // The remainder and quotient of our power of 2 divided by d
     uint quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;

     // ceil(log_2 D)
     unsigned ceil_log_2_D;

     // The magic info for the variant "round down" algorithm
     uint down_multiplier = 0;
     unsigned down_exponent = 0;
     int has_magic_down = 0;

     // Compute ceil(log_2 D)
     ceil_log_2_D = 0;
     uint tmp;
     for (tmp = D; tmp > 0; tmp >>= 1)
         ceil_log_2_D += 1;


     // Begin a loop that increments the exponent, until we find a power of 2 that works.
     unsigned exponent;
     for (exponent = 0; ; exponent++) {
         // Quotient and remainder is from previous exponent; compute it for this exponent.
         if (remainder >= D - remainder) {
             // Doubling remainder will wrap around D
             quotient = quotient * 2 + 1;
             remainder = remainder * 2 - D;
         } else {
             // Remainder will not wrap
             quotient = quotient * 2;
             remainder = remainder * 2;
         }

         // We're done if this exponent works for the round_up algorithm.
         // Note that exponent may be larger than the maximum shift supported,
         // so the check for >= ceil_log_2_D is critical.
         if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((uint)1 << (exponent + extra_shift)))
             break;

         // Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
         if (! has_magic_down && remainder <= ((uint)1 << (exponent + extra_shift))) {
             has_magic_down = 1;
             down_multiplier = quotient;
             down_exponent = exponent;
         }
     }

     if (exponent < ceil_log_2_D) {
         // magic_up is efficient
         result.multiplier = quotient + 1;
         result.pre_shift = 0;
         result.post_shift = exponent;
         result.increment = 0;
     } else if (D & 1) {
         // Odd divisor, so use magic_down, which must have been set
         assert(has_magic_down);
         result.multiplier = down_multiplier;
         result.pre_shift = 0;
         result.post_shift = down_exponent;
         result.increment = 1;
     } else {
         // Even divisor, so use a prefix-shifted dividend
         unsigned pre_shift = 0;
         uint shifted_D = D;
         while ((shifted_D & 1) == 0) {
             shifted_D >>= 1;
             pre_shift += 1;
         }
         result = compute_unsigned_magic_info(shifted_D, num_bits - pre_shift);
         assert(result.increment == 0 && result.pre_shift == 0); //expect no increment or pre_shift in this path
         result.pre_shift = pre_shift;
     }
     return result;
 }

 struct magics_info compute_signed_magic_info(sint D) {
     // D must not be zero and must not be a power of 2 (or its negative)
     assert(D != 0 && (D & -D) != D && (D & -D) != -D);

     // Our result
     struct magics_info result;

     // Bits in an sint
     const unsigned SINT_BITS = sizeof(sint) * CHAR_BIT;

     // Absolute value of D (we know D is not the most negative value since that's a power of 2)
     const uint abs_d = (D < 0 ? -D : D);

     // The initial power of 2 is one less than the first one that can possibly work
     // "two31" in Warren
     unsigned exponent = SINT_BITS - 1;
     const uint initial_power_of_2 = (uint)1 << exponent;

     // Compute the absolute value of our "test numerator,"
     // which is the largest dividend whose remainder with d is d-1.
     // This is called anc in Warren.
     const uint tmp = initial_power_of_2 + (D < 0);
     const uint abs_test_numer = tmp - 1 - tmp % abs_d;

     // Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
     uint quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
     uint quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
     uint delta;

     // Begin our loop
     do {
         // Update the exponent
         exponent++;

         // Update quotient1 and remainder1
         quotient1 *= 2;
         remainder1 *= 2;
         if (remainder1 >= abs_test_numer) {
             quotient1 += 1;
             remainder1 -= abs_test_numer;
         }

         // Update quotient2 and remainder2
         quotient2 *= 2;
         remainder2 *= 2;
         if (remainder2 >= abs_d) {
             quotient2 += 1;
             remainder2 -= abs_d;
         }

         // Keep going as long as (2**exponent) / abs_d <= delta
         delta = abs_d - remainder2;
     } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));

     result.multiplier = quotient2 + 1;
     if (D < 0) result.multiplier = -result.multiplier;
     result.shift = exponent - SINT_BITS;
     return result;
 }
	/*
	Reference implementations of computing and using the "magic number" approach to dividing
	by constants, including codegen instructions. The unsigned division incorporates the
	"round down" optimization per ridiculous_fish.

	This is free and unencumbered software. Any copyright is dedicated to the Public Domain.
	*/

	#include <limits.h> //for CHAR_BIT
	#include <assert.h>


	/* Types used in the computations below. These can be redefined to the types appropriate
	for the desired division type (i.e. uint can be defined as unsigned long long).

	Note that the uint type is used in compute_signed_magic_info, so the uint type must
	not be smaller than the sint type.

	*/
	typedef unsigned int uint;
	typedef signed int sint;


	/* Computes "magic info" for performing signed division by a fixed integer D.
	The type 'sint' is assumed to be defined as a signed integer type large enough
	to hold both the dividend and the divisor.
	Here >> is arithmetic (signed) shift, and >>> is logical shift.

	To emit code for n/d, rounding towards zero, use the following sequence:

	m = compute_signed_magic_info(D)
	emit("result = (m.multiplier * n) >> SINT_BITS");
	if d > 0 and m.multiplier < 0: emit("result += n")
	if d < 0 and m.multiplier > 0: emit("result -= n")
	if m.post_shift > 0: emit("result >>= m.shift")
	emit("result += (result < 0)")

	The shifts by SINT_BITS may be "free" if the high half of the full multiply
	is put in a separate register.

	The final add can of course be implemented via the sign bit, e.g.
	result += (result >>> (SINT_BITS - 1))
	or
	result -= (result >> (SINT_BITS - 1))

	This code is heavily indebted to Hacker's Delight by Henry Warren.
	See http://www.hackersdelight.org/HDcode/magic.c.txt
	Used with permission from http://www.hackersdelight.org/permissions.htm
	*/

	struct magics_info {
	sint multiplier; // the "magic number" multiplier
	unsigned shift; // shift for the dividend after multiplying
	};
	struct magics_info compute_signed_magic_info(sint D);


	/* Computes "magic info" for performing unsigned division by a fixed positive integer D.
	The type 'uint' is assumed to be defined as an unsigned integer type large enough
	to hold both the dividend and the divisor. num_bits can be set appropriately if n is
	known to be smaller than the largest uint; if this is not known then pass
	(sizeof(uint) * CHAR_BIT) for num_bits.

	Assume we have a hardware register of width UINT_BITS, a known constant D which is
	not zero and not a power of 2, and a variable n of width num_bits (which may be
	up to UINT_BITS). To emit code for n/d, use one of the two following sequences
	(here >>> refers to a logical bitshift):

	m = compute_unsigned_magic_info(D, num_bits)
	if m.pre_shift > 0: emit("n >>>= m.pre_shift")
	if m.increment: emit("n = saturated_increment(n)")
	emit("result = (m.multiplier * n) >>> UINT_BITS")
	if m.post_shift > 0: emit("result >>>= m.post_shift")

	or

	m = compute_unsigned_magic_info(D, num_bits)
	if m.pre_shift > 0: emit("n >>>= m.pre_shift")
	emit("result = m.multiplier * n")
	if m.increment: emit("result = result + m.multiplier")
	emit("result >>>= UINT_BITS")
	if m.post_shift > 0: emit("result >>>= m.post_shift")

	The shifts by UINT_BITS may be "free" if the high half of the full multiply
	is put in a separate register.

	saturated_increment(n) means "increment n unless it would wrap to 0," i.e.
	if n == (1 << UINT_BITS)-1: result = n
	else: result = n+1
	A common way to implement this is with the carry bit. For example, on x86:
	add 1
	sbb 0

	Some invariants:
	1: At least one of pre_shift and increment is zero
	2: multiplier is never zero

	This code incorporates the "round down" optimization per ridiculous_fish.
	*/

	struct magicu_info {
	uint multiplier; // the "magic number" multiplier
	unsigned pre_shift; // shift for the dividend before multiplying
	unsigned post_shift; //shift for the dividend after multiplying
	int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
	};
	struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits);


	/* Implementations follow */

	struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {

	//The numerator must fit in a uint
	assert(num_bits > 0 && num_bits <= sizeof(uint) * CHAR_BIT);

	// D must be larger than zero and not a power of 2
	assert(D & (D-1));

	// The eventual result
	struct magicu_info result;

	// Bits in a uint
	const unsigned UINT_BITS = sizeof(uint) * CHAR_BIT;

	// The extra shift implicit in the difference between UINT_BITS and num_bits
	const unsigned extra_shift = UINT_BITS - num_bits;

	// The initial power of 2 is one less than the first one that can possibly work
	const uint initial_power_of_2 = (uint)1 << (UINT_BITS-1);

	// The remainder and quotient of our power of 2 divided by d
	uint quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;

	// ceil(log_2 D)
	unsigned ceil_log_2_D;

	// The magic info for the variant "round down" algorithm
	uint down_multiplier = 0;
	unsigned down_exponent = 0;
	int has_magic_down = 0;

	// Compute ceil(log_2 D)
	ceil_log_2_D = 0;
	uint tmp;
	for (tmp = D; tmp > 0; tmp >>= 1)
	ceil_log_2_D += 1;


	// Begin a loop that increments the exponent, until we find a power of 2 that works.
	unsigned exponent;
	for (exponent = 0; ; exponent++) {
	// Quotient and remainder is from previous exponent; compute it for this exponent.
	if (remainder >= D - remainder) {
	// Doubling remainder will wrap around D
	quotient = quotient * 2 + 1;
	remainder = remainder * 2 - D;
	} else {
	// Remainder will not wrap
	quotient = quotient * 2;
	remainder = remainder * 2;
	}

	// We're done if this exponent works for the round_up algorithm.
	// Note that exponent may be larger than the maximum shift supported,
	// so the check for >= ceil_log_2_D is critical.
	if ((exponent + extra_shift >= ceil_log_2_D) \|\| (D - remainder) <= ((uint)1 << (exponent + extra_shift)))
	break;

	// Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
	if (! has_magic_down && remainder <= ((uint)1 << (exponent + extra_shift))) {
	has_magic_down = 1;
	down_multiplier = quotient;
	down_exponent = exponent;
	}
	}

	if (exponent < ceil_log_2_D) {
	// magic_up is efficient
	result.multiplier = quotient + 1;
	result.pre_shift = 0;
	result.post_shift = exponent;
	result.increment = 0;
	} else if (D & 1) {
	// Odd divisor, so use magic_down, which must have been set
	assert(has_magic_down);
	result.multiplier = down_multiplier;
	result.pre_shift = 0;
	result.post_shift = down_exponent;
	result.increment = 1;
	} else {
	// Even divisor, so use a prefix-shifted dividend
	unsigned pre_shift = 0;
	uint shifted_D = D;
	while ((shifted_D & 1) == 0) {
	shifted_D >>= 1;
	pre_shift += 1;
	}
	result = compute_unsigned_magic_info(shifted_D, num_bits - pre_shift);
	assert(result.increment == 0 && result.pre_shift == 0); //expect no increment or pre_shift in this path
	result.pre_shift = pre_shift;
	}
	return result;
	}

	struct magics_info compute_signed_magic_info(sint D) {
	// D must not be zero and must not be a power of 2 (or its negative)
	assert(D != 0 && (D & -D) != D && (D & -D) != -D);

	// Our result
	struct magics_info result;

	// Bits in an sint
	const unsigned SINT_BITS = sizeof(sint) * CHAR_BIT;

	// Absolute value of D (we know D is not the most negative value since that's a power of 2)
	const uint abs_d = (D < 0 ? -D : D);

	// The initial power of 2 is one less than the first one that can possibly work
	// "two31" in Warren
	unsigned exponent = SINT_BITS - 1;
	const uint initial_power_of_2 = (uint)1 << exponent;

	// Compute the absolute value of our "test numerator,"
	// which is the largest dividend whose remainder with d is d-1.
	// This is called anc in Warren.
	const uint tmp = initial_power_of_2 + (D < 0);
	const uint abs_test_numer = tmp - 1 - tmp % abs_d;

	// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
	uint quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
	uint quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
	uint delta;

	// Begin our loop
	do {
	// Update the exponent
	exponent++;

	// Update quotient1 and remainder1
	quotient1 *= 2;
	remainder1 *= 2;
	if (remainder1 >= abs_test_numer) {
	quotient1 += 1;
	remainder1 -= abs_test_numer;
	}

	// Update quotient2 and remainder2
	quotient2 *= 2;
	remainder2 *= 2;
	if (remainder2 >= abs_d) {
	quotient2 += 1;
	remainder2 -= abs_d;
	}

	// Keep going as long as (2**exponent) / abs_d <= delta
	delta = abs_d - remainder2;
	} while (quotient1 < delta \|\| (quotient1 == delta && remainder1 == 0));

	result.multiplier = quotient2 + 1;
	if (D < 0) result.multiplier = -result.multiplier;
	result.shift = exponent - SINT_BITS;
	return result;
	}