blob: 929fdaf88442dfa8f4714238ab642bf6a4c7543e [file]
/*
* When dividing by a known compile time constant, the division can be replaced
* by a multiply+shift operation. GCC will do this automatically,
* *BUT ONLY FOR DIVISION OF REGISTER-WIDTH OR NARROWER*.
*
* So on an 8-bit system, 16-bit divides will *NOT* be optimised.
*
* The macros here manually apply the multiply+shift operation for 16-bit numbers.
*
* Testing on an ATmega2560 with -O3 optimizations:
* Speed-up of 85% to 90%+ (division by non-powers of 2)
* Zero increase in RAM usage
* Average of 25 bytes Flash used per call site
* Be careful calling this in a loop with aggressive loop unrolling!
*
* Note: testing of the multiply+shift technique on 8-bit division showed a
* slight slowdown compared to native code on the ATmega2560, so the 8-bit
* equivalent macros have not been included.
*/
#pragma once
#include "libdivide.h"
#include "u16_ldparams.h"
#include "s16_ldparams.h"
// Token-pasting helpers. CONCAT forwards to CAT_HELPER so that its arguments
// are macro-expanded first and only then glued together with ##.
#define CAT_HELPER(lhs, rhs) lhs##rhs
#define CONCAT(lhs, rhs) CAT_HELPER(lhs, rhs)
// GCC will optimise division by a power of 2 by itself, so the wrappers in
// this file use these predicates to fall back to a plain divide in that case.
//
// S16_ISPOW2_NEG(denom): non-zero when denom equals -(2^k) for k = 1..14.
// The argument is parenthesised at every use so that an expression argument
// (e.g. x & y) is compared as a whole. Note that it may be evaluated several
// times, so it should be free of side effects.
#define S16_ISPOW2_NEG(denom) \
((denom)==-2 || \
(denom)==-4 || \
(denom)==-8 || \
(denom)==-16 || \
(denom)==-32 || \
(denom)==-64 || \
(denom)==-128 || \
(denom)==-256 || \
(denom)==-512 || \
(denom)==-1024 || \
(denom)==-2048 || \
(denom)==-4096 || \
(denom)==-8192 || \
(denom)==-16384)
// S16_ISPOW2_POS(denom): non-zero when denom equals 2^k for k = 1..14.
// The argument is parenthesised at every use so that an expression argument
// (e.g. x & y) is compared as a whole. Note that it may be evaluated several
// times, so it should be free of side effects.
#define S16_ISPOW2_POS(denom) \
((denom)==2 || \
(denom)==4 || \
(denom)==8 || \
(denom)==16 || \
(denom)==32 || \
(denom)==64 || \
(denom)==128 || \
(denom)==256 || \
(denom)==512 || \
(denom)==1024 || \
(denom)==2048 || \
(denom)==4096 || \
(denom)==8192 || \
(denom)==16384)
// U16_ISPOW2(denom): non-zero when denom is a power of 2 accepted by
// S16_ISPOW2_POS, or is 32768. The argument is wrapped in parentheses both
// when forwarded and when compared, so an expression argument stays intact.
#define U16_ISPOW2(denom) (S16_ISPOW2_POS((denom)) || (denom)==32768)
// S16_ISPOW2(denom): non-zero when either S16_ISPOW2_POS or S16_ISPOW2_NEG
// accepts denom. The argument is forwarded inside its own parentheses so an
// expression argument is tested as a whole by both helpers.
#define S16_ISPOW2(denom) (S16_ISPOW2_POS((denom)) || S16_ISPOW2_NEG((denom)))
// Qualifier placed in front of libdivide calls made by the macros below:
// "libdivide::" when compiled as C++, nothing when compiled as C.
#if defined(__cplusplus)
#  define LIB_DIV_NAMESPACE libdivide::
#else
#  define LIB_DIV_NAMESPACE
#endif
/*
* Wrapper for *unsigned* 16-bit DIVISION. The divisor must be a compile time
* constant written as a single literal token (not an expression): it is
* pasted into the names U16LD_DENOM_<d>_MAGIC and U16LD_DENOM_<d>_MORE
* (presumably supplied by u16_ldparams.h).
* E.g. FAST_DIV16U(value, 100)
*
* Divisors accepted by U16_ISPOW2 use the native divide; all others go through
* libdivide_u16_do_raw(). Both arms of the conditional are compiled, so the
* pasted names must exist for every divisor used.
*
* The dividend is parenthesised, so FAST_DIV16U(x + 4, 4) divides (x + 4).
*/
#define U16_MAGIC(d) CONCAT(CONCAT(U16LD_DENOM_, d), _MAGIC)
#define U16_MORE(d) CONCAT(CONCAT(U16LD_DENOM_, d), _MORE)
#define FAST_DIV16U(a, d) (U16_ISPOW2(d) ? (a)/(d) : LIB_DIV_NAMESPACE libdivide_u16_do_raw(a, U16_MAGIC(d), U16_MORE(d)))
/*
* Wrapper for *signed* 16-bit DIVISION by a *POSITIVE* compile time constant.
* E.g. FAST_DIV16(-value, 777)
*
* This only works for positive parameters :-(
* A negative number results in a hyphen in the macro name, which is not allowed.
*
* The divisor is pasted into the names S16LD_DENOM_<d>_MAGIC and
* S16LD_DENOM_<d>_MORE, so it must be a single literal token. Divisors
* accepted by S16_ISPOW2 use the native divide; all others go through
* libdivide_s16_do_raw(). Both arms of the conditional are compiled.
*
* The dividend is parenthesised, so FAST_DIV16(x - 4, 4) divides (x - 4).
*/
#define S16_MAGIC(d) CONCAT(CONCAT(S16LD_DENOM_, d), _MAGIC)
#define S16_MORE(d) CONCAT(CONCAT(S16LD_DENOM_, d), _MORE)
#define FAST_DIV16(a, d) (S16_ISPOW2(d) ? (a)/(d) : LIB_DIV_NAMESPACE libdivide_s16_do_raw(a, S16_MAGIC(d), S16_MORE(d)))
/*
* Wrapper for *signed* 16-bit DIVISION by a *NEGATIVE* compile time constant.
* E.g. FAST_DIV16_NEG(-value, 777) // <-- It's converted to negative. Really.
*
* This only works for positive parameters :-(
* A negative number results in a hyphen in the macro name, which is not allowed.
* Pass the magnitude of the divisor; the macro negates it.
*
* The divisor is pasted into the names S16LD_DENOM_MINUS_<d>_MAGIC and
* S16LD_DENOM_MINUS_<d>_MORE, so it must be a single literal token. Divisors
* accepted by S16_ISPOW2 use the native divide by -(d); all others go through
* libdivide_s16_do_raw(). Both arms of the conditional are compiled.
*
* The dividend is parenthesised, so FAST_DIV16_NEG(x + 4, 4) divides (x + 4).
*/
#define S16_MAGIC_NEG(d) CONCAT(CONCAT(S16LD_DENOM_MINUS_, d), _MAGIC)
#define S16_MORE_NEG(d) CONCAT(CONCAT(S16LD_DENOM_MINUS_, d), _MORE)
#define FAST_DIV16_NEG(a, d) (S16_ISPOW2(d) ? (a)/-(d) : LIB_DIV_NAMESPACE libdivide_s16_do_raw(a, S16_MAGIC_NEG(d), S16_MORE_NEG(d)))
/*
* Wrapper for *unsigned* 16-bit MODULUS. The divisor must be a compile time
* constant.
* E.g. FAST_MOD16U(value, 6)
*
* Computed as a - (a / d) * d, with the division done by FAST_DIV16U.
* The dividend is parenthesised so expression arguments are handled as a
* whole, but it is evaluated twice: do not pass expressions with side effects.
* The divisor is forwarded to FAST_DIV16U as-is because it is pasted into a
* macro name there.
*/
#define FAST_MOD16U(a, d) ((a) - (FAST_DIV16U((a), d) * (d)))