/* MultiSource/Applications/ALAC/encode/ag_enc.c - third_party/llvm-test-suite - Git at Google */

 /*
  * Copyright (c) 2011 Apple Inc. All rights reserved.
  *
  * @APPLE_APACHE_LICENSE_HEADER_START@
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  *
  * @APPLE_APACHE_LICENSE_HEADER_END@
  */

 /*
 	File:		ag_enc.c

 	Contains:   Adaptive Golomb encode routines.

 	Copyright:	(c) 2001-2011 Apple, Inc.
 */

 #include "aglib.h"
 #include "ALACBitUtilities.h"
 #include "EndianPortable.h"
 #include "ALACAudioTypes.h"

 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #if __GNUC__ && TARGET_OS_MAC
 	#if __POWERPC__
 		#include <ppc_intrinsics.h>
 	#else
 		#include <libkern/OSByteOrder.h>
 	#endif
 #endif

 #define CODE_TO_LONG_MAXBITS	32
 #define N_MAX_MEAN_CLAMP		0xffff
 #define N_MEAN_CLAMP_VAL		0xffff
 #define REPORT_VAL  40

 #if __GNUC__
 #define ALWAYS_INLINE		__attribute__((always_inline))
 #else
 #define ALWAYS_INLINE
 #endif

 #ifdef __XS1B__
 #define UNALIGNED_MEM_ACCESS_NOT_SUPPORTED
 #endif

 /*	And on the subject of the CodeWarrior x86 compiler and inlining, I reworked a lot of this
 	to help the compiler out.   In many cases this required manual inlining or a macro.  Sorry
 	if it is ugly but the performance gains are well worth it.
 	- WSK 5/19/04
 */

 // note: implementing this with some kind of "count leading zeros" assembly is a big performance win
 /*
  * Count the leading zero bits in the 32-bit pattern of m.
  * Returns 0 when bit 31 is set (any negative m) and 32 when m is zero.
  */
 static inline int32_t lead( int32_t m )
 {
 	const uint32_t	bits = (uint32_t) m;
 	uint32_t		probe = 0x80000000u;
 	int32_t			zeros = 0;

 	// slide a single-bit probe down from bit 31 until it lands on a set bit
 	while (zeros < 32 && (bits & probe) == 0)
 	{
 		probe >>= 1;
 		zeros++;
 	}

 	return zeros;
 }

 #define arithmin(a, b) ((a) < (b) ? (a) : (b))

 /*
  * Returns the bit index of the highest set bit of (x + 3), i.e. 31 minus the
  * leading-zero count that lead() reports for x + 3.
  */
 static inline int32_t ALWAYS_INLINE lg3a( int32_t x)
 {
 	return 31 - lead(x + 3);
 }

 /*
  * Branch-free absolute value of a.
  * note: the CW PPC intrinsic __abs() turns into these instructions so no need to try and use it
  */
 static inline int32_t ALWAYS_INLINE abs_func( int32_t a )
 {
 	// 0 for non-negative a; all ones for negative a when >> is an arithmetic shift
 	const int32_t sign = a >> 31;

 	// (a ^ ~0) - (-1) is two's-complement negation; with sign == 0 both steps are no-ops
 	return (a ^ sign) - sign;
 }

 #ifdef UNALIGNED_MEM_ACCESS_NOT_SUPPORTED
 /*
  * Assemble the four bytes at buffer[0..3] into a 32-bit value, most
  * significant byte first. Bytes are read one at a time because embedded CPUs
  * typically can't read unaligned 32-bit words.
  */
 static inline uint32_t ALWAYS_INLINE readBE32bit( uint8_t * buffer )
 {
 	uint32_t	word = 0;
 	int			idx;

 	for (idx = 0; idx < 4; idx++)
 	{
 		word = (word << 8) | (uint32_t)buffer[idx];
 	}

 	return word;
 }
 /*
  * Store value into buffer[0..3], most significant byte first. Bytes are
  * written one at a time because embedded CPUs typically can't write
  * unaligned 32-bit words.
  */
 static inline void ALWAYS_INLINE writeBE32bit( uint8_t * buffer, uint32_t value )
 {
 	int idx;

 	for (idx = 0; idx < 4; idx++)
 	{
 		// idx 0 takes bits 31..24, idx 3 takes bits 7..0
 		buffer[idx] = (value >> (24 - 8 * idx)) & 0xff;
 	}
 }
 #else
 /*
  * Load the 32-bit big-endian word stored at buffer and return it in native
  * byte order (conversion is delegated to Swap32NtoB).
  */
 static inline uint32_t ALWAYS_INLINE readBE32bit( uint8_t * buffer )
 {
 	uint32_t raw;

 	// memcpy rather than *(uint32_t *)buffer: callers pass byte-granular
 	// addresses, so buffer need not be 4-byte aligned, and dereferencing a
 	// misaligned / type-punned pointer is undefined behavior
 	memcpy(&raw, buffer, sizeof raw);
 	return Swap32NtoB( raw );
 }
 /*
  * Store value at buffer as a 32-bit big-endian word (conversion from native
  * byte order is delegated to Swap32BtoN).
  */
 static inline void ALWAYS_INLINE writeBE32bit( uint8_t * buffer, uint32_t value )
 {
 	const uint32_t raw = Swap32BtoN( value );

 	// memcpy rather than *(uint32_t *)buffer = ...: callers pass byte-granular
 	// addresses, so buffer need not be 4-byte aligned, and storing through a
 	// misaligned / type-punned pointer is undefined behavior
 	memcpy(buffer, &raw, sizeof raw);
 }
 #endif

 #if PRAGMA_MARK
 #pragma mark -
 #endif

 /*
  * Build the code word for n with modulus m and k.
  *
  * Normal form: (n / m) one-bits followed by a field holding (n % m) + 1 in
  * k + 1 bits, or holding 0 in k bits when n is an exact multiple of m.
  * Escape form (quotient >= MAX_PREFIX_16, or the normal form would be longer
  * than the escape): MAX_PREFIX_16 one-bits followed by n itself in
  * MAX_DATATYPE_BITS_16 bits.
  *
  * The code length is stored in *outNumBits; the code word is returned.
  */
 static inline int32_t dyn_code(int32_t m, int32_t k, int32_t n, uint32_t *outNumBits)
 {
 	uint32_t	quotient = n/m;
 	uint32_t	codeBits = 0;
 	uint32_t	code = 0;
 	int32_t		useEscape = (quotient >= MAX_PREFIX_16);

 	//Assert( n >= 0 );

 	if (!useEscape)
 	{
 		uint32_t remainder = n%m;
 		uint32_t isMultiple = (remainder == 0);

 		codeBits = quotient + k + 1 - isMultiple;
 		code = (((1<<quotient)-1)<<(codeBits-quotient)) + remainder + 1 - isMultiple;

 		// if coding this way is bigger than doing escape, then do escape
 		useEscape = (codeBits > MAX_PREFIX_16 + MAX_DATATYPE_BITS_16);
 	}

 	if (useEscape)
 	{
 		codeBits = MAX_PREFIX_16 + MAX_DATATYPE_BITS_16;
 		code = (((1<<MAX_PREFIX_16)-1)<<MAX_DATATYPE_BITS_16) + n;
 	}

 	*outNumBits = codeBits;

 	return (int32_t) code;
 }


 /*
  * Build the code word for n with modulus m and k.
  *
  * Normal form: (n / m) one-bits followed by a field holding the remainder + 1
  * in k + 1 bits, or holding 0 in k bits when the remainder is zero.
  * Escape form (quotient >= MAX_PREFIX_32, or normal form longer than 25 bits):
  * the code is MAX_PREFIX_32 one-bits, and the raw n is handed back through
  * *overflow with its width (maxbits) in *overflowbits for the caller to write.
  *
  * *outNumBits / *outValue always receive the code length and code word.
  * *overflow / *overflowbits are written only on the escape path.
  * Returns 1 when the escape form was used, 0 otherwise.
  */
 static inline int32_t dyn_code_32bit(int32_t maxbits, uint32_t m, uint32_t k, uint32_t n, uint32_t *outNumBits, uint32_t *outValue, uint32_t *overflow, uint32_t *overflowbits)
 {
 	const uint32_t	quotient = n/m;
 	uint32_t		codeBits = 0;
 	uint32_t		code = 0;
 	int32_t			escaped = 1;

 	if (quotient < MAX_PREFIX_32)
 	{
 		const uint32_t remainder = n - (m * quotient);
 		const uint32_t isMultiple = (remainder == 0);

 		codeBits = quotient + k + 1 - isMultiple;
 		code = (((1<<quotient)-1)<<(codeBits-quotient)) + remainder + 1 - isMultiple;
 		escaped = (codeBits > 25);
 	}

 	if (escaped)
 	{
 		codeBits = MAX_PREFIX_32;
 		code = (((1<<MAX_PREFIX_32)-1));
 		*overflow = n;
 		*overflowbits = maxbits;
 	}

 	*outNumBits = codeBits;
 	*outValue = code;

 	return escaped;
 }


 /*
  * Write the low numBits bits of value into the byte buffer out, starting at
  * bit position bitPos (bit 0 = most significant bit of out[0]). Bits outside
  * the written field are preserved.
  *
  * The four bytes starting at out + (bitPos >> 3) are read and rewritten, so
  * they must be accessible. The shifts below require
  * 1 <= numBits <= 32 - (bitPos & 7); any numBits in 1..25 satisfies this for
  * every bitPos.
  */
 static inline void ALWAYS_INLINE dyn_jam_noDeref(unsigned char *out, uint32_t bitPos, uint32_t numBits, uint32_t value)
 {
 	// keep the address as a byte pointer: it is generally not 4-byte aligned, and
 	// converting it to uint32_t * (as this code used to) is undefined for misaligned addresses
 	uint8_t		*dst = (uint8_t *)out + (bitPos >> 3);
 	uint32_t	mask;
 	uint32_t	curr;
 	uint32_t	shift;

 	//Assert( numBits <= 32 );

 	curr = readBE32bit(dst);

 	// distance from the low end of the 32-bit window up to the field's last bit
 	shift = 32 - (bitPos & 7) - numBits;

 	mask = ~0u >> (32 - numBits);		// mask must be created in two steps to avoid compiler sequencing ambiguity
 	mask <<= shift;

 	value  = (value << shift) & mask;
 	value |= curr & ~mask;

 	writeBE32bit(dst, value);
 }


 /*
  * Write the low numBits bits (1..32) of value into the byte buffer out,
  * starting at bit position bitPos (bit 0 = most significant bit of out[0]).
  *
  * When the field fits in the 32-bit window starting at out + (bitPos >> 3),
  * surrounding bits are preserved. When it spills past the window (up to 7
  * bits), the spilled bits go into the top of the fifth byte and the rest of
  * that byte is cleared. Up to five bytes at out + (bitPos >> 3) are touched.
  */
 static inline void ALWAYS_INLINE dyn_jam_noDeref_large(unsigned char *out, uint32_t bitPos, uint32_t numBits, uint32_t value)
 {
 	// keep the address as a byte pointer: it is generally not 4-byte aligned, and
 	// converting it to uint32_t * is undefined for misaligned addresses
 	uint8_t		*dst = (uint8_t *)out + (bitPos>>3);
 	uint32_t	bitOffset = bitPos & 7;
 	uint32_t	w;
 	uint32_t	curr;
 	uint32_t	mask;
 	int32_t		shiftvalue = (int32_t)(32 - bitOffset - numBits);

 	//Assert(numBits <= 32);

 	curr = readBE32bit(dst);

 	if (shiftvalue < 0)
 	{
 		// the field runs past the 32-bit window; spill is 1..7 and bitOffset >= 1 here
 		uint32_t spill = (uint32_t)(-shiftvalue);

 		// Preserve exactly the bitOffset bits that precede the field. The mask
 		// used to be ~0u >> spill, which equals this only when numBits == 32;
 		// for shorter fields it zeroed already-written bits in front of the field.
 		mask = ~0u >> bitOffset;
 		w = (value >> spill) & mask;
 		w |= (curr & ~mask);

 		// low 'spill' bits of value land in the top of the byte after the window
 		dst[4] = (uint8_t)((value << (8 - spill)) & 0xff);
 	}
 	else
 	{
 		mask = ~0u >> (32 - numBits);
 		mask <<= shiftvalue;			// mask must be created in two steps to avoid compiler sequencing ambiguity

 		w  = (value << shiftvalue) & mask;
 		w |= curr & ~mask;
 	}

 	writeBE32bit(dst, w);
 }


 /*
  * Encode numSamples values read from pc into bitstream.
  *
  * params     - supplies mb0, pb, kb, wb, sw and fw; params->mb is reset to mb0.
  *              The running mean is kept in a local and is not written back.
  * pc         - input values; after every sw values the read pointer skips an
  *              extra (fw - sw) entries.
  * bitstream  - output; bits are written at bitstream->cur starting from
  *              bitstream->bitIndex, then the buffer is advanced by the number
  *              of bits written.
  * numSamples - number of values to encode.
  * bitSize    - must be 1..32; used as the bit width of escaped raw values.
  * outNumBits - receives the number of bits written (0 on an error return).
  *
  * Returns ALAC_noErr, or kALAC_ParamError when a RequireAction check fails.
  * NOTE(review): RequireAction is defined outside this file; presumably it runs
  * the given action when the condition is false - confirm.
  */
 int32_t dyn_comp( AGParamRecPtr params, int32_t * pc, BitBuffer * bitstream, int32_t numSamples, int32_t bitSize, uint32_t * outNumBits )
 {
     unsigned char *		out;
     uint32_t		bitPos, startPos;
     uint32_t			m, k, n, c, mz, nz;
     uint32_t		numBits;
     uint32_t			value;
     int32_t				del, zmode;
 	uint32_t		overflow, overflowbits;
     int32_t					status;

     // shadow the variables in params so there's not the dereferencing overhead
     uint32_t		mb, pb, kb, wb;
     int32_t					rowPos = 0;
     int32_t					rowSize = params->sw;
     int32_t					rowJump = (params->fw) - rowSize;
     int32_t *			inPtr = pc;

 	*outNumBits = 0;
 	RequireAction( (bitSize >= 1) && (bitSize <= 32), return kALAC_ParamError; );

 	out = bitstream->cur;
 	startPos = bitstream->bitIndex;
     bitPos = startPos;

     mb = params->mb = params->mb0;
     pb = params->pb;
     kb = params->kb;
     wb = params->wb;
     zmode = 0;

     c=0;
 	status = ALAC_noErr;

     while (c < numSamples)
     {
         // derive k from the running mean (capped at kb) and use modulus m = 2^k - 1
         m  = mb >> QBSHIFT;
         k = lg3a(m);
         if ( k > kb)
         {
         	k = kb;
         }
         m = (1<<k)-1;

         del = *inPtr++;
         rowPos++;

         // fold the sign into the low bit: del >= 0 -> 2*del, del < 0 -> 2*|del| - 1;
         // zmode (0 or 1) is subtracted as well
         n = (abs_func(del) << 1) - ((del >> 31) & 1) - zmode;
 		//Assert( 32-lead(n) <= bitSize );

 		if ( dyn_code_32bit(bitSize, m, k, n, &numBits, &value, &overflow, &overflowbits) )
 		{
 			// escaped: write the escape prefix, then the raw value in overflowbits bits
 			dyn_jam_noDeref(out, bitPos, numBits, value);
 			bitPos += numBits;
 			dyn_jam_noDeref_large(out, bitPos, overflowbits, overflow);
 			bitPos += overflowbits;
 		}
 		else
 		{
 			dyn_jam_noDeref(out, bitPos, numBits, value);
 			bitPos += numBits;
 		}

         c++;
         if ( rowPos >= rowSize)
         {
         	rowPos = 0;
         	inPtr += rowJump;
         }

         // update the running mean from the value just coded (zmode added back in)
         mb = pb * (n + zmode) + mb - ((pb *mb)>>QBSHIFT);

 		// update mean tracking if it's overflowed
 		if (n > N_MAX_MEAN_CLAMP)
 			mb = N_MEAN_CLAMP_VAL;

         zmode = 0;

         RequireAction(c <= numSamples, status = kALAC_ParamError; goto Exit; );

         // when the mean is small enough and samples remain, code a run of zeros
         if (((mb << MMULSHIFT) < QB) && (c < numSamples))
         {
             zmode = 1;
             nz = 0;

             // count consecutive zero inputs; the run is capped at 65535
             while(c<numSamples && *inPtr == 0)
             {
             	/* Take care of wrap-around globals. */
                 ++inPtr;
                 ++nz;
                 ++c;
                 if ( ++rowPos >= rowSize)
                 {
                 	rowPos = 0;
                 	inPtr += rowJump;
                 }

                 if(nz >= 65535)
                 {
                 	// capped run: clear zmode so the next value is coded without the -1 adjustment
                 	zmode = 0;
                 	break;
                 }
             }

             k = lead(mb) - BITOFF+((mb+MOFF)>>MDENSHIFT);
             mz = ((1<<k)-1) & wb;

             // the run length (possibly 0) is coded with dyn_code
             value = dyn_code(mz, k, nz, &numBits);
             dyn_jam_noDeref(out, bitPos, numBits, value);
             bitPos += numBits;

             mb = 0;
         }
     }

     *outNumBits = (bitPos - startPos);
 	BitBufferAdvance( bitstream, *outNumBits );

 // the goto Exit path skips the two statements above, leaving *outNumBits at 0
 // and the bitstream position unchanged
 Exit:
 	return status;
 }
	/*
	* Copyright (c) 2011 Apple Inc. All rights reserved.
	*
	* @APPLE_APACHE_LICENSE_HEADER_START@
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*
	* @APPLE_APACHE_LICENSE_HEADER_END@
	*/

	/*
	File: ag_enc.c

	Contains: Adaptive Golomb encode routines.

	Copyright: (c) 2001-2011 Apple, Inc.
	*/

	#include "aglib.h"
	#include "ALACBitUtilities.h"
	#include "EndianPortable.h"
	#include "ALACAudioTypes.h"

	#include <math.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#if __GNUC__ && TARGET_OS_MAC
	#if __POWERPC__
	#include <ppc_intrinsics.h>
	#else
	#include <libkern/OSByteOrder.h>
	#endif
	#endif

	#define CODE_TO_LONG_MAXBITS 32
	#define N_MAX_MEAN_CLAMP 0xffff
	#define N_MEAN_CLAMP_VAL 0xffff
	#define REPORT_VAL 40

	#if __GNUC__
	#define ALWAYS_INLINE __attribute__((always_inline))
	#else
	#define ALWAYS_INLINE
	#endif

	#ifdef __XS1B__
	#define UNALIGNED_MEM_ACCESS_NOT_SUPPORTED
	#endif

	/* And on the subject of the CodeWarrior x86 compiler and inlining, I reworked a lot of this
	to help the compiler out. In many cases this required manual inlining or a macro. Sorry
	if it is ugly but the performance gains are well worth it.
	- WSK 5/19/04
	*/

	// note: implementing this with some kind of "count leading zeros" assembly is a big performance win
	/*
	 * Count the leading zero bits in the 32-bit pattern of m.
	 * Returns 0 when bit 31 is set (any negative m) and 32 when m is zero.
	 */
	static inline int32_t lead( int32_t m )
	{
		const uint32_t	bits = (uint32_t) m;
		uint32_t		probe = 0x80000000u;
		int32_t			zeros = 0;

		// slide a single-bit probe down from bit 31 until it lands on a set bit
		while (zeros < 32 && (bits & probe) == 0)
		{
			probe >>= 1;
			zeros++;
		}

		return zeros;
	}

	#define arithmin(a, b) ((a) < (b) ? (a) : (b))

	/*
	 * Returns the bit index of the highest set bit of (x + 3), i.e. 31 minus the
	 * leading-zero count that lead() reports for x + 3.
	 */
	static inline int32_t ALWAYS_INLINE lg3a( int32_t x)
	{
		return 31 - lead(x + 3);
	}

	/*
	 * Branch-free absolute value of a.
	 * note: the CW PPC intrinsic __abs() turns into these instructions so no need to try and use it
	 */
	static inline int32_t ALWAYS_INLINE abs_func( int32_t a )
	{
		// 0 for non-negative a; all ones for negative a when >> is an arithmetic shift
		const int32_t sign = a >> 31;

		// (a ^ ~0) - (-1) is two's-complement negation; with sign == 0 both steps are no-ops
		return (a ^ sign) - sign;
	}

	#ifdef UNALIGNED_MEM_ACCESS_NOT_SUPPORTED
	/*
	 * Assemble the four bytes at buffer[0..3] into a 32-bit value, most
	 * significant byte first.
	 */
	static inline uint32_t ALWAYS_INLINE readBE32bit( uint8_t * buffer )
	{
		// embedded CPUs typically can't read unaligned 32-bit words so just read the bytes
		uint32_t value;

		// plain bitwise OR; the escaped "\|" form found here previously is not a C operator
		value = ((uint32_t)buffer[0] << 24) | ((uint32_t)buffer[1] << 16) |
				((uint32_t)buffer[2] << 8) | (uint32_t)buffer[3];
		return value;
	}
	/*
	 * Store value into buffer[0..3], most significant byte first. Bytes are
	 * written one at a time because embedded CPUs typically can't write
	 * unaligned 32-bit words.
	 */
	static inline void ALWAYS_INLINE writeBE32bit( uint8_t * buffer, uint32_t value )
	{
		int idx;

		for (idx = 0; idx < 4; idx++)
		{
			// idx 0 takes bits 31..24, idx 3 takes bits 7..0
			buffer[idx] = (value >> (24 - 8 * idx)) & 0xff;
		}
	}
	#else
	/*
	 * Load the 32-bit big-endian word stored at buffer and return it in native
	 * byte order (conversion is delegated to Swap32NtoB).
	 */
	static inline uint32_t ALWAYS_INLINE readBE32bit( uint8_t * buffer )
	{
		uint32_t raw;

		// The previous text declared a plain uint32_t and then dereferenced it, which
		// is not valid C. Read the word with memcpy: buffer need not be 4-byte
		// aligned, so a cast-and-dereference through uint32_t * would be undefined.
		memcpy(&raw, buffer, sizeof raw);
		return Swap32NtoB( raw );
	}
	/*
	 * Store value at buffer as a 32-bit big-endian word (conversion from native
	 * byte order is delegated to Swap32BtoN).
	 */
	static inline void ALWAYS_INLINE writeBE32bit( uint8_t * buffer, uint32_t value )
	{
		const uint32_t raw = Swap32BtoN( value );

		// The previous text declared a plain uint32_t and then stored through it, which
		// is not valid C. Write the word with memcpy: buffer need not be 4-byte
		// aligned, so a store through a cast uint32_t * would be undefined.
		memcpy(buffer, &raw, sizeof raw);
	}
	#endif

	#if PRAGMA_MARK
	#pragma mark -
	#endif

	/*
	 * Build the code word for n with modulus m and k.
	 *
	 * Normal form: (n / m) one-bits followed by a field holding (n % m) + 1 in
	 * k + 1 bits, or holding 0 in k bits when n is an exact multiple of m.
	 * Escape form (quotient >= MAX_PREFIX_16, or the normal form would be longer
	 * than the escape): MAX_PREFIX_16 one-bits followed by n itself in
	 * MAX_DATATYPE_BITS_16 bits.
	 *
	 * The code length is stored in *outNumBits; the code word is returned.
	 */
	static inline int32_t dyn_code(int32_t m, int32_t k, int32_t n, uint32_t *outNumBits)
	{
		uint32_t	quotient = n/m;
		uint32_t	codeBits = 0;
		uint32_t	code = 0;
		int32_t		useEscape = (quotient >= MAX_PREFIX_16);

		//Assert( n >= 0 );

		if (!useEscape)
		{
			uint32_t remainder = n%m;
			uint32_t isMultiple = (remainder == 0);

			codeBits = quotient + k + 1 - isMultiple;
			code = (((1<<quotient)-1)<<(codeBits-quotient)) + remainder + 1 - isMultiple;

			// if coding this way is bigger than doing escape, then do escape
			useEscape = (codeBits > MAX_PREFIX_16 + MAX_DATATYPE_BITS_16);
		}

		if (useEscape)
		{
			codeBits = MAX_PREFIX_16 + MAX_DATATYPE_BITS_16;
			code = (((1<<MAX_PREFIX_16)-1)<<MAX_DATATYPE_BITS_16) + n;
		}

		*outNumBits = codeBits;

		return (int32_t) code;
	}


	/*
	 * Build the code word for n with modulus m and k.
	 *
	 * Normal form: (n / m) one-bits followed by a field holding the remainder + 1
	 * in k + 1 bits, or holding 0 in k bits when the remainder is zero.
	 * Escape form (quotient >= MAX_PREFIX_32, or normal form longer than 25 bits):
	 * the code is MAX_PREFIX_32 one-bits, and the raw n is handed back through
	 * *overflow with its width (maxbits) in *overflowbits for the caller to write.
	 *
	 * *outNumBits / *outValue always receive the code length and code word.
	 * *overflow / *overflowbits are written only on the escape path.
	 * Returns 1 when the escape form was used, 0 otherwise.
	 *
	 * The four out-parameters are pointers: the body stores through them and the
	 * caller passes addresses, so the by-value declarations found here previously
	 * could not compile.
	 */
	static inline int32_t dyn_code_32bit(int32_t maxbits, uint32_t m, uint32_t k, uint32_t n, uint32_t *outNumBits, uint32_t *outValue, uint32_t *overflow, uint32_t *overflowbits)
	{
		const uint32_t	quotient = n/m;
		uint32_t		codeBits = 0;
		uint32_t		code = 0;
		int32_t			escaped = 1;

		if (quotient < MAX_PREFIX_32)
		{
			const uint32_t remainder = n - (m * quotient);
			const uint32_t isMultiple = (remainder == 0);

			codeBits = quotient + k + 1 - isMultiple;
			code = (((1<<quotient)-1)<<(codeBits-quotient)) + remainder + 1 - isMultiple;
			escaped = (codeBits > 25);
		}

		if (escaped)
		{
			codeBits = MAX_PREFIX_32;
			code = (((1<<MAX_PREFIX_32)-1));
			*overflow = n;
			*overflowbits = maxbits;
		}

		*outNumBits = codeBits;
		*outValue = code;

		return escaped;
	}


	/*
	 * Write the low numBits bits of value into the byte buffer out, starting at
	 * bit position bitPos (bit 0 = most significant bit of out[0]). Bits outside
	 * the written field are preserved.
	 *
	 * The four bytes starting at out + (bitPos >> 3) are read and rewritten, so
	 * they must be accessible. The shifts below require
	 * 1 <= numBits <= 32 - (bitPos & 7); any numBits in 1..25 satisfies this for
	 * every bitPos.
	 */
	static inline void ALWAYS_INLINE dyn_jam_noDeref(unsigned char *out, uint32_t bitPos, uint32_t numBits, uint32_t value)
	{
		// byte pointer to the first affected byte (the previous text stored this
		// address in a plain uint32_t, truncating the pointer)
		uint8_t		*dst = (uint8_t *)out + (bitPos >> 3);
		uint32_t	mask;
		uint32_t	curr;
		uint32_t	shift;

		//Assert( numBits <= 32 );

		curr = readBE32bit(dst);

		// distance from the low end of the 32-bit window up to the field's last bit
		shift = 32 - (bitPos & 7) - numBits;

		mask = ~0u >> (32 - numBits); // mask must be created in two steps to avoid compiler sequencing ambiguity
		mask <<= shift;

		value = (value << shift) & mask;
		value |= curr & ~mask;

		writeBE32bit(dst, value);
	}


	/*
	 * Write the low numBits bits (1..32) of value into the byte buffer out,
	 * starting at bit position bitPos (bit 0 = most significant bit of out[0]).
	 *
	 * When the field fits in the 32-bit window starting at out + (bitPos >> 3),
	 * surrounding bits are preserved. When it spills past the window (up to 7
	 * bits), the spilled bits go into the top of the fifth byte and the rest of
	 * that byte is cleared. Up to five bytes at out + (bitPos >> 3) are touched.
	 */
	static inline void ALWAYS_INLINE dyn_jam_noDeref_large(unsigned char *out, uint32_t bitPos, uint32_t numBits, uint32_t value)
	{
		// keep the address as a byte pointer: it is generally not 4-byte aligned, and
		// converting it to uint32_t * is undefined for misaligned addresses
		uint8_t		*dst = (uint8_t *)out + (bitPos>>3);
		uint32_t	bitOffset = bitPos & 7;
		uint32_t	w;
		uint32_t	curr;
		uint32_t	mask;
		int32_t		shiftvalue = (int32_t)(32 - bitOffset - numBits);

		//Assert(numBits <= 32);

		curr = readBE32bit(dst);

		if (shiftvalue < 0)
		{
			// the field runs past the 32-bit window; spill is 1..7 and bitOffset >= 1 here
			uint32_t spill = (uint32_t)(-shiftvalue);

			// Preserve exactly the bitOffset bits that precede the field. The mask
			// used to be ~0u >> spill, which equals this only when numBits == 32;
			// for shorter fields it zeroed already-written bits in front of the field.
			mask = ~0u >> bitOffset;
			w = (value >> spill) & mask;
			w |= (curr & ~mask);

			// low 'spill' bits of value land in the top of the byte after the window
			dst[4] = (uint8_t)((value << (8 - spill)) & 0xff);
		}
		else
		{
			mask = ~0u >> (32 - numBits);
			mask <<= shiftvalue; // mask must be created in two steps to avoid compiler sequencing ambiguity

			w = (value << shiftvalue) & mask;
			w |= curr & ~mask;
		}

		writeBE32bit(dst, w);
	}


	/*
	 * Encode numSamples values read from pc into bitstream.
	 *
	 * params     - supplies mb0, pb, kb, wb, sw and fw; params->mb is reset to mb0.
	 *              The running mean is kept in a local and is not written back.
	 * pc         - input values; after every sw values the read pointer skips an
	 *              extra (fw - sw) entries.
	 * bitstream  - output; bits are written at bitstream->cur starting from
	 *              bitstream->bitIndex, then the buffer is advanced by the number
	 *              of bits written.
	 * numSamples - number of values to encode.
	 * bitSize    - must be 1..32; used as the bit width of escaped raw values.
	 * outNumBits - receives the number of bits written (0 on an error return).
	 *
	 * Returns ALAC_noErr, or kALAC_ParamError when a RequireAction check fails.
	 * NOTE(review): RequireAction is defined outside this file; presumably it runs
	 * the given action when the condition is false - confirm.
	 */
	int32_t dyn_comp( AGParamRecPtr params, int32_t * pc, BitBuffer * bitstream, int32_t numSamples, int32_t bitSize, uint32_t * outNumBits )
	{
	unsigned char * out;
	uint32_t bitPos, startPos;
	uint32_t m, k, n, c, mz, nz;
	uint32_t numBits;
	uint32_t value;
	int32_t del, zmode;
	uint32_t overflow, overflowbits;
	int32_t status;

	// shadow the variables in params so there's not the dereferencing overhead
	uint32_t mb, pb, kb, wb;
	int32_t rowPos = 0;
	int32_t rowSize = params->sw;
	int32_t rowJump = (params->fw) - rowSize;
	int32_t * inPtr = pc;

	*outNumBits = 0;
	RequireAction( (bitSize >= 1) && (bitSize <= 32), return kALAC_ParamError; );

	out = bitstream->cur;
	startPos = bitstream->bitIndex;
	bitPos = startPos;

	mb = params->mb = params->mb0;
	pb = params->pb;
	kb = params->kb;
	wb = params->wb;
	zmode = 0;

	c=0;
	status = ALAC_noErr;

	while (c < numSamples)
	{
	// derive k from the running mean (capped at kb) and use modulus m = 2^k - 1
	m = mb >> QBSHIFT;
	k = lg3a(m);
	if ( k > kb)
	{
	k = kb;
	}
	m = (1<<k)-1;

	del = *inPtr++;
	rowPos++;

	// fold the sign into the low bit: del >= 0 -> 2*del, del < 0 -> 2*|del| - 1;
	// zmode (0 or 1) is subtracted as well
	n = (abs_func(del) << 1) - ((del >> 31) & 1) - zmode;
	//Assert( 32-lead(n) <= bitSize );

	if ( dyn_code_32bit(bitSize, m, k, n, &numBits, &value, &overflow, &overflowbits) )
	{
	// escaped: write the escape prefix, then the raw value in overflowbits bits
	dyn_jam_noDeref(out, bitPos, numBits, value);
	bitPos += numBits;
	dyn_jam_noDeref_large(out, bitPos, overflowbits, overflow);
	bitPos += overflowbits;
	}
	else
	{
	dyn_jam_noDeref(out, bitPos, numBits, value);
	bitPos += numBits;
	}

	c++;
	if ( rowPos >= rowSize)
	{
	rowPos = 0;
	inPtr += rowJump;
	}

	// update the running mean from the value just coded (zmode added back in)
	mb = pb * (n + zmode) + mb - ((pb *mb)>>QBSHIFT);

	// update mean tracking if it's overflowed
	if (n > N_MAX_MEAN_CLAMP)
	mb = N_MEAN_CLAMP_VAL;

	zmode = 0;

	RequireAction(c <= numSamples, status = kALAC_ParamError; goto Exit; );

	// when the mean is small enough and samples remain, code a run of zeros
	if (((mb << MMULSHIFT) < QB) && (c < numSamples))
	{
	zmode = 1;
	nz = 0;

	// count consecutive zero inputs; the run is capped at 65535
	while(c<numSamples && *inPtr == 0)
	{
	/* Take care of wrap-around globals. */
	++inPtr;
	++nz;
	++c;
	if ( ++rowPos >= rowSize)
	{
	rowPos = 0;
	inPtr += rowJump;
	}

	if(nz >= 65535)
	{
	// capped run: clear zmode so the next value is coded without the -1 adjustment
	zmode = 0;
	break;
	}
	}

	k = lead(mb) - BITOFF+((mb+MOFF)>>MDENSHIFT);
	mz = ((1<<k)-1) & wb;

	// the run length (possibly 0) is coded with dyn_code
	value = dyn_code(mz, k, nz, &numBits);
	dyn_jam_noDeref(out, bitPos, numBits, value);
	bitPos += numBits;

	mb = 0;
	}
	}

	*outNumBits = (bitPos - startPos);
	BitBufferAdvance( bitstream, *outNumBits );

	// the goto Exit path skips the two statements above, leaving *outNumBits at 0
	// and the bitstream position unchanged
	Exit:
	return status;
	}