// blob: 4b59482014e2ac047d5a60d5029ce01b4f9653bf [file] [log] [blame]
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include <assert.h>
#include <string.h> // For memcpy and memset.
#include "libyuv/basic_types.h"
#include "libyuv/convert_argb.h" // For kYuvI601Constants
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// STATIC_CAST(type, expr) converts expr to type. It expands to static_cast
// when this file is compiled as C++ and to a plain C cast otherwise.
#ifdef __cplusplus
#define STATIC_CAST(type, expr) static_cast<type>(expr)
#else
#define STATIC_CAST(type, expr) (type)(expr)
#endif
// This macro controls YUV to RGB using unsigned math to extend range of
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
// LIBYUV_UNLIMITED_DATA
// Macros to enable unlimited data for each colorspace
// LIBYUV_UNLIMITED_BT601
// LIBYUV_UNLIMITED_BT709
// LIBYUV_UNLIMITED_BT2020
// The following macro from row_win makes the C code match the row_win code,
// which is 7 bit fixed point for ARGBToI420:
// LIBYUV_RGB7 selects the 7 bit RGBToY / RGBToYJ variants further down. It is
// only defined for MSVC (not clang) x86/x64 builds that set neither
// LIBYUV_BIT_EXACT nor LIBYUV_DISABLE_X86.
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
defined(_MSC_VER) && !defined(__clang__) && \
(defined(_M_IX86) || defined(_M_X64))
#define LIBYUV_RGB7 1
#endif
// On x86/x64 builds without LIBYUV_BIT_EXACT:
// LIBYUV_ARGBTOUV_PAVGB selects the UV row functions that subsample with
// nested AVGB() rounding averages.
// LIBYUV_RGBTOU_TRUNCATE selects the RGBToU / RGBToV variants that add 0x8000
// rather than 0x8080, i.e. without the rounding term.
#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#endif
// LIBYUV_UNATTENUATE_DUP is enabled for bit exact builds.
// NOTE(review): its consumer is not in this part of the file - confirm its
// effect where it is used.
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
#endif
// llvm x86 is poor at ternary operator, so use branchless min/max.
#define USE_BRANCHLESS 1
#if defined(USE_BRANCHLESS)
// Returns v when v >= 0, otherwise 0.
static __inline int32_t clamp0(int32_t v) {
  // -(v >= 0) is all ones for non-negative v and zero otherwise.
  return -(v >= 0) & v;
}
// Returns 255 when v >= 255, otherwise v & 255. Negative inputs are masked,
// not clamped, so clamp with clamp0() first when v may be negative.
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}
// Returns 1023 when v >= 1023, otherwise v & 1023.
static __inline int32_t clamp1023(int32_t v) {
  return (-(v >= 1023) | v) & 1023;
}
// clamp to max
// Returns max when v >= max, otherwise v & max. The mask only leaves smaller
// values unchanged when max has the form 2^n - 1.
static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (-(v >= max) | v) & max;
}
// Returns the absolute value of v as an unsigned value.
static __inline uint32_t Abs(int32_t v) {
  // m is all ones for negative v and zero otherwise, so (v + m) ^ m negates
  // negative values. The arithmetic is unsigned so that INT32_MIN wraps to
  // 0x80000000 instead of overflowing a signed int.
  uint32_t m = 0u - (uint32_t)(v < 0);
  return ((uint32_t)v + m) ^ m;
}
#else  // USE_BRANCHLESS
// Returns v when v >= 0, otherwise 0.
static __inline int32_t clamp0(int32_t v) {
  return (v < 0) ? 0 : v;
}
// Returns 255 when v > 255, otherwise v.
static __inline int32_t clamp255(int32_t v) {
  return (v > 255) ? 255 : v;
}
// Returns 1023 when v > 1023, otherwise v.
static __inline int32_t clamp1023(int32_t v) {
  return (v > 1023) ? 1023 : v;
}
// Returns max when v > max, otherwise v.
static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (v > max) ? max : v;
}
// Returns the absolute value of v as an unsigned value.
static __inline uint32_t Abs(int32_t v) {
  // Negate in unsigned arithmetic so INT32_MIN does not overflow.
  return (v < 0) ? (0u - (uint32_t)v) : (uint32_t)v;
}
#endif  // USE_BRANCHLESS
// Clamps val to the 8 bit range 0..255.
static __inline uint32_t Clamp(int32_t val) {
  const int non_negative = clamp0(val);
  const int limited = clamp255(non_negative);
  return (uint32_t)limited;
}
// Clamps val to the 10 bit range 0..1023.
static __inline uint32_t Clamp10(int32_t val) {
  const int non_negative = clamp0(val);
  const int limited = clamp1023(non_negative);
  return (uint32_t)limited;
}
// WRITEWORD stores the 32 bit value v at p, least significant byte first.
// p may have any alignment.
// Little Endian
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
static __inline void WRITEWORD(uint8_t* p, uint32_t v) {
  // Host byte order already is least significant byte first. memcpy avoids
  // the misaligned, type-punned store of a *(uint32_t*) cast.
  memcpy(p, &v, sizeof(v));
}
#else
static __inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
// Expands a row of 3 byte RGB24 pixels (B, G, R in memory) into 4 byte ARGB
// pixels (B, G, R, A in memory). Alpha is set to 255.
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_rgb24[0];
    const uint8_t green = src_rgb24[1];
    const uint8_t red = src_rgb24[2];
    src_rgb24 += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
// Expands a row of 3 byte RAW pixels (R, G, B in memory) into 4 byte ARGB
// pixels (B, G, R, A in memory). Alpha is set to 255.
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t red = src_raw[0];
    const uint8_t green = src_raw[1];
    const uint8_t blue = src_raw[2];
    src_raw += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
// Expands a row of 3 byte RAW pixels (R, G, B in memory) into 4 byte RGBA
// pixels (A, B, G, R in memory). Alpha is set to 255.
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t red = src_raw[0];
    const uint8_t green = src_raw[1];
    const uint8_t blue = src_raw[2];
    src_raw += 3;
    dst_rgba[0] = 255u;
    dst_rgba[1] = blue;
    dst_rgba[2] = green;
    dst_rgba[3] = red;
    dst_rgba += 4;
  }
}
// Converts a row of 3 byte RAW pixels (R, G, B in memory) to RGB24
// (B, G, R in memory) by swapping the first and third byte of every pixel.
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t red = src_raw[0];
    const uint8_t green = src_raw[1];
    const uint8_t blue = src_raw[2];
    src_raw += 3;
    dst_rgb24[0] = blue;
    dst_rgb24[1] = green;
    dst_rgb24[2] = red;
    dst_rgb24 += 3;
  }
}
// Expands a row of 2 byte RGB565 pixels (low byte first) into 4 byte ARGB
// pixels. Each 5 or 6 bit channel is widened to 8 bits by replicating its top
// bits into the low bits; alpha is set to 255.
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_rgb565[0];
    const uint8_t hi = src_rgb565[1];
    const uint8_t blue5 = STATIC_CAST(uint8_t, lo & 0x1f);
    const uint8_t green6 =
        STATIC_CAST(uint8_t, (lo >> 5) | ((hi & 0x07) << 3));
    const uint8_t red5 = STATIC_CAST(uint8_t, hi >> 3);
    src_rgb565 += 2;
    dst_argb[0] = STATIC_CAST(uint8_t, (blue5 << 3) | (blue5 >> 2));
    dst_argb[1] = STATIC_CAST(uint8_t, (green6 << 2) | (green6 >> 4));
    dst_argb[2] = STATIC_CAST(uint8_t, (red5 << 3) | (red5 >> 2));
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
// Expands a row of 2 byte ARGB1555 pixels (low byte first) into 4 byte ARGB
// pixels. The 5 bit colour channels are widened by bit replication and the
// single alpha bit becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_argb1555[0];
    const uint8_t hi = src_argb1555[1];
    const uint8_t blue5 = STATIC_CAST(uint8_t, lo & 0x1f);
    const uint8_t green5 =
        STATIC_CAST(uint8_t, (lo >> 5) | ((hi & 0x03) << 3));
    const uint8_t red5 = STATIC_CAST(uint8_t, (hi & 0x7c) >> 2);
    const uint8_t alpha1 = STATIC_CAST(uint8_t, hi >> 7);
    src_argb1555 += 2;
    dst_argb[0] = STATIC_CAST(uint8_t, (blue5 << 3) | (blue5 >> 2));
    dst_argb[1] = STATIC_CAST(uint8_t, (green5 << 3) | (green5 >> 2));
    dst_argb[2] = STATIC_CAST(uint8_t, (red5 << 3) | (red5 >> 2));
    // Negating 0 or 1 and truncating to 8 bits yields 0 or 255.
    dst_argb[3] = STATIC_CAST(uint8_t, -alpha1);
    dst_argb += 4;
  }
}
// Expands a row of 2 byte ARGB4444 pixels (low byte first) into 4 byte ARGB
// pixels. Every 4 bit channel n becomes the 8 bit value with n in both
// nibbles.
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_argb4444[0];
    const uint8_t hi = src_argb4444[1];
    const uint8_t blue4 = STATIC_CAST(uint8_t, lo & 0x0f);
    const uint8_t green4 = STATIC_CAST(uint8_t, lo >> 4);
    const uint8_t red4 = STATIC_CAST(uint8_t, hi & 0x0f);
    const uint8_t alpha4 = STATIC_CAST(uint8_t, hi >> 4);
    src_argb4444 += 2;
    // n * 0x11 equals (n << 4) | n for a 4 bit n.
    dst_argb[0] = STATIC_CAST(uint8_t, blue4 * 0x11);
    dst_argb[1] = STATIC_CAST(uint8_t, green4 * 0x11);
    dst_argb[2] = STATIC_CAST(uint8_t, red4 * 0x11);
    dst_argb[3] = STATIC_CAST(uint8_t, alpha4 * 0x11);
    dst_argb += 4;
  }
}
// Converts a row of AR30 pixels (one 32 bit word each: 2 bit alpha in the top
// bits, then 10 bit R, G and B) to 8 bit ARGB words. The top 8 bits of each
// 10 bit channel are kept and the 2 bit alpha is replicated to 8 bits.
// Words are read and written in host byte order; neither pointer needs to be
// 4 byte aligned.
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t argb = b | (g << 8) | (r << 16) | (a << 24);
    // memcpy rather than a *(uint32_t*) store: dst_argb is a byte pointer
    // with no alignment guarantee.
    memcpy(dst_argb, &argb, sizeof argb);
    dst_argb += 4;
    src_ar30 += 4;
  }
}
// Converts a row of AR30 pixels (one 32 bit word each: 2 bit alpha in the top
// bits, then 10 bit R, G and B) to 8 bit ABGR words (R in the low byte). The
// top 8 bits of each 10 bit channel are kept and the 2 bit alpha is
// replicated to 8 bits. Words are read and written in host byte order;
// neither pointer needs to be 4 byte aligned.
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t abgr = r | (g << 8) | (b << 16) | (a << 24);
    // memcpy rather than a *(uint32_t*) store: dst_abgr is a byte pointer
    // with no alignment guarantee.
    memcpy(dst_abgr, &abgr, sizeof abgr);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}
// Converts a row of AR30 words to AB30 by exchanging the 10 bit fields in
// bits 0..9 and bits 20..29; the 2 bit alpha and the middle 10 bit field are
// left in place. Words are read and written in host byte order; neither
// pointer needs to be 4 byte aligned.
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;  // Alpha and green stay where they are.
    uint32_t r = (ar30 >> 20) & 0x3ff;
    uint32_t ab30 = r | ga | (b << 20);
    // memcpy rather than a *(uint32_t*) store: dst_ab30 is a byte pointer
    // with no alignment guarantee.
    memcpy(dst_ab30, &ab30, sizeof ab30);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}
// Converts a row of ARGB pixels (B, G, R, A in memory) to ABGR
// (R, G, B, A in memory). All four bytes of a pixel are loaded before any is
// stored.
void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    const uint8_t alpha = src_argb[3];
    src_argb += 4;
    dst_abgr[0] = red;
    dst_abgr[1] = green;
    dst_abgr[2] = blue;
    dst_abgr[3] = alpha;
    dst_abgr += 4;
  }
}
// Converts a row of ARGB pixels (B, G, R, A in memory) to BGRA
// (A, R, G, B in memory), i.e. reverses the byte order of every pixel.
void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    const uint8_t alpha = src_argb[3];
    src_argb += 4;
    dst_bgra[0] = alpha;
    dst_bgra[1] = red;
    dst_bgra[2] = green;
    dst_bgra[3] = blue;
    dst_bgra += 4;
  }
}
// Converts a row of ARGB pixels (B, G, R, A in memory) to RGBA
// (A, B, G, R in memory), i.e. moves alpha to the front of every pixel.
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    const uint8_t alpha = src_argb[3];
    src_argb += 4;
    dst_rgba[0] = alpha;
    dst_rgba[1] = blue;
    dst_rgba[2] = green;
    dst_rgba[3] = red;
    dst_rgba += 4;
  }
}
// Packs a row of 4 byte ARGB pixels into 3 byte RGB24 pixels by dropping the
// alpha byte; B, G, R keep their order.
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = blue;
    dst_rgb[1] = green;
    dst_rgb[2] = red;
    dst_rgb += 3;
  }
}
// Packs a row of 4 byte ARGB pixels (B, G, R, A in memory) into 3 byte RAW
// pixels (R, G, B in memory), dropping alpha.
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = red;
    dst_rgb[1] = green;
    dst_rgb[2] = blue;
    dst_rgb += 3;
  }
}
// Converts a row of RGBA pixels (A, B, G, R in memory) to ARGB
// (B, G, R, A in memory), i.e. moves alpha to the end of every pixel.
void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t alpha = src_rgba[0];
    const uint8_t blue = src_rgba[1];
    const uint8_t green = src_rgba[2];
    const uint8_t red = src_rgba[3];
    src_rgba += 4;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
    dst_argb += 4;
  }
}
// Packs a row of ARGB pixels into 2 byte RGB565 pixels, keeping the top 5, 6
// and 5 bits of B, G and R. Each pixel is stored low byte first, which is the
// layout RGB565ToARGBRow_C reads back.
// The bytes are stored individually so that dst_rgb needs no alignment, odd
// and even widths use the same byte order on every host, and no value is
// shifted into the sign bit of an int.
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t b = src_argb[0] >> 3;
    const uint32_t g = src_argb[1] >> 2;
    const uint32_t r = src_argb[2] >> 3;
    const uint32_t rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8_t)(rgb565 & 0xff);
    dst_rgb[1] = (uint8_t)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
// dither4 is a row of 4 values from 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB. When converting to
// fewer bits (565) this provides an ordered dither.
// The order in the 4x4 matrix in first byte is upper left.
// The 4 values are passed as an int, then referenced as an array, so
// endianness does not affect the order of the original matrix. However,
// dither4 will contain the value for the first pixel in its lowest byte on
// little endian hosts and in its highest byte on big endian hosts.
// Packs a row of ARGB pixels into 2 byte RGB565 pixels with ordered dither.
// dither4 holds 4 dither bytes, indexed in memory order; pixel x adds byte
// (x & 3) to B, G and R, clamps the sums to 255 and keeps their top 5, 6 and
// 5 bits. Each pixel is stored low byte first, which is the layout
// RGB565ToARGBRow_C reads back.
// The bytes are stored individually so that dst_rgb needs no alignment and
// the output byte order does not depend on the host.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             uint32_t dither4,
                             int width) {
  const unsigned char* dither = (const unsigned char*)(&dither4);
  int x;
  for (x = 0; x < width; ++x) {
    const int d = dither[x & 3];
    const uint32_t b = (uint32_t)clamp255(src_argb[0] + d) >> 3;
    const uint32_t g = (uint32_t)clamp255(src_argb[1] + d) >> 2;
    const uint32_t r = (uint32_t)clamp255(src_argb[2] + d) >> 3;
    const uint32_t rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8_t)(rgb565 & 0xff);
    dst_rgb[1] = (uint8_t)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
// Packs a row of ARGB pixels into 2 byte ARGB1555 pixels, keeping the top 5
// bits of B, G and R and the top bit of A. Each pixel is stored low byte
// first, which is the layout ARGB1555ToARGBRow_C reads back.
// The bytes are stored individually so that dst_rgb needs no alignment and
// the output byte order does not depend on the host.
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t b = src_argb[0] >> 3;
    const uint32_t g = src_argb[1] >> 3;
    const uint32_t r = src_argb[2] >> 3;
    const uint32_t a = src_argb[3] >> 7;
    const uint32_t argb1555 = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = (uint8_t)(argb1555 & 0xff);
    dst_rgb[1] = (uint8_t)(argb1555 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
// Packs a row of ARGB pixels into 2 byte ARGB4444 pixels, keeping the top 4
// bits of every channel. Each pixel is stored low byte first, which is the
// layout ARGB4444ToARGBRow_C reads back.
// The bytes are stored individually so that dst_rgb needs no alignment and
// the output byte order does not depend on the host.
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t b = src_argb[0] >> 4;
    const uint32_t g = src_argb[1] >> 4;
    const uint32_t r = src_argb[2] >> 4;
    const uint32_t a = src_argb[3] >> 4;
    const uint32_t argb4444 = b | (g << 4) | (r << 8) | (a << 12);
    dst_rgb[0] = (uint8_t)(argb4444 & 0xff);
    dst_rgb[1] = (uint8_t)(argb4444 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
// Converts a row of ABGR pixels (R, G, B, A in memory) to AR30 words. Each
// 8 bit colour channel is widened to 10 bits by appending its top 2 bits, and
// alpha keeps its top 2 bits. The word is B | G << 10 | R << 20 | A << 30 and
// is written in host byte order; dst_ar30 needs no alignment.
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    // memcpy rather than a *(uint32_t*) store: dst_ar30 is a byte pointer
    // with no alignment guarantee.
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}
// Converts a row of ARGB pixels (B, G, R, A in memory) to AR30 words. Each
// 8 bit colour channel is widened to 10 bits by appending its top 2 bits, and
// alpha keeps its top 2 bits. The word is B | G << 10 | R << 20 | A << 30 and
// is written in host byte order; dst_ar30 needs no alignment.
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    // memcpy rather than a *(uint32_t*) store: dst_ar30 is a byte pointer
    // with no alignment guarantee.
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
// Widens a row of 8 bit ARGB pixels to 16 bit per channel AR64, keeping the
// channel order. Each byte v becomes the 16 bit value with v in both halves.
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint16_t blue = src_argb[0] * 0x0101;
    const uint16_t green = src_argb[1] * 0x0101;
    const uint16_t red = src_argb[2] * 0x0101;
    const uint16_t alpha = src_argb[3] * 0x0101;
    src_argb += 4;
    dst_ar64[0] = blue;
    dst_ar64[1] = green;
    dst_ar64[2] = red;
    dst_ar64[3] = alpha;
    dst_ar64 += 4;
  }
}
// Widens a row of 8 bit ARGB pixels (B, G, R, A) to 16 bit per channel AB64
// (R, G, B, A). Each byte v becomes the 16 bit value with v in both halves.
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint16_t blue = src_argb[0] * 0x0101;
    const uint16_t green = src_argb[1] * 0x0101;
    const uint16_t red = src_argb[2] * 0x0101;
    const uint16_t alpha = src_argb[3] * 0x0101;
    src_argb += 4;
    dst_ab64[0] = red;
    dst_ab64[1] = green;
    dst_ab64[2] = blue;
    dst_ab64[3] = alpha;
    dst_ab64 += 4;
  }
}
// Narrows a row of 16 bit per channel AR64 pixels to 8 bit ARGB by keeping
// the high byte of every channel; channel order is unchanged.
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_ar64[0] >> 8;
    const uint8_t green = src_ar64[1] >> 8;
    const uint8_t red = src_ar64[2] >> 8;
    const uint8_t alpha = src_ar64[3] >> 8;
    src_ar64 += 4;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
    dst_argb += 4;
  }
}
// Narrows a row of 16 bit per channel AB64 pixels (R, G, B, A) to 8 bit ARGB
// (B, G, R, A) by keeping the high byte of every channel.
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t red = src_ab64[0] >> 8;
    const uint8_t green = src_ab64[1] >> 8;
    const uint8_t blue = src_ab64[2] >> 8;
    const uint8_t alpha = src_ab64[3] >> 8;
    src_ab64 += 4;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
    dst_argb += 4;
  }
}
// Converts a row of AR64 pixels to AB64 by exchanging the first and third
// 16 bit channel of every pixel. All four channels are loaded before any is
// stored.
void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint16_t blue = src_ar64[0];
    const uint16_t green = src_ar64[1];
    const uint16_t red = src_ar64[2];
    const uint16_t alpha = src_ar64[3];
    src_ar64 += 4;
    dst_ab64[0] = red;
    dst_ab64[1] = green;
    dst_ab64[2] = blue;
    dst_ab64[3] = alpha;
    dst_ab64 += 4;
  }
}
// TODO(fbarchard): Make shuffle compatible with SIMD versions
// Reorders the four 16 bit channels of every AR64 pixel. shuffler[0], [2],
// [4] and [6] are byte offsets within a pixel; halved, they select the source
// channel for output channels 0..3.
// NOTE(review): the loop runs width / 2 times, each iteration handling one
// 4 x 16 bit pixel - confirm the units of width with the callers.
void AR64ShuffleRow_C(const uint8_t* src_ar64,
                      uint8_t* dst_ar64,
                      const uint8_t* shuffler,
                      int width) {
  const uint16_t* src16 = (const uint16_t*)src_ar64;
  uint16_t* dst16 = (uint16_t*)dst_ar64;
  const int sel0 = shuffler[0] >> 1;
  const int sel1 = shuffler[2] >> 1;
  const int sel2 = shuffler[4] >> 1;
  const int sel3 = shuffler[6] >> 1;
  const int count = width / 2;
  int i;
  for (i = 0; i < count; ++i) {
    // Load every channel before storing so the row can be shuffled in place.
    const uint16_t c0 = src16[sel0];
    const uint16_t c1 = src16[sel1];
    const uint16_t c2 = src16[sel2];
    const uint16_t c3 = src16[sel3];
    src16 += 4;
    dst16[0] = c0;
    dst16[1] = c1;
    dst16[2] = c2;
    dst16[3] = c3;
    dst16 += 4;
  }
}
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
// Y = ((33 * r + 65 * g + 13 * b) >> 7) + 16.
static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 33 * r + 65 * g + 13 * b;
  return STATIC_CAST(uint8_t, (weighted >> 7) + 16);
}
#else
// 8 bit
// Y = (66 * r + 129 * g + 25 * b + 0x1080) >> 8, where 0x1080 is 16 << 8
// for the +16 offset plus 0x80 for rounding.
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * 128 + 0x1000 (for +16) and 0x0080 for round.
//  return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
//  0x7e80) >> 8;
static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 66 * r + 129 * g + 25 * b + 0x1080;
  return STATIC_CAST(uint8_t, weighted >> 8);
}
#endif
// Average of a and b, rounded up.
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
#ifdef LIBYUV_RGBTOU_TRUNCATE
// U from 8 bit R, G, B with a 0x8000 bias (128 << 8, no rounding term).
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  const int u = 112 * b - 74 * g - 38 * r + 0x8000;
  return STATIC_CAST(uint8_t, u >> 8);
}
// V from 8 bit R, G, B with a 0x8000 bias (128 << 8, no rounding term).
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  const int v = 112 * r - 94 * g - 18 * b + 0x8000;
  return STATIC_CAST(uint8_t, v >> 8);
}
#else
// TODO(fbarchard): Add rounding to x86 SIMD and use this
// U from 8 bit R, G, B with a 0x8080 bias (128 << 8 plus 0x80 for rounding).
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  const int u = 112 * b - 74 * g - 38 * r + 0x8080;
  return STATIC_CAST(uint8_t, u >> 8);
}
// V from 8 bit R, G, B with a 0x8080 bias (128 << 8 plus 0x80 for rounding).
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  const int v = 112 * r - 94 * g - 18 * b + 0x8080;
  return STATIC_CAST(uint8_t, v >> 8);
}
#endif
// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
// Without it, U and V are computed from channel values that are twice the
// average (the callers pass sums), using halved coefficients.
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
// U from 2x scaled R, G, B; the result is truncated to 8 bits.
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
  const int u = (112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080;
  return STATIC_CAST(uint8_t, u >> 8);
}
// V from 2x scaled R, G, B; the result is truncated to 8 bits.
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
  const int v = (112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080;
  return STATIC_CAST(uint8_t, v >> 8);
}
#endif
// ARGBToY_C and ARGBToUV_C
// MAKEROWY(NAME, R, G, B, BPP) defines two row functions for one packed RGB
// layout, where R, G and B are the byte offsets of the channels inside a
// pixel and BPP is the number of bytes per pixel:
//   NAME##ToYRow_C  writes one Y per pixel using RGBToY().
//   NAME##ToUVRow_C reads two rows (src_rgb and src_rgb + src_stride_rgb) and
//                   writes one U and one V for every 2x2 block of pixels. For
//                   an odd width the last U/V pair is computed from the
//                   remaining column of 2 pixels.
// Intel version mimic SSE/AVX which does 2 pavgb
#if defined(LIBYUV_ARGBTOUV_PAVGB)
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \
} \
}
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
// The 2x2 sum is halved with rounding, so RGB2xToU / RGB2xToV receive values
// that are twice the average; the odd width tail passes the sum of 2 pixels.
#define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = src_rgb[B] + src_rgb1[B]; \
uint16_t ag = src_rgb[G] + src_rgb1[G]; \
uint16_t ar = src_rgb[R] + src_rgb1[R]; \
dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \
} \
}
#endif
// Instantiations: (NAME, R offset, G offset, B offset, bytes per pixel).
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
// JPeg uses a variation on BT.601-1 full range
// y = 0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPeg 7 bit Y (deprecated)
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPeg 8 bit Y:
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPeg 8 bit U:
// b 0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPeg 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r 0.50000 * 255 = 127.5 = 127
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
// YJ = (38 * r + 75 * g + 15 * b + 64) >> 7; 64 is the rounding term.
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 38 * r + 75 * g + 15 * b + 64;
  return (uint8_t)(weighted >> 7);
}
#else
// 8 bit
// YJ = (77 * r + 150 * g + 29 * b + 128) >> 8; 128 is the rounding term.
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 77 * r + 150 * g + 29 * b + 128;
  return (uint8_t)(weighted >> 8);
}
#endif
#if defined(LIBYUV_ARGBTOUV_PAVGB)
// JPeg U from 8 bit R, G, B; 0x8080 is 128 << 8 plus 0x80 for rounding.
static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  const int u = 127 * b - 84 * g - 43 * r + 0x8080;
  return (uint8_t)(u >> 8);
}
// JPeg V from 8 bit R, G, B; 0x8080 is 128 << 8 plus 0x80 for rounding.
static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  const int v = 127 * r - 107 * g - 20 * b + 0x8080;
  return (uint8_t)(v >> 8);
}
#else
// JPeg U from 2x scaled R, G, B using halved coefficients.
static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  const int u = (127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080;
  return (uint8_t)(u >> 8);
}
// JPeg V from 2x scaled R, G, B using halved coefficients.
static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
  const int v = (127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080;
  return (uint8_t)(v >> 8);
}
#endif
// ARGBToYJ_C and ARGBToUVJ_C
// MAKEROWYJ(NAME, R, G, B, BPP) defines the JPeg (full range) counterparts of
// the Y and UV row functions for one packed RGB layout, where R, G and B are
// the byte offsets of the channels inside a pixel and BPP is the number of
// bytes per pixel:
//   NAME##ToYJRow_C  writes one Y per pixel using RGBToYJ().
//   NAME##ToUVJRow_C reads two rows (src_rgb and src_rgb + src_stride_rgb)
//                    and writes one U and one V for every 2x2 block of
//                    pixels. For an odd width the last U/V pair is computed
//                    from the remaining column of 2 pixels.
// Intel version mimic SSE/AVX which does 2 pavgb
#if defined(LIBYUV_ARGBTOUV_PAVGB)
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
dst_u[0] = RGBToUJ(ar, ag, ab); \
dst_v[0] = RGBToVJ(ar, ag, ab); \
} \
}
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
// The 2x2 sum is halved with rounding, so RGB2xToUJ / RGB2xToVJ receive
// values that are twice the average; the odd width tail passes the sum of 2
// pixels.
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
int x; \
for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
src_rgb += BPP; \
dst_y += 1; \
} \
} \
void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
int x; \
for (x = 0; x < width - 1; x += 2) { \
uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP] + 1) >> \
1; \
uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP] + 1) >> \
1; \
uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP] + 1) >> \
1; \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
src_rgb += BPP * 2; \
src_rgb1 += BPP * 2; \
dst_u += 1; \
dst_v += 1; \
} \
if (width & 1) { \
uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
dst_u[0] = RGB2xToUJ(ar, ag, ab); \
dst_v[0] = RGB2xToVJ(ar, ag, ab); \
} \
}
#endif
// Instantiations: (NAME, R offset, G offset, B offset, bytes per pixel).
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
// Computes one Y value per pixel for a row of 2 byte RGB565 pixels (low byte
// first). Channels are widened to 8 bits by bit replication and then passed
// to RGBToY().
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_rgb565[0];
    const uint8_t hi = src_rgb565[1];
    const uint8_t blue5 = lo & 0x1f;
    const uint8_t green6 =
        STATIC_CAST(uint8_t, (lo >> 5) | ((hi & 0x07) << 3));
    const uint8_t red5 = hi >> 3;
    const uint8_t blue = STATIC_CAST(uint8_t, (blue5 << 3) | (blue5 >> 2));
    const uint8_t green = STATIC_CAST(uint8_t, (green6 << 2) | (green6 >> 4));
    const uint8_t red = STATIC_CAST(uint8_t, (red5 << 3) | (red5 >> 2));
    src_rgb565 += 2;
    *dst_y++ = RGBToY(red, green, blue);
  }
}
// Computes one Y value per pixel for a row of 2 byte ARGB1555 pixels (low
// byte first). The 5 bit colour channels are widened to 8 bits by bit
// replication and then passed to RGBToY(); the alpha bit is ignored.
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_argb1555[0];
    const uint8_t hi = src_argb1555[1];
    const uint8_t blue5 = lo & 0x1f;
    const uint8_t green5 =
        STATIC_CAST(uint8_t, (lo >> 5) | ((hi & 0x03) << 3));
    const uint8_t red5 = (hi & 0x7c) >> 2;
    const uint8_t blue = STATIC_CAST(uint8_t, (blue5 << 3) | (blue5 >> 2));
    const uint8_t green = STATIC_CAST(uint8_t, (green5 << 3) | (green5 >> 2));
    const uint8_t red = STATIC_CAST(uint8_t, (red5 << 3) | (red5 >> 2));
    src_argb1555 += 2;
    *dst_y++ = RGBToY(red, green, blue);
  }
}
// Computes one Y value per pixel for a row of 2 byte ARGB4444 pixels (low
// byte first). The 4 bit colour channels are widened to 8 bits by repeating
// the nibble and then passed to RGBToY(); alpha is ignored.
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t lo = src_argb4444[0];
    const uint8_t hi = src_argb4444[1];
    const uint8_t blue4 = lo & 0x0f;
    const uint8_t green4 = lo >> 4;
    const uint8_t red4 = hi & 0x0f;
    const uint8_t blue = STATIC_CAST(uint8_t, (blue4 << 4) | blue4);
    const uint8_t green = STATIC_CAST(uint8_t, (green4 << 4) | green4);
    const uint8_t red = STATIC_CAST(uint8_t, (red4 << 4) | red4);
    src_argb4444 += 2;
    *dst_y++ = RGBToY(red, green, blue);
  }
}
// Expands the 2 byte RGB565 pixel at src (low byte first) to 8 bit blue,
// green and red by replicating the top bits of each field into its low bits.
static __inline void UnpackRGB565Pixel_C(const uint8_t* src,
                                         uint8_t* b,
                                         uint8_t* g,
                                         uint8_t* r) {
  const uint8_t b5 = STATIC_CAST(uint8_t, src[0] & 0x1f);
  const uint8_t g6 =
      STATIC_CAST(uint8_t, (src[0] >> 5) | ((src[1] & 0x07) << 3));
  const uint8_t r5 = STATIC_CAST(uint8_t, src[1] >> 3);
  *b = STATIC_CAST(uint8_t, (b5 << 3) | (b5 >> 2));
  *g = STATIC_CAST(uint8_t, (g6 << 2) | (g6 >> 4));
  *r = STATIC_CAST(uint8_t, (r5 << 3) | (r5 >> 2));
}

// Computes subsampled U and V for two rows of RGB565 pixels: src_rgb565 and
// the row src_stride_rgb565 bytes after it. One U and one V are written for
// every 2x2 block of pixels; for an odd width the last pair is computed from
// the remaining column of 2 pixels.
// With LIBYUV_ARGBTOUV_PAVGB the block is reduced with nested AVGB() averages
// and converted by RGBToU / RGBToV; otherwise the rounded half sum is
// converted by RGB2xToU / RGB2xToV.
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Pixels 0 and 1 come from the first row, 2 and 3 from the second row.
    uint8_t b0, g0, r0;
    uint8_t b1, g1, r1;
    uint8_t b2, g2, r2;
    uint8_t b3, g3, r3;
    UnpackRGB565Pixel_C(src_rgb565, &b0, &g0, &r0);
    UnpackRGB565Pixel_C(src_rgb565 + 2, &b1, &g1, &r1);
    UnpackRGB565Pixel_C(next_rgb565, &b2, &g2, &r2);
    UnpackRGB565Pixel_C(next_rgb565 + 2, &b3, &g3, &r3);
#if defined(LIBYUV_ARGBTOUV_PAVGB)
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Last column of an odd width: one pixel from each row.
    uint8_t b0, g0, r0;
    uint8_t b2, g2, r2;
    UnpackRGB565Pixel_C(src_rgb565, &b0, &g0, &r0);
    UnpackRGB565Pixel_C(next_rgb565, &b2, &g2, &r2);
#if defined(LIBYUV_ARGBTOUV_PAVGB)
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
// Expands the 2 byte ARGB1555 pixel at src (low byte first) to 8 bit blue,
// green and red by replicating the top bits of each 5 bit field into its low
// bits. The alpha bit is ignored.
static __inline void UnpackARGB1555Pixel_C(const uint8_t* src,
                                           uint8_t* b,
                                           uint8_t* g,
                                           uint8_t* r) {
  const uint8_t b5 = STATIC_CAST(uint8_t, src[0] & 0x1f);
  const uint8_t g5 =
      STATIC_CAST(uint8_t, (src[0] >> 5) | ((src[1] & 0x03) << 3));
  const uint8_t r5 = STATIC_CAST(uint8_t, (src[1] & 0x7c) >> 2);
  *b = STATIC_CAST(uint8_t, (b5 << 3) | (b5 >> 2));
  *g = STATIC_CAST(uint8_t, (g5 << 3) | (g5 >> 2));
  *r = STATIC_CAST(uint8_t, (r5 << 3) | (r5 >> 2));
}

// Computes subsampled U and V for two rows of ARGB1555 pixels: src_argb1555
// and the row src_stride_argb1555 bytes after it. One U and one V are written
// for every 2x2 block of pixels; for an odd width the last pair is computed
// from the remaining column of 2 pixels.
// With LIBYUV_ARGBTOUV_PAVGB the block is reduced with nested AVGB() averages
// and converted by RGBToU / RGBToV; otherwise the rounded half sum is
// converted by RGB2xToU / RGB2xToV.
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Pixels 0 and 1 come from the first row, 2 and 3 from the second row.
    uint8_t b0, g0, r0;
    uint8_t b1, g1, r1;
    uint8_t b2, g2, r2;
    uint8_t b3, g3, r3;
    UnpackARGB1555Pixel_C(src_argb1555, &b0, &g0, &r0);
    UnpackARGB1555Pixel_C(src_argb1555 + 2, &b1, &g1, &r1);
    UnpackARGB1555Pixel_C(next_argb1555, &b2, &g2, &r2);
    UnpackARGB1555Pixel_C(next_argb1555 + 2, &b3, &g3, &r3);
#if defined(LIBYUV_ARGBTOUV_PAVGB)
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Last column of an odd width: one pixel from each row.
    uint8_t b0, g0, r0;
    uint8_t b2, g2, r2;
    UnpackARGB1555Pixel_C(src_argb1555, &b0, &g0, &r0);
    UnpackARGB1555Pixel_C(next_argb1555, &b2, &g2, &r2);
#if defined(LIBYUV_ARGBTOUV_PAVGB)
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
// Converts two rows of ARGB4444 into one row of 2x2 subsampled U and V.
// The second row is read at src_argb4444 + src_stride_argb4444.
// Each pixel is 2 bytes. Per the masks below, B is the low nibble of byte 0,
// G is the high nibble of byte 0 and R is the low nibble of byte 1. The high
// nibble of byte 1 is not read.
// One U and one V byte are written per pair of columns; for an odd width the
// last column is built from its 2 vertical pixels only.
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
int src_stride_argb4444,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
// Row directly below the current one.
const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
int x;
// Each iteration consumes a 2x2 block: 4 bytes from each of the two rows.
for (x = 0; x < width - 1; x += 2) {
// Unpack 4 bit channels. Suffix 0/1 = current row, 2/3 = next row.
uint8_t b0 = src_argb4444[0] & 0x0f;
uint8_t g0 = src_argb4444[0] >> 4;
uint8_t r0 = src_argb4444[1] & 0x0f;
uint8_t b1 = src_argb4444[2] & 0x0f;
uint8_t g1 = src_argb4444[2] >> 4;
uint8_t r1 = src_argb4444[3] & 0x0f;
uint8_t b2 = next_argb4444[0] & 0x0f;
uint8_t g2 = next_argb4444[0] >> 4;
uint8_t r2 = next_argb4444[1] & 0x0f;
uint8_t b3 = next_argb4444[2] & 0x0f;
uint8_t g3 = next_argb4444[2] >> 4;
uint8_t r3 = next_argb4444[3] & 0x0f;
// Expand 4 bit to 8 bit by duplicating the nibble (0xN -> 0xNN).
b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1);
g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1);
r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1);
b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3);
g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3);
r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3);
#if defined(LIBYUV_ARGBTOUV_PAVGB)
// Pairwise averaging: each vertical pair first, then the two results.
// NOTE(review): AVGB is defined elsewhere; presumably a rounding byte
// average - confirm.
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
// Rounded half of the 4 pixel sum, i.e. twice the average, which is the
// scale handed to RGB2xToU / RGB2xToV.
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
src_argb4444 += 4;
next_argb4444 += 4;
dst_u += 1;
dst_v += 1;
}
// Odd width: the last column has only the 2 vertically adjacent pixels.
if (width & 1) {
uint8_t b0 = src_argb4444[0] & 0x0f;
uint8_t g0 = src_argb4444[0] >> 4;
uint8_t r0 = src_argb4444[1] & 0x0f;
uint8_t b2 = next_argb4444[0] & 0x0f;
uint8_t g2 = next_argb4444[0] >> 4;
uint8_t r2 = next_argb4444[1] & 0x0f;
// Same 4 bit to 8 bit expansion as in the main loop.
b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
#if defined(LIBYUV_ARGBTOUV_PAVGB)
uint8_t ab = AVGB(b0, b2);
uint8_t ag = AVGB(g0, g2);
uint8_t ar = AVGB(r0, r2);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
// Sum of the 2 pixels is already twice their average.
uint16_t b = b0 + b2;
uint16_t g = g0 + g2;
uint16_t r = r0 + r2;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
}
}
// Produces one U and one V sample for every ARGB pixel (no subsampling).
// Source bytes are read in B, G, R order; the 4th byte of each pixel is
// skipped.
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    *dst_u = RGBToU(red, green, blue);
    *dst_v = RGBToV(red, green, blue);
    ++dst_u;
    ++dst_v;
    src_argb += 4;  // Next pixel.
  }
}
// Replaces B, G and R of every pixel with the value RGBToYJ returns for that
// pixel; the 4th (alpha) byte is copied from the source unchanged.
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t luma = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
// Applies a sepia tone to a row of ARGB pixels in place.
// Weights are 7 bit fixed point (the sums are shifted right by 7). The 4th
// byte of each pixel is left untouched.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int sepia_b = (blue * 17 + green * 68 + red * 35) >> 7;
    const int sepia_g = (blue * 22 + green * 88 + red * 45) >> 7;
    const int sepia_r = (blue * 24 + green * 98 + red * 50) >> 7;
    // The blue weights sum to 120 (< 128), so blue cannot exceed 255 and
    // needs no clamp; green and red can, so they are clamped.
    dst_argb[0] = STATIC_CAST(uint8_t, sepia_b);
    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sepia_g));
    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sepia_r));
    dst_argb += 4;
  }
}
// Multiplies every pixel by a signed 4x4 matrix.
// matrix_argb holds 16 coefficients, 4 per output channel in B, G, R, A
// output order, each applied to the source B, G, R, A values. Products are
// summed, shifted right by 6 and passed through Clamp.
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          const int8_t* matrix_argb,
                          int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = src_argb[0];
    const int green = src_argb[1];
    const int red = src_argb[2];
    const int alpha = src_argb[3];
    int scaled[4];
    int ch;
    // Evaluate all four outputs before storing any of them.
    for (ch = 0; ch < 4; ++ch) {
      const int8_t* coeff = matrix_argb + ch * 4;
      scaled[ch] = (blue * coeff[0] + green * coeff[1] + red * coeff[2] +
                    alpha * coeff[3]) >>
                   6;
    }
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = STATIC_CAST(uint8_t, Clamp(scaled[ch]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
// Remaps all four channels of every pixel in place through a lookup table.
// table_argb is interleaved: the entry for channel c and input value v is at
// table_argb[v * 4 + c].
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read the whole pixel before overwriting any of it.
    const int slot_b = dst_argb[0] * 4;
    const int slot_g = dst_argb[1] * 4;
    const int slot_r = dst_argb[2] * 4;
    const int slot_a = dst_argb[3] * 4;
    dst_argb[0] = table_argb[slot_b + 0];
    dst_argb[1] = table_argb[slot_g + 1];
    dst_argb[2] = table_argb[slot_r + 2];
    dst_argb[3] = table_argb[slot_a + 3];
    dst_argb += 4;
  }
}
// Remaps B, G and R of every pixel in place through a lookup table; the 4th
// byte of each pixel is not touched.
// table_argb is interleaved with a stride of 4: the entry for channel c and
// input value v is at table_argb[v * 4 + c].
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read the three colour channels before overwriting any of them.
    const int slot_b = dst_argb[0] * 4;
    const int slot_g = dst_argb[1] * 4;
    const int slot_r = dst_argb[2] * 4;
    dst_argb[0] = table_argb[slot_b + 0];
    dst_argb[1] = table_argb[slot_g + 1];
    dst_argb[2] = table_argb[slot_r + 2];
    dst_argb += 4;
  }
}
// Quantizes B, G and R of every pixel in place:
//   out = ((in * scale) >> 16) * interval_size + interval_offset
// The result is truncated to 8 bits. The 4th byte of each pixel is not
// touched.
void ARGBQuantizeRow_C(uint8_t* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    int ch;
    for (ch = 0; ch < 3; ++ch) {
      const int level = (dst_argb[ch] * scale) >> 16;
      dst_argb[ch] =
          STATIC_CAST(uint8_t, level * interval_size + interval_offset);
    }
    dst_argb += 4;
  }
}
// Duplicates an 8 bit value into both bytes of a 16 bit value
// (0xAB -> 0xABAB). Fully parenthesized so it is safe inside any expression.
#define REPEAT8(v) ((v) | ((v) << 8))
// Multiplies two 16 bit values and keeps the top 8 bits of the 32 bit
// product. Fully parenthesized so operator precedence cannot leak in from
// the arguments or the surrounding expression.
#define SHADE(f, v) (((v) * (f)) >> 24)
// Scales every channel of each ARGB pixel by the matching byte of value.
// value is packed like a pixel: bits 0..7 scale B, 8..15 scale G, 16..23
// scale R and 24..31 scale A. Both the channel and its scale are widened to
// 16 bits by byte replication before multiplying.
void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    // The product is at most 0xffff * 0xffff, so the shifted result fits in
    // 8 bits.
    dst_argb[0] = (uint8_t)SHADE(b, b_scale);
    dst_argb[1] = (uint8_t)SHADE(g, g_scale);
    dst_argb[2] = (uint8_t)SHADE(r, r_scale);
    dst_argb[3] = (uint8_t)SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
// Multiplies two ARGB rows channel by channel:
//   dst = (src_argb * src_argb1 + 128) >> 8
// truncated to 8 bits, for all four channels of every pixel.
void ARGBMultiplyRow_C(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    uint32_t rounded[4];
    int ch;
    // Read both source pixels completely before storing the result.
    for (ch = 0; ch < 4; ++ch) {
      const uint32_t value = src_argb[ch];
      const uint32_t factor = src_argb1[ch];
      rounded[ch] = value * factor + 128;
    }
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = STATIC_CAST(uint8_t, rounded[ch] >> 8);
    }
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
// Adds two ARGB rows channel by channel; each sum is passed through clamp255
// before being stored.
void ARGBAddRow_C(const uint8_t* src_argb,
                  const uint8_t* src_argb1,
                  uint8_t* dst_argb,
                  int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    int total[4];
    int ch;
    // Read both source pixels completely before storing the result.
    for (ch = 0; ch < 4; ++ch) {
      const int value = src_argb[ch];
      const int addend = src_argb1[ch];
      total[ch] = addend + value;
    }
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = STATIC_CAST(uint8_t, clamp255(total[ch]));
    }
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
// Subtracts src_argb1 from src_argb channel by channel; each difference is
// passed through clamp0 before being stored.
void ARGBSubtractRow_C(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    int delta[4];
    int ch;
    // Read both source pixels completely before storing the result.
    for (ch = 0; ch < 4; ++ch) {
      const int value = src_argb[ch];
      const int subtrahend = src_argb1[ch];
      delta[ch] = value - subtrahend;
    }
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = STATIC_CAST(uint8_t, clamp0(delta[ch]));
    }
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
// Sobel functions which mimic SSSE3.
// Horizontal Sobel over three rows: for each column i the difference between
// columns i and i + 2 is taken in every row, weighted 1, 2, 1 across the
// rows, and the absolute value is clamped into a byte.
// Reads columns up to index width + 1 of each source row.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int diff0 = src_y0[col] - src_y0[col + 2];
    const int diff1 = src_y1[col] - src_y1[col + 2];
    const int diff2 = src_y2[col] - src_y2[col + 2];
    const int magnitude = Abs(diff0 + diff1 * 2 + diff2);
    dst_sobelx[col] = (uint8_t)(clamp255(magnitude));
  }
}
// Vertical Sobel over two rows: for each column i the difference between the
// two rows is taken at columns i, i + 1 and i + 2, weighted 1, 2, 1, and the
// absolute value is clamped into a byte.
// Reads columns up to index width + 1 of each source row.
void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int diff0 = src_y0[col + 0] - src_y1[col + 0];
    const int diff1 = src_y0[col + 1] - src_y1[col + 1];
    const int diff2 = src_y0[col + 2] - src_y1[col + 2];
    const int magnitude = Abs(diff0 + diff1 * 2 + diff2);
    dst_sobely[col] = (uint8_t)(clamp255(magnitude));
  }
}
// Combines the X and Y Sobel planes into gray ARGB: B, G and R all get
// clamp255(sobelx + sobely) and alpha is set to 255.
void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    const int edge = clamp255(gx + gy);
    dst_argb[0] = (uint8_t)(edge);
    dst_argb[1] = (uint8_t)(edge);
    dst_argb[2] = (uint8_t)(edge);
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;  // Next pixel.
  }
}
// Combines the X and Y Sobel planes into a single plane:
// dst_y[i] = clamp255(sobelx[i] + sobely[i]).
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    const int edge = clamp255(gx + gy);
    dst_y[col] = (uint8_t)(edge);
  }
}
// Packs the Sobel planes into ARGB: B = sobely, R = sobelx,
// G = clamp255(sobelx + sobely), and alpha is set to 255.
void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  int col;
  for (col = 0; col < width; ++col) {
    const int gx = src_sobelx[col];
    const int gy = src_sobely[col];
    const int both = clamp255(gx + gy);
    dst_argb[0] = (uint8_t)(gy);
    dst_argb[1] = (uint8_t)(both);
    dst_argb[2] = (uint8_t)(gx);
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;  // Next pixel.
  }
}
// Expands a row of 8 bit gray samples to ARGB: the sample is copied to B, G
// and R unchanged and alpha is set to 255.
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t gray = *src_y;
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = 255u;
    ++src_y;
    dst_argb += 4;  // Next pixel.
  }
}
// Macros to create SIMD specific yuv to rgb conversion constants.
// YUVCONSTANTSBODY expands to a brace initializer for struct YuvConstants;
// its layout differs per architecture family.
// clang-format off
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
// Bias values include subtract 128 from U and V, bias from Y and rounding.
// For B and R bias is negative. For G bias is positive.
// Layout: the first initializer is {UB, VR, UG, VG} zero padded to 16
// entries; the second is {YG, B bias, G bias, R bias, YB} zero padded to 8
// entries.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
{{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
{YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
0, 0}}
#else
// Layout: 16 {UB, 0} pairs, 16 {UG, VG} pairs, 16 {0, VR} pairs, then 16
// copies of YG and 16 copies of YB.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
{{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, \
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, \
{UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
{0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, \
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, \
{YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
{YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif
// clang-format on
// Defines two constant sets per colorspace: kYuv<name>Constants, and
// kYvu<name>Constants which is built with the U and V coefficients swapped
// (VR, VG, UG, UB in place of UB, UG, VG, VR).
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR) \
const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR); \
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
// TODO(fbarchard): Generate SIMD structures from float matrix.
// BT.601 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
// B = (Y - 16) * 1.164 + U * 2.018
// KR = 0.299; KB = 0.114
// U and V contributions to R,G,B.
// Coefficients are the reference values scaled by 64. Unless one of the
// UNLIMITED macros below is defined, UB is capped at 128.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* min(128, round(2.018 * 64)) */
#endif
#define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */
#define VR 102 /* round(1.596 * 64) */
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
// Undefine the working names so the next colorspace can redefine them.
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.601 full range YUV to RGB reference (aka JPEG)
// * R = Y + V * 1.40200
// * G = Y - U * 0.34414 - V * 0.71414
// * B = Y + U * 1.77200
// KR = 0.299; KB = 0.114
// U and V contributions to R,G,B.
// Coefficients are the reference values scaled by 64.
#define UB 113 /* round(1.77200 * 64) */
#define UG 22 /* round(0.34414 * 64) */
#define VG 46 /* round(0.71414 * 64) */
#define VR 90 /* round(1.40200 * 64) */
// Y contribution to R,G,B. Scale and bias.
// Full range has no -16 offset on Y, so the bias is only the rounding term.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
// Undefine the working names so the next colorspace can redefine them.
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.709 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.793
// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
// B = (Y - 16) * 1.164 + U * 2.112
// KR = 0.2126, KB = 0.0722
// U and V contributions to R,G,B.
// Coefficients are the reference values scaled by 64. Unless one of the
// UNLIMITED macros below is defined, UB is capped at 128.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* min(128, round(2.112 * 64)) */
#endif
#define UG 14 /* round(0.213 * 64) */
#define VG 34 /* round(0.533 * 64) */
#define VR 115 /* round(1.793 * 64) */
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
// Undefine the working names so the next colorspace can redefine them.
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.709 full range YUV to RGB reference
// R = Y + V * 1.5748
// G = Y - U * 0.18732 - V * 0.46812
// B = Y + U * 1.8556
// KR = 0.2126, KB = 0.0722
// U and V contributions to R,G,B.
// Coefficients are the reference values scaled by 64.
#define UB 119 /* round(1.8556 * 64) */
#define UG 12 /* round(0.18732 * 64) */
#define VG 30 /* round(0.46812 * 64) */
#define VR 101 /* round(1.5748 * 64) */
// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
// Undefine the working names so the next colorspace can redefine them.
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.2020 limited range YUV to RGB reference
// R = (Y - 16) * 1.164384 + V * 1.67867
// G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
// B = (Y - 16) * 1.164384 + U * 2.14177
// KR = 0.2627; KB = 0.0593
// U and V contributions to R,G,B.
// Coefficients are the reference values scaled by 64. Unless one of the
// UNLIMITED macros below is defined, UB is capped at 128.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* min(128, round(2.142 * 64)) */
#endif
#define UG 12 /* round(0.187326 * 64) */
#define VG 42 /* round(0.65042 * 64) */
#define VR 107 /* round(1.67867 * 64) */
// Y contribution to R,G,B. Scale and bias.
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
// Undefine the working names so the next colorspace can redefine them.
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// BT.2020 full range YUV to RGB reference
// R = Y + V * 1.474600
// G = Y - U * 0.164553 - V * 0.571353
// B = Y + U * 1.881400
// KR = 0.2627; KB = 0.0593
// U and V contributions to R,G,B.
// Coefficients are the reference values scaled by 64.
#define UB 120 /* round(1.881400 * 64) */
#define UG 11 /* round(0.164553 * 64) */
#define VG 37 /* round(0.571353 * 64) */
#define VR 94 /* round(1.474600 * 64) */
// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */
MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
// Last colorspace: drop the working names and the generator macro.
#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR
// NOTE(review): BB, BG and BR are not defined anywhere in this section;
// these three #undefs look like leftovers - confirm before removing.
#undef BB
#undef BG
#undef BR
#undef MAKEYUVCONSTANTS
// Helper macros shared by the YuvPixel* functions below.
// LOAD_YUV_CONSTANTS declares local coefficient variables read from a
// pointer that must be named yuvconstants in the enclosing scope.
// CALC_RGB16 requires locals named y32, u and v in scope and declares y1,
// b16, g16 and r16. Because both macros expand to declarations they can be
// used only once per scope.
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
// This layout reads the UV coefficients from kUVCoeff and the Y gain plus
// the precomputed B, G, R biases from kRGBCoeffBias.
#define LOAD_YUV_CONSTANTS \
int ub = yuvconstants->kUVCoeff[0]; \
int vr = yuvconstants->kUVCoeff[1]; \
int ug = yuvconstants->kUVCoeff[2]; \
int vg = yuvconstants->kUVCoeff[3]; \
int yg = yuvconstants->kRGBCoeffBias[0]; \
int bb = yuvconstants->kRGBCoeffBias[1]; \
int bg = yuvconstants->kRGBCoeffBias[2]; \
int br = yuvconstants->kRGBCoeffBias[3]
// u and v are used as is; bb, bg and br carry the offsets.
#define CALC_RGB16 \
int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
int b16 = y1 + (u * ub) - bb; \
int g16 = y1 + bg - (u * ug + v * vg); \
int r16 = y1 + (v * vr) - br
#else
// This layout reads element 0 or 1 of the per-channel coefficient arrays.
#define LOAD_YUV_CONSTANTS \
int ub = yuvconstants->kUVToB[0]; \
int ug = yuvconstants->kUVToG[0]; \
int vg = yuvconstants->kUVToG[1]; \
int vr = yuvconstants->kUVToR[1]; \
int yg = yuvconstants->kYToRgb[0]; \
int yb = yuvconstants->kYBiasToRgb[0]
// u and v are narrowed to int8_t and have 0x80 subtracted before the
// multiplies; the Y bias yb is added to y1 directly.
#define CALC_RGB16 \
int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
int8_t ui = (int8_t)u; \
int8_t vi = (int8_t)v; \
ui -= 0x80; \
vi -= 0x80; \
int b16 = y1 + (ui * ub); \
int g16 = y1 - (ui * ug + vi * vg); \
int r16 = y1 + (vi * vr)
#endif
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and leaves result as 8 bit.
// The locals y32, u and v are consumed by name by CALC_RGB16, which also
// declares b16, g16 and r16.
static __inline void YuvPixel(uint8_t y,
uint8_t u,
uint8_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
// Replicate the 8 bit Y into both bytes of a 16 bit value.
uint32_t y32 = y * 0x0101;
CALC_RGB16;
// Shift out the low 6 bits, then clamp to a byte.
*b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
*g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
*r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
}
// Reads 8 bit YUV and leaves result as 16 bit.
// b, g and r receive the values computed by CALC_RGB16 without any shift or
// clamp. The locals y32, u and v are consumed by name by CALC_RGB16.
static __inline void YuvPixel8_16(uint8_t y,
uint8_t u,
uint8_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
// Replicate the 8 bit Y into both bytes of a 16 bit value.
uint32_t y32 = y * 0x0101;
CALC_RGB16;
*b = b16;
*g = g16;
*r = r16;
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit.
// b, g and r receive the values computed by CALC_RGB16 without any shift or
// clamp. The locals y32, u and v are consumed by name by CALC_RGB16.
static __inline void YuvPixel10_16(uint16_t y,
uint16_t u,
uint16_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
// Widen 10 bit Y to 16 bits, filling the low 6 bits from its top bits.
uint32_t y32 = (y << 6) | (y >> 4);
// Drop the low 2 bits of chroma and limit it to 255 for CALC_RGB16.
u = STATIC_CAST(uint8_t, clamp255(u >> 2));
v = STATIC_CAST(uint8_t, clamp255(v >> 2));
CALC_RGB16;
*b = b16;
*g = g16;
*r = r16;
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 12 bit YUV and leaves result as 16 bit.
// b, g and r receive the values computed by CALC_RGB16 without any shift or
// clamp. The locals y32, u and v are consumed by name by CALC_RGB16.
// The samples are taken as uint16_t, matching YuvPixel10_16 and the uint16_t
// values passed by the callers in this file. With a signed parameter a
// sample that has bit 15 set would become negative, and left shifting a
// negative value is undefined behavior.
static __inline void YuvPixel12_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Widen 12 bit Y to 16 bits, filling the low 4 bits from its top bits.
  uint32_t y32 = (y << 4) | (y >> 8);
  // Drop the low 4 bits of chroma and limit it to 255 for CALC_RGB16.
  u = STATIC_CAST(uint8_t, clamp255(u >> 4));
  v = STATIC_CAST(uint8_t, clamp255(v >> 4));
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
// C reference code that mimics the YUV 10 bit assembly.
// Converts one 10 bit YUV sample to 8 bit B, G, R: YuvPixel10_16 produces
// the intermediate values, which are shifted right by 6 and clamped.
static __inline void YuvPixel10(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int blue16;
  int green16;
  int red16;
  YuvPixel10_16(y, u, v, &blue16, &green16, &red16, yuvconstants);
  *b = STATIC_CAST(uint8_t, Clamp(blue16 >> 6));
  *g = STATIC_CAST(uint8_t, Clamp(green16 >> 6));
  *r = STATIC_CAST(uint8_t, Clamp(red16 >> 6));
}
// C reference code that mimics the YUV 12 bit assembly.
// Converts one 12 bit YUV sample to 8 bit B, G, R: YuvPixel12_16 produces
// the intermediate values, which are shifted right by 6 and clamped.
static __inline void YuvPixel12(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int blue16;
  int green16;
  int red16;
  YuvPixel12_16(y, u, v, &blue16, &green16, &red16, yuvconstants);
  *b = STATIC_CAST(uint8_t, Clamp(blue16 >> 6));
  *g = STATIC_CAST(uint8_t, Clamp(green16 >> 6));
  *r = STATIC_CAST(uint8_t, Clamp(red16 >> 6));
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 8 bit.
// The locals y32, u and v are consumed by name by CALC_RGB16, which also
// declares b16, g16 and r16.
static __inline void YuvPixel16_8(uint16_t y,
uint16_t u,
uint16_t v,
uint8_t* b,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
// Y is already 16 bits wide, so it is used without expansion.
uint32_t y32 = y;
// Keep only the top 8 bits of each 16 bit chroma sample for CALC_RGB16.
u = STATIC_CAST(uint16_t, clamp255(u >> 8));
v = STATIC_CAST(uint16_t, clamp255(v >> 8));
CALC_RGB16;
// Shift out the low 6 bits, then clamp to a byte.
*b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
*g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
*r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
}
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 16 bit.
// b, g and r receive the values computed by CALC_RGB16 without any shift or
// clamp. The locals y32, u and v are consumed by name by CALC_RGB16.
static __inline void YuvPixel16_16(uint16_t y,
uint16_t u,
uint16_t v,
int* b,
int* g,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
// Y is already 16 bits wide, so it is used without expansion.
uint32_t y32 = y;
// Keep only the top 8 bits of each 16 bit chroma sample for CALC_RGB16.
u = STATIC_CAST(uint16_t, clamp255(u >> 8));
v = STATIC_CAST(uint16_t, clamp255(v >> 8));
CALC_RGB16;
*b = b16;
*g = g16;
*r = r16;
}
// C reference code that mimics the YUV assembly.
// Converts one 8 bit Y sample to gray: the same 8 bit value is written to
// *b, *g and *r. Only the Y gain and Y bias of yuvconstants are read.
static __inline void YPixel(uint8_t y,
                            uint8_t* b,
                            uint8_t* g,
                            uint8_t* r,
                            const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
  const int y_gain = yuvconstants->kRGBCoeffBias[0];
  const int y_bias = yuvconstants->kRGBCoeffBias[4];
#else
  const int y_gain = yuvconstants->kYToRgb[0];
  const int y_bias = yuvconstants->kYBiasToRgb[0];
#endif
  // Replicate Y into 16 bits, apply the gain and keep the high half.
  const uint32_t scaled = (uint32_t)(y * 0x0101 * y_gain) >> 16;
  // Add the bias, shift out the low 6 bits and clamp to a byte.
  const uint8_t gray =
      STATIC_CAST(uint8_t, Clamp(((int32_t)(scaled) + y_bias) >> 6));
  *b = gray;
  *g = gray;
  *r = gray;
}
// Converts a row of I444 (one U and one V sample per pixel) to ARGB.
// Alpha is set to 255.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    ++src_y;
    ++src_u;
    ++src_v;
    rgb_buf += 4;  // Next pixel.
  }
}
// Converts a row of I444 (one U and one V sample per pixel) to 3 byte per
// pixel output, stored in B, G, R order.
void I444ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    ++src_y;
    ++src_u;
    ++src_v;
    rgb_buf += 3;  // Next pixel.
  }
}
// Converts a row of I422 to ARGB; each U/V sample is shared by 2 horizontal
// pixels. Alpha is set to 255.
// Also used for 420.
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  // Pixel pairs sharing one chroma sample.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
// 10 bit YUV to ARGB.
// Each U/V sample is shared by 2 horizontal pixels. Alpha is set to 255.
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  // Pixel pairs sharing one chroma sample.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel10(src_y[1], src_u[0], src_v[0], &rgb_buf[4], &rgb_buf[5],
               &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
// 10 bit YUV with one U and one V sample per pixel to ARGB.
// Alpha is set to 255.
void I410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    ++src_y;
    ++src_u;
    ++src_v;
    rgb_buf += 4;  // Next pixel.
  }
}
// 10 bit YUV plus a 10 bit alpha plane to ARGB.
// Each U/V sample is shared by 2 horizontal pixels. Alpha is shifted right
// by 2 and passed through clamp255.
void I210AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int remaining;
  // Pixel pairs sharing one chroma sample.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
    YuvPixel10(src_y[1], src_u[0], src_v[0], &rgb_buf[4], &rgb_buf[5],
               &rgb_buf[6], yuvconstants);
    rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2));
    src_y += 2;
    ++src_u;
    ++src_v;
    src_a += 2;
    rgb_buf += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
  }
}
// 10 bit YUV with one U and one V sample per pixel, plus a 10 bit alpha
// plane, to ARGB. Alpha is shifted right by 2 and passed through clamp255.
void I410AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
    ++src_y;
    ++src_u;
    ++src_v;
    ++src_a;
    rgb_buf += 4;  // Next pixel.
  }
}
// 12 bit YUV to ARGB.
// Each U/V sample is shared by 2 horizontal pixels. Alpha is set to 255.
void I212ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  // Pixel pairs sharing one chroma sample.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel12(src_y[1], src_u[0], src_v[0], &rgb_buf[4], &rgb_buf[5],
               &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
               &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
// Packs one pixel into a 32 bit AR30 word and stores it at rgb_buf.
// b, g and r are the unshifted values produced by the YuvPixel*_16 helpers.
// Each is shifted right by 4 and passed through Clamp10, then packed with B
// in bits 0..9, G in bits 10..19, R in bits 20..29 and both top (alpha) bits
// set.
// The word is written with memcpy, in native byte order, because rgb_buf is
// a byte pointer with no alignment guarantee; storing through a cast
// uint32_t pointer would risk a misaligned access and break strict aliasing.
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = b >> 4;  // convert 8 bit 10.6 to 10 bit.
  g = g >> 4;
  r = r >> 4;
  b = Clamp10(b);
  g = Clamp10(g);
  r = Clamp10(r);
  ar30 = (uint32_t)b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
  memcpy(rgb_buf, &ar30, sizeof(ar30));
}
// 10 bit YUV to 10 bit AR30.
// Each U/V sample is shared by 2 horizontal pixels; every output pixel is 4
// bytes.
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  int blue;
  int green;
  int red;
  // Pixel pairs sharing one chroma sample.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
    YuvPixel10_16(src_y[1], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf + 4, blue, green, red);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
  }
}
// 12 bit YUV to 10 bit AR30.
// Each U/V sample is shared by 2 horizontal pixels; every output pixel is 4
// bytes.
void I212ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  int blue;
  int green;
  int red;
  // Pixel pairs sharing one chroma sample.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf + 4, blue, green, red);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
  }
}
// 10 bit YUV with one U and one V sample per pixel to 10 bit AR30.
// Every output pixel is 4 bytes.
void I410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  int blue;
  int green;
  int red;
  for (remaining = width; remaining > 0; --remaining) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
    ++src_y;
    ++src_u;
    ++src_v;
    rgb_buf += 4;  // Next pixel.
  }
}
// P210 has 10 bits in msb of 16 bit NV12 style layout.
// src_uv holds interleaved U, V pairs; each pair is shared by 2 horizontal
// pixels. Alpha is set to 255.
void P210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  // Pixel pairs sharing one U, V pair.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], &dst_argb[0], &dst_argb[1],
                 &dst_argb[2], yuvconstants);
    dst_argb[3] = 255;
    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], &dst_argb[4], &dst_argb[5],
                 &dst_argb[6], yuvconstants);
    dst_argb[7] = 255;
    src_y += 2;
    src_uv += 2;
    dst_argb += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], &dst_argb[0], &dst_argb[1],
                 &dst_argb[2], yuvconstants);
    dst_argb[3] = 255;
  }
}
// 16 bit Y plus interleaved 16 bit U, V (one pair per pixel) to ARGB.
// Alpha is set to 255.
void P410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], &dst_argb[0], &dst_argb[1],
                 &dst_argb[2], yuvconstants);
    dst_argb[3] = 255;
    ++src_y;
    src_uv += 2;
    dst_argb += 4;  // Next pixel.
  }
}
// 16 bit Y plus interleaved 16 bit U, V to 10 bit AR30.
// Each U, V pair is shared by 2 horizontal pixels; every output pixel is 4
// bytes.
void P210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  int blue;
  int green;
  int red;
  // Pixel pairs sharing one U, V pair.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(dst_ar30, blue, green, red);
    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(dst_ar30 + 4, blue, green, red);
    src_y += 2;
    src_uv += 2;
    dst_ar30 += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(dst_ar30, blue, green, red);
  }
}
// 16 bit Y plus interleaved 16 bit U, V (one pair per pixel) to 10 bit AR30.
// Every output pixel is 4 bytes.
void P410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  int blue;
  int green;
  int red;
  for (remaining = width; remaining > 0; --remaining) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &blue, &green, &red,
                  yuvconstants);
    StoreAR30(dst_ar30, blue, green, red);
    ++src_y;
    src_uv += 2;
    dst_ar30 += 4;  // Next pixel.
  }
}
// 8 bit YUV to 10 bit AR30.
// YuvPixel8_16 supplies the unshifted intermediate values and StoreAR30
// reduces them to 10 bits, the same store path the 10 bit sources use.
// Each U/V sample is shared by 2 horizontal pixels.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  int blue;
  int green;
  int red;
  // Pixel pairs sharing one chroma sample.
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                 yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &blue, &green, &red,
                 yuvconstants);
    StoreAR30(rgb_buf + 4, blue, green, red);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Two pixels.
  }
  // Trailing pixel of an odd width row.
  if (width & 1) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                 yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
  }
}
void I444AlphaToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width; ++x)