Added Direct3D shader to hardware accelerate YV12 and IYUV textures.
diff --git a/src/render/direct3d/SDL_render_d3d.c b/src/render/direct3d/SDL_render_d3d.c
index 1c3abc1..95a3d74 100644
--- a/src/render/direct3d/SDL_render_d3d.c
+++ b/src/render/direct3d/SDL_render_d3d.c
@@ -169,6 +169,32 @@
LPD3DXBUFFER* ppShader,
LPD3DXBUFFER* ppErrorMsgs);
+static void PrintShaderData(LPDWORD shader_data, DWORD shader_size)
+{
+ OutputDebugStringA("const DWORD shader_data[] = {\n\t");
+ {
+ SDL_bool newline = SDL_FALSE;
+ unsigned i;
+ for (i = 0; i < shader_size / sizeof(DWORD); ++i) {
+ char dword[11];
+ if (i > 0) {
+ if ((i%6) == 0) {
+ newline = SDL_TRUE;
+ }
+ if (newline) {
+ OutputDebugStringA(",\n ");
+ newline = SDL_FALSE;
+ } else {
+ OutputDebugStringA(", ");
+ }
+ }
+ SDL_snprintf(dword, sizeof(dword), "0x%8.8x", shader_data[i]);
+ OutputDebugStringA(dword);
+ }
+ OutputDebugStringA("\n};\n");
+ }
+}
+
#endif /* ASSEMBLE_SHADER */
@@ -228,17 +254,26 @@
SDL_bool updateSize;
SDL_bool beginScene;
SDL_bool enableSeparateAlphaBlend;
- D3DTEXTUREFILTERTYPE scaleMode;
+ D3DTEXTUREFILTERTYPE scaleMode[8];
IDirect3DSurface9 *defaultRenderTarget;
IDirect3DSurface9 *currentRenderTarget;
void* d3dxDLL;
ID3DXMatrixStack *matrixStack;
+ LPDIRECT3DPIXELSHADER9 ps_yuv;
} D3D_RenderData;
typedef struct
{
IDirect3DTexture9 *texture;
D3DTEXTUREFILTERTYPE scaleMode;
+
+ /* YV12 texture support */
+ SDL_bool yuv;
+ IDirect3DTexture9 *utexture;
+ IDirect3DTexture9 *vtexture;
+ Uint8 *pixels;
+ int pitch;
+ SDL_Rect locked_rect;
} D3D_TextureData;
typedef struct
@@ -337,6 +372,9 @@
return D3DFMT_X8R8G8B8;
case SDL_PIXELFORMAT_ARGB8888:
return D3DFMT_A8R8G8B8;
+ case SDL_PIXELFORMAT_YV12:
+ case SDL_PIXELFORMAT_IYUV:
+ return D3DFMT_L8;
default:
return D3DFMT_UNKNOWN;
}
@@ -385,7 +423,7 @@
D3DCULL_NONE);
IDirect3DDevice9_SetRenderState(data->device, D3DRS_LIGHTING, FALSE);
IDirect3DDevice9_GetRenderTarget(data->device, 0, &data->defaultRenderTarget);
- data->scaleMode = D3DTEXF_FORCE_DWORD;
+ SDL_memset(data->scaleMode, 0xFF, sizeof(data->scaleMode));
return 0;
}
@@ -587,7 +625,7 @@
return NULL;
}
data->beginScene = SDL_TRUE;
- data->scaleMode = D3DTEXF_FORCE_DWORD;
+ SDL_memset(data->scaleMode, 0xFF, sizeof(data->scaleMode));
/* Get presentation parameters to fill info */
result = IDirect3DDevice9_GetSwapChain(data->device, 0, &chain);
@@ -676,6 +714,136 @@
IDirect3DDevice9_SetTransform(data->device, D3DTS_WORLD, &matrix);
IDirect3DDevice9_SetTransform(data->device, D3DTS_VIEW, &matrix);
+ if (caps.MaxSimultaneousTextures >= 3)
+ {
+#ifdef ASSEMBLE_SHADER
+ /* This shader was created by running the following HLSL through the fxc compiler
+ and then tuning the generated assembly.
+
+ fxc /T fx_4_0 /O3 /Gfa /Fc yuv.fxc yuv.fx
+
+ --- yuv.fx ---
+ Texture2D g_txY;
+ Texture2D g_txU;
+ Texture2D g_txV;
+
+ SamplerState samLinear
+ {
+ Filter = ANISOTROPIC;
+ AddressU = Clamp;
+ AddressV = Clamp;
+ MaxAnisotropy = 1;
+ };
+
+ struct VS_OUTPUT
+ {
+ float2 TextureUV : TEXCOORD0;
+ };
+
+ struct PS_OUTPUT
+ {
+ float4 RGBAColor : SV_Target;
+ };
+
+ PS_OUTPUT YUV420( VS_OUTPUT In )
+ {
+ const float3 offset = {-0.0625, -0.5, -0.5};
+ const float3 Rcoeff = {1.164, 0.000, 1.596};
+ const float3 Gcoeff = {1.164, -0.391, -0.813};
+ const float3 Bcoeff = {1.164, 2.018, 0.000};
+
+ PS_OUTPUT Output;
+ float2 TextureUV = In.TextureUV;
+
+ float3 yuv;
+ yuv.x = g_txY.Sample( samLinear, TextureUV ).r;
+ yuv.y = g_txU.Sample( samLinear, TextureUV ).r;
+ yuv.z = g_txV.Sample( samLinear, TextureUV ).r;
+
+ yuv += offset;
+ Output.RGBAColor.r = dot(yuv, Rcoeff);
+ Output.RGBAColor.g = dot(yuv, Gcoeff);
+ Output.RGBAColor.b = dot(yuv, Bcoeff);
+ Output.RGBAColor.a = 1.0f;
+
+ return Output;
+ }
+
+ technique10 RenderYUV420
+ {
+ pass P0
+ {
+ SetPixelShader( CompileShader( ps_4_0_level_9_0, YUV420() ) );
+ }
+ }
+ */
+ const char *shader_text =
+ "ps_2_0\n"
+ "def c0, -0.0625, -0.5, -0.5, 1\n"
+ "def c1, 1.16400003, 0, 1.59599996, 0\n"
+ "def c2, 1.16400003, -0.391000003, -0.813000023, 0\n"
+ "def c3, 1.16400003, 2.01799989, 0, 0\n"
+ "dcl t0.xy\n"
+ "dcl v0.xyzw\n"
+ "dcl_2d s0\n"
+ "dcl_2d s1\n"
+ "dcl_2d s2\n"
+ "texld r0, t0, s0\n"
+ "texld r1, t0, s1\n"
+ "texld r2, t0, s2\n"
+ "mov r0.y, r1.x\n"
+ "mov r0.z, r2.x\n"
+ "add r0.xyz, r0, c0\n"
+ "dp3 r1.x, r0, c1\n"
+ "dp3 r1.y, r0, c2\n"
+ "dp2add r1.z, r0, c3, c3.z\n" /* Logically this is "dp3 r1.z, r0, c3" but the optimizer did its magic */
+ "mov r1.w, c0.w\n"
+ "mul r0, r1, v0\n" /* Not in the HLSL, multiply by vertex color */
+ "mov oC0, r0\n"
+ ;
+ LPD3DXBUFFER pCode;
+ LPD3DXBUFFER pErrorMsgs;
+ LPDWORD shader_data = NULL;
+ DWORD shader_size = 0;
+ result = D3DXAssembleShader(shader_text, SDL_strlen(shader_text), NULL, NULL, 0, &pCode, &pErrorMsgs);
+ if (!FAILED(result)) {
+ shader_data = (DWORD*)pCode->lpVtbl->GetBufferPointer(pCode);
+ shader_size = pCode->lpVtbl->GetBufferSize(pCode);
+ PrintShaderData(shader_data, shader_size);
+ } else {
+ const char *error = (const char *)pErrorMsgs->lpVtbl->GetBufferPointer(pErrorMsgs);
+ SDL_SetError("Couldn't assemble shader: %s", error);
+ }
+#else
+ const DWORD shader_data[] = {
+ 0xffff0200, 0x05000051, 0xa00f0000, 0xbd800000, 0xbf000000, 0xbf000000,
+ 0x3f800000, 0x05000051, 0xa00f0001, 0x3f94fdf4, 0x00000000, 0x3fcc49ba,
+ 0x00000000, 0x05000051, 0xa00f0002, 0x3f94fdf4, 0xbec83127, 0xbf5020c5,
+ 0x00000000, 0x05000051, 0xa00f0003, 0x3f94fdf4, 0x400126e9, 0x00000000,
+ 0x00000000, 0x0200001f, 0x80000000, 0xb0030000, 0x0200001f, 0x80000000,
+ 0x900f0000, 0x0200001f, 0x90000000, 0xa00f0800, 0x0200001f, 0x90000000,
+ 0xa00f0801, 0x0200001f, 0x90000000, 0xa00f0802, 0x03000042, 0x800f0000,
+ 0xb0e40000, 0xa0e40800, 0x03000042, 0x800f0001, 0xb0e40000, 0xa0e40801,
+ 0x03000042, 0x800f0002, 0xb0e40000, 0xa0e40802, 0x02000001, 0x80020000,
+ 0x80000001, 0x02000001, 0x80040000, 0x80000002, 0x03000002, 0x80070000,
+ 0x80e40000, 0xa0e40000, 0x03000008, 0x80010001, 0x80e40000, 0xa0e40001,
+ 0x03000008, 0x80020001, 0x80e40000, 0xa0e40002, 0x0400005a, 0x80040001,
+ 0x80e40000, 0xa0e40003, 0xa0aa0003, 0x02000001, 0x80080001, 0xa0ff0000,
+ 0x03000005, 0x800f0000, 0x80e40001, 0x90e40000, 0x02000001, 0x800f0800,
+ 0x80e40000, 0x0000ffff
+ };
+#endif
+ if (shader_data) {
+ result = IDirect3DDevice9_CreatePixelShader(data->device, shader_data, &data->ps_yuv);
+ if (!FAILED(result)) {
+ renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_YV12;
+ renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_IYUV;
+ } else {
+ D3D_SetError("CreatePixelShader()", result);
+ }
+ }
+ }
+
return renderer;
}
@@ -744,6 +912,70 @@
return D3D_SetError("CreateTexture()", result);
}
+ if (texture->format == SDL_PIXELFORMAT_YV12 ||
+ texture->format == SDL_PIXELFORMAT_IYUV) {
+ data->yuv = SDL_TRUE;
+
+ result =
+ IDirect3DDevice9_CreateTexture(renderdata->device, texture->w / 2,
+ texture->h / 2, 1, usage,
+ PixelFormatToD3DFMT(texture->format),
+ pool, &data->utexture, NULL);
+ if (FAILED(result)) {
+ return D3D_SetError("CreateTexture()", result);
+ }
+
+ result =
+ IDirect3DDevice9_CreateTexture(renderdata->device, texture->w / 2,
+ texture->h / 2, 1, usage,
+ PixelFormatToD3DFMT(texture->format),
+ pool, &data->vtexture, NULL);
+ if (FAILED(result)) {
+ return D3D_SetError("CreateTexture()", result);
+ }
+ }
+
+ return 0;
+}
+
+static int
+D3D_UpdateTextureInternal(IDirect3DTexture9 *texture, Uint32 format, SDL_bool full_texture, int x, int y, int w, int h, const void *pixels, int pitch)
+{
+ RECT d3drect;
+ D3DLOCKED_RECT locked;
+ const Uint8 *src;
+ Uint8 *dst;
+ int row, length;
+ HRESULT result;
+
+ if (full_texture) {
+ result = IDirect3DTexture9_LockRect(texture, 0, &locked, NULL, D3DLOCK_DISCARD);
+ } else {
+ d3drect.left = x;
+ d3drect.right = x + w;
+ d3drect.top = y;
+ d3drect.bottom = y + h;
+ result = IDirect3DTexture9_LockRect(texture, 0, &locked, &d3drect, 0);
+ }
+
+ if (FAILED(result)) {
+ return D3D_SetError("LockRect()", result);
+ }
+
+ src = (const Uint8 *)pixels;
+ dst = locked.pBits;
+ length = w * SDL_BYTESPERPIXEL(format);
+ if (length == pitch && length == locked.Pitch) {
+ SDL_memcpy(dst, src, length*h);
+ } else {
+ for (row = 0; row < h; ++row) {
+ SDL_memcpy(dst, src, length);
+ src += pitch;
+ dst += locked.Pitch;
+ }
+ }
+ IDirect3DTexture9_UnlockRect(texture, 0);
+
return 0;
}
@@ -752,46 +984,34 @@
const SDL_Rect * rect, const void *pixels, int pitch)
{
D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
- RECT d3drect;
- D3DLOCKED_RECT locked;
- const Uint8 *src;
- Uint8 *dst;
- int row, length;
- HRESULT result;
+ SDL_bool full_texture = SDL_FALSE;
#ifdef USE_DYNAMIC_TEXTURE
if (texture->access == SDL_TEXTUREACCESS_STREAMING &&
rect->x == 0 && rect->y == 0 &&
rect->w == texture->w && rect->h == texture->h) {
- result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, NULL, D3DLOCK_DISCARD);
- } else
+ full_texture = SDL_TRUE;
+ }
#endif
- {
- d3drect.left = rect->x;
- d3drect.right = rect->x + rect->w;
- d3drect.top = rect->y;
- d3drect.bottom = rect->y + rect->h;
- result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
+
+ if (D3D_UpdateTextureInternal(data->texture, texture->format, full_texture, rect->x, rect->y, rect->w, rect->h, pixels, pitch) < 0) {
+ return -1;
}
- if (FAILED(result)) {
- return D3D_SetError("LockRect()", result);
- }
+ if (data->yuv) {
+ /* Skip to the correct offset into the next texture */
+ pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
- src = pixels;
- dst = locked.pBits;
- length = rect->w * SDL_BYTESPERPIXEL(texture->format);
- if (length == pitch && length == locked.Pitch) {
- SDL_memcpy(dst, src, length*rect->h);
- } else {
- for (row = 0; row < rect->h; ++row) {
- SDL_memcpy(dst, src, length);
- src += pitch;
- dst += locked.Pitch;
+ if (D3D_UpdateTextureInternal(texture->format == SDL_PIXELFORMAT_YV12 ? data->vtexture : data->utexture, texture->format, full_texture, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, pixels, pitch / 2) < 0) {
+ return -1;
+ }
+
+ /* Skip to the correct offset into the next texture */
+ pixels = (const void*)((const Uint8*)pixels + (rect->h * pitch)/4);
+ if (D3D_UpdateTextureInternal(texture->format == SDL_PIXELFORMAT_YV12 ? data->utexture : data->vtexture, texture->format, full_texture, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, pixels, pitch / 2) < 0) {
+ return -1;
}
}
- IDirect3DTexture9_UnlockRect(data->texture, 0);
-
return 0;
}
@@ -804,17 +1024,33 @@
D3DLOCKED_RECT locked;
HRESULT result;
- d3drect.left = rect->x;
- d3drect.right = rect->x + rect->w;
- d3drect.top = rect->y;
- d3drect.bottom = rect->y + rect->h;
+ if (data->yuv) {
+ // It's more efficient to upload directly...
+ if (!data->pixels) {
+ data->pitch = texture->w;
+ data->pixels = (Uint8 *)SDL_malloc((texture->h * data->pitch * 3) / 2);
+ if (!data->pixels) {
+ return SDL_OutOfMemory();
+ }
+ }
+ data->locked_rect = *rect;
+ *pixels =
+ (void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
+ rect->x * SDL_BYTESPERPIXEL(texture->format));
+ *pitch = data->pitch;
+ } else {
+ d3drect.left = rect->x;
+ d3drect.right = rect->x + rect->w;
+ d3drect.top = rect->y;
+ d3drect.bottom = rect->y + rect->h;
- result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
- if (FAILED(result)) {
- return D3D_SetError("LockRect()", result);
+ result = IDirect3DTexture9_LockRect(data->texture, 0, &locked, &d3drect, 0);
+ if (FAILED(result)) {
+ return D3D_SetError("LockRect()", result);
+ }
+ *pixels = locked.pBits;
+ *pitch = locked.Pitch;
}
- *pixels = locked.pBits;
- *pitch = locked.Pitch;
return 0;
}
@@ -823,7 +1059,15 @@
{
D3D_TextureData *data = (D3D_TextureData *) texture->driverdata;
- IDirect3DTexture9_UnlockRect(data->texture, 0);
+ if (data->yuv) {
+ const SDL_Rect *rect = &data->locked_rect;
+ void *pixels =
+ (void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
+ rect->x * SDL_BYTESPERPIXEL(texture->format));
+ D3D_UpdateTexture(renderer, texture, rect, pixels, data->pitch);
+ } else {
+ IDirect3DTexture9_UnlockRect(data->texture, 0);
+ }
}
static int
@@ -1196,6 +1440,18 @@
return 0;
}
+static void
+D3D_UpdateTextureScaleMode(D3D_RenderData *data, D3D_TextureData *texturedata, unsigned index)
+{
+ if (texturedata->scaleMode != data->scaleMode[index]) {
+ IDirect3DDevice9_SetSamplerState(data->device, index, D3DSAMP_MINFILTER,
+ texturedata->scaleMode);
+ IDirect3DDevice9_SetSamplerState(data->device, index, D3DSAMP_MAGFILTER,
+ texturedata->scaleMode);
+ data->scaleMode[index] = texturedata->scaleMode;
+ }
+}
+
static int
D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
@@ -1255,13 +1511,7 @@
D3D_SetBlendMode(data, texture->blendMode);
- if (texturedata->scaleMode != data->scaleMode) {
- IDirect3DDevice9_SetSamplerState(data->device, 0, D3DSAMP_MINFILTER,
- texturedata->scaleMode);
- IDirect3DDevice9_SetSamplerState(data->device, 0, D3DSAMP_MAGFILTER,
- texturedata->scaleMode);
- data->scaleMode = texturedata->scaleMode;
- }
+ D3D_UpdateTextureScaleMode(data, texturedata, 0);
result =
IDirect3DDevice9_SetTexture(data->device, 0, (IDirect3DBaseTexture9 *)
@@ -1269,6 +1519,28 @@
if (FAILED(result)) {
return D3D_SetError("SetTexture()", result);
}
+
+ if (texturedata->yuv) {
+ shader = data->ps_yuv;
+
+ D3D_UpdateTextureScaleMode(data, texturedata, 1);
+ D3D_UpdateTextureScaleMode(data, texturedata, 2);
+
+ result =
+ IDirect3DDevice9_SetTexture(data->device, 1, (IDirect3DBaseTexture9 *)
+ texturedata->utexture);
+ if (FAILED(result)) {
+ return D3D_SetError("SetTexture()", result);
+ }
+
+ result =
+ IDirect3DDevice9_SetTexture(data->device, 2, (IDirect3DBaseTexture9 *)
+ texturedata->vtexture);
+ if (FAILED(result)) {
+ return D3D_SetError("SetTexture()", result);
+ }
+ }
+
if (shader) {
result = IDirect3DDevice9_SetPixelShader(data->device, shader);
if (FAILED(result)) {
@@ -1375,13 +1647,7 @@
ID3DXMatrixStack_Translate(data->matrixStack, (float)dstrect->x + centerx, (float)dstrect->y + centery, (float)0.0);
IDirect3DDevice9_SetTransform(data->device, D3DTS_VIEW, (D3DMATRIX*)ID3DXMatrixStack_GetTop(data->matrixStack));
- if (texturedata->scaleMode != data->scaleMode) {
- IDirect3DDevice9_SetSamplerState(data->device, 0, D3DSAMP_MINFILTER,
- texturedata->scaleMode);
- IDirect3DDevice9_SetSamplerState(data->device, 0, D3DSAMP_MAGFILTER,
- texturedata->scaleMode);
- data->scaleMode = texturedata->scaleMode;
- }
+ D3D_UpdateTextureScaleMode(data, texturedata, 0);
result =
IDirect3DDevice9_SetTexture(data->device, 0, (IDirect3DBaseTexture9 *)
@@ -1389,6 +1655,28 @@
if (FAILED(result)) {
return D3D_SetError("SetTexture()", result);
}
+
+ if (texturedata->yuv) {
+ shader = data->ps_yuv;
+
+ D3D_UpdateTextureScaleMode(data, texturedata, 1);
+ D3D_UpdateTextureScaleMode(data, texturedata, 2);
+
+ result =
+ IDirect3DDevice9_SetTexture(data->device, 1, (IDirect3DBaseTexture9 *)
+ texturedata->utexture);
+ if (FAILED(result)) {
+ return D3D_SetError("SetTexture()", result);
+ }
+
+ result =
+ IDirect3DDevice9_SetTexture(data->device, 2, (IDirect3DBaseTexture9 *)
+ texturedata->vtexture);
+ if (FAILED(result)) {
+ return D3D_SetError("SetTexture()", result);
+ }
+ }
+
if (shader) {
result = IDirect3DDevice9_SetPixelShader(data->device, shader);
if (FAILED(result)) {
@@ -1511,6 +1799,15 @@
if (data->texture) {
IDirect3DTexture9_Release(data->texture);
}
+ if (data->utexture) {
+ IDirect3DTexture9_Release(data->utexture);
+ }
+ if (data->vtexture) {
+ IDirect3DTexture9_Release(data->vtexture);
+ }
+ if (data->pixels) {
+ SDL_free(data->pixels);
+ }
SDL_free(data);
texture->driverdata = NULL;
}