| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
#include "lib/ui/gfx/util/image_formats.h"

#include <algorithm>
#include <cstring>

#include "garnet/lib/ui/yuv/yuv.h"
#include "lib/fxl/logging.h"
#include "lib/images/cpp/images.h"
| |
| namespace scenic_impl { |
| namespace gfx { |
| namespace image_formats { |
| |
| namespace { |
| |
| // Takes 4 bytes of YUY2 and writes 8 bytes of RGBA |
| // TODO(MZ-547): do this better with a lookup table |
| void Yuy2ToBgra(uint8_t* yuy2, uint8_t* bgra1, uint8_t* bgra2) { |
| uint8_t y1 = yuy2[0]; |
| uint8_t u = yuy2[1]; |
| uint8_t y2 = yuy2[2]; |
| uint8_t v = yuy2[3]; |
| yuv::YuvToBgra(y1, u, v, bgra1); |
| yuv::YuvToBgra(y2, u, v, bgra2); |
| } |
| |
| void ConvertYuy2ToBgra(uint8_t* out_ptr, uint8_t* in_ptr, |
| uint64_t buffer_size) { |
| // converts to BGRA |
| // uint8_t addresses: |
| // 0 1 2 3 4 5 6 7 8 |
| // | Y | U | Y | V | |
| // | B | G | R | A | B | G | R | A |
| // We have 2 bytes per pixel, but we need to convert blocks of 4: |
| uint32_t num_double_pixels = buffer_size / 4; |
| // Since in_ptr and out_ptr are uint8_t, we step by 4 (bytes) |
| // in the incoming buffer, and 8 (bytes) in the output buffer. |
| for (unsigned int i = 0; i < num_double_pixels; i++) { |
| Yuy2ToBgra(&in_ptr[4 * i], &out_ptr[8 * i], &out_ptr[8 * i + 4]); |
| } |
| } |
| |
| void ConvertYuy2ToBgraAndMirror(uint8_t* out_ptr, uint8_t* in_ptr, |
| uint32_t out_width, uint32_t out_height) { |
| uint32_t double_pixels_per_row = out_width / 2; |
| uint32_t in_stride = out_width * 2; |
| uint32_t out_stride = out_width * 4; |
| // converts to BGRA and mirrors left-right |
| for (uint32_t y = 0; y < out_height; ++y) { |
| for (uint32_t x = 0; x < double_pixels_per_row; ++x) { |
| uint64_t out = 8 * ((double_pixels_per_row - 1 - x)) + y * out_stride; |
| Yuy2ToBgra(&in_ptr[4 * x + y * in_stride], &out_ptr[out + 4], |
| &out_ptr[out]); |
| } |
| } |
| } |
| |
// Mirrors a BGRA image left-right: each row of |in_ptr| is written to
// |out_ptr| in reverse pixel order. Pixels are copied as whole 4-byte
// units, hence the uint32_t pointers. |out_ptr| and |in_ptr| must not
// alias. (The previous comment claimed this "converts to BGRA"; it only
// mirrors — no color conversion happens here.)
void MirrorBgra(uint32_t* out_ptr, uint32_t* in_ptr, uint32_t width,
                uint32_t height) {
  for (uint32_t y = 0; y < height; ++y) {
    uint32_t* in_row = in_ptr + y * width;
    uint32_t* out_row = out_ptr + y * width;
    // Row-wise reversal, expressed with the standard algorithm instead of
    // a hand-rolled index loop.
    std::reverse_copy(in_row, in_row + width, out_row);
  }
}
| |
| // For now, copy each UV sample to a 2x2 square of ouput pixels. This is not |
| // proper signal processing for the UV up-scale, but it _may_ be faster. |
| // |
| // This function isn't really optimized in any serious sense so far. |
| void ConvertNv12ToBgra(uint8_t* out_ptr, uint8_t* in_ptr, uint32_t width, |
| uint32_t height, uint32_t in_stride) { |
| uint8_t* y_base = in_ptr; |
| uint8_t* uv_base = in_ptr + height * in_stride; |
| |
| // Convert 2 lines at a time, to avoid reading UV data twice. I don't know if |
| // avoiding reading UV twice really matters much since we're not skipping |
| // caches (such as with non-temporal reads), and I wouldn't be surpised if the |
| // bottleneck is often compute rather than memory. |
| // |
| // Writing two lines at a time might turn out to be counterproductive, |
| // possibly depending on CPU write buffering details. |
| for (uint32_t y = 0; y < height; y += 2) { |
| uint8_t* y1_sample_iter = y_base + y * in_stride; |
| uint8_t* y2_sample_iter = y_base + (y + 1) * in_stride; |
| uint8_t* uv_sample_iter = uv_base + y / 2 * in_stride; |
| uint8_t* bgra1_sample_iter = out_ptr + y * width * 4; |
| uint8_t* bgra2_sample_iter = out_ptr + (y + 1) * width * 4; |
| |
| // Minimizing this inner loop matters more than per-2-lines stuff above, of |
| // course. |
| for (uint32_t x = 0; x < width; x += 2) { |
| uint8_t u = *uv_sample_iter; |
| uint8_t v = *(uv_sample_iter + 1); |
| |
| // Unknown whether unrolling this 2 pixel wide loop (by just having two |
| // copies of the loop body in a row) would be better or worse. The 2 |
| // pixels high is already "unrolled" in some sense, so this chunk of code |
| // is processing 2x2 pixels. For now, it's probably more readable with |
| // this loop present instead of unrolled, but note that the x_offset is |
| // not used within the body of the loop. |
| for (uint32_t x_offset = 0; x_offset < 2; ++x_offset) { |
| // Unknown whether inlining each of these is better or worse. |
| yuv::YuvToBgra(*y1_sample_iter, u, v, bgra1_sample_iter); |
| yuv::YuvToBgra(*y2_sample_iter, u, v, bgra2_sample_iter); |
| y1_sample_iter += sizeof(uint8_t); |
| y2_sample_iter += sizeof(uint8_t); |
| bgra1_sample_iter += sizeof(uint32_t); |
| bgra2_sample_iter += sizeof(uint32_t); |
| } |
| |
| uv_sample_iter += sizeof(uint16_t); // Each UV sample is 2 bytes. |
| } |
| } |
| } |
| |
| void ConvertYv12ToBgra(uint8_t* out_ptr, uint8_t* in_ptr, uint32_t width, |
| uint32_t height, uint32_t in_stride) { |
| // Y plane, then V plane, then U plane. The V and U planes will use |
| // in_stride / 2 (at least until we encounter any "YV12" where that doesn't |
| // work). |
| uint8_t* y_base = in_ptr; |
| uint8_t* u_base = in_ptr + height * in_stride + height / 2 * in_stride / 2; |
| uint8_t* v_base = in_ptr + height * in_stride; |
| |
| for (uint32_t y = 0; y < height; y += 2) { |
| uint8_t* y1_sample_iter = y_base + y * in_stride; |
| uint8_t* y2_sample_iter = y_base + (y + 1) * in_stride; |
| uint8_t* u_sample_iter = u_base + y / 2 * in_stride / 2; |
| uint8_t* v_sample_iter = v_base + y / 2 * in_stride / 2; |
| uint8_t* bgra1_sample_iter = out_ptr + y * width * sizeof(uint32_t); |
| uint8_t* bgra2_sample_iter = out_ptr + (y + 1) * width * sizeof(uint32_t); |
| |
| for (uint32_t x = 0; x < width; x += 2) { |
| uint8_t u = *u_sample_iter; |
| uint8_t v = *v_sample_iter; |
| |
| for (uint32_t x_offset = 0; x_offset < 2; ++x_offset) { |
| // Unknown whether inlining each of these is better or worse. |
| yuv::YuvToBgra(*y1_sample_iter, u, v, bgra1_sample_iter); |
| yuv::YuvToBgra(*y2_sample_iter, u, v, bgra2_sample_iter); |
| y1_sample_iter += sizeof(uint8_t); |
| y2_sample_iter += sizeof(uint8_t); |
| bgra1_sample_iter += sizeof(uint32_t); |
| bgra2_sample_iter += sizeof(uint32_t); |
| } |
| |
| u_sample_iter += sizeof(uint8_t); |
| v_sample_iter += sizeof(uint8_t); |
| } |
| } |
| } |
| |
| } // anonymous namespace |
| |
| escher::image_utils::ImageConversionFunction GetFunctionToConvertToBgra8( |
| const fuchsia::images::ImageInfo& image_info) { |
| size_t bits_per_pixel = images::BitsPerPixel(image_info.pixel_format); |
| switch (image_info.pixel_format) { |
| case fuchsia::images::PixelFormat::BGRA_8: |
| if (image_info.transform == fuchsia::images::Transform::FLIP_HORIZONTAL) { |
| return [](void* out, void* in, uint32_t width, uint32_t height) { |
| MirrorBgra(reinterpret_cast<uint32_t*>(out), |
| reinterpret_cast<uint32_t*>(in), width, height); |
| }; |
| } else { |
| // no conversion needed. |
| FXL_DCHECK(bits_per_pixel % 8 == 0); |
| size_t bytes_per_pixel = bits_per_pixel / 8; |
| return [bytes_per_pixel](void* out, void* in, uint32_t width, |
| uint32_t height) { |
| memcpy(out, in, width * height * bytes_per_pixel); |
| }; |
| } |
| break; |
| // TODO(MZ-551): support vertical flipping |
| case fuchsia::images::PixelFormat::YUY2: |
| if (image_info.transform == fuchsia::images::Transform::FLIP_HORIZONTAL) { |
| return [](void* out, void* in, uint32_t width, uint32_t height) { |
| ConvertYuy2ToBgraAndMirror(reinterpret_cast<uint8_t*>(out), |
| reinterpret_cast<uint8_t*>(in), width, |
| height); |
| }; |
| } else { |
| FXL_DCHECK(bits_per_pixel % 8 == 0); |
| size_t bytes_per_pixel = bits_per_pixel / 8; |
| return [bytes_per_pixel](void* out, void* in, uint32_t width, |
| uint32_t height) { |
| ConvertYuy2ToBgra(reinterpret_cast<uint8_t*>(out), |
| reinterpret_cast<uint8_t*>(in), |
| width * height * bytes_per_pixel); |
| }; |
| } |
| break; |
| case fuchsia::images::PixelFormat::NV12: |
| FXL_DCHECK(image_info.transform == fuchsia::images::Transform::NORMAL) |
| << "NV12 transforms not yet implemented"; |
| // At least for now, capture stride from the image_info. Assert that width |
| // and height could also be captured this way, but don't actually use |
| // their captured versions yet. |
| return [captured_in_stride = image_info.stride, |
| captured_width = image_info.width, |
| captured_height = image_info.height]( |
| void* out, void* in, uint32_t width, uint32_t height) { |
| FXL_DCHECK(captured_width == width); |
| FXL_DCHECK(captured_height == height); |
| ConvertNv12ToBgra(reinterpret_cast<uint8_t*>(out), |
| reinterpret_cast<uint8_t*>(in), width, height, |
| captured_in_stride); |
| }; |
| break; |
| case fuchsia::images::PixelFormat::YV12: |
| FXL_DCHECK(image_info.transform == fuchsia::images::Transform::NORMAL) |
| << "YV12 transforms not yet implemented"; |
| // At least for now, capture stride from the image_info. Assert that width |
| // and height could also be captured this way, but don't actually use |
| // their captured versions yet. |
| return [captured_in_stride = image_info.stride, |
| captured_width = image_info.width, |
| captured_height = image_info.height]( |
| void* out, void* in, uint32_t width, uint32_t height) { |
| FXL_DCHECK(captured_width == width); |
| FXL_DCHECK(captured_height == height); |
| ConvertYv12ToBgra(reinterpret_cast<uint8_t*>(out), |
| reinterpret_cast<uint8_t*>(in), width, height, |
| captured_in_stride); |
| }; |
| break; |
| } |
| return nullptr; |
| } |
| |
| } // namespace image_formats |
| } // namespace gfx |
| } // namespace scenic_impl |