// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "util.h"
#include <lib/async/dispatcher.h>
#include <lib/media/test/one_shot_event.h>
#include <stdarg.h>
#include <stdio.h>
#include <zircon/assert.h>
#include <fstream>
#include <iostream>
#include <memory>
#include <fbl/auto_lock.h>

// Branchless implementation of one step of pdep (parallel bits deposit): examines a single bit
// position of axis_mask and, if that bit is set, deposits the currently-corresponding bit of the
// tile-local coordinate into swizzled_offset.
inline void ProcessCoordinate(uint32_t tile_local_coordinate, uint32_t& mask_bit_idx,
uint32_t axis_mask, uint32_t axis_mask_bit_idx,
uint32_t& swizzled_offset) {
  // axis_bit has at most one bit set: bit axis_mask_bit_idx, present iff axis_mask has that bit
  // set. If it is set, deposit the currently-corresponding bit of the coordinate into
  // swizzled_offset. If it is not set, carry that coordinate bit forward to a later iteration.
uint32_t axis_bit = (1u << axis_mask_bit_idx) & axis_mask;
  // Deposit one bit of the (original) coordinate into swizzled_offset, with the coordinate shifted
  // left by the cumulative count of zero bits seen in axis_mask so far (see the pdep link below for
  // a diagram). Since the different axis masks are guaranteed not to overlap, the deposited bit can
  // be added directly into swizzled_offset.
uint32_t deposit_bit = axis_bit & (tile_local_coordinate << mask_bit_idx);
swizzled_offset += deposit_bit;
  // mask_bit_idx is the cumulative left shift applied to the coordinate; it counts how many zero
  // bits of axis_mask have been skipped so far. If axis_bit is set, a coordinate bit was just
  // consumed, and the shift stays the same for the next coordinate bit. If axis_bit is clear, the
  // pending coordinate bit must travel one position further left to land in its correct spot
  // within swizzled_offset, so the shift grows by one. The code below is a branchless equivalent
  // of the following:
  //
  //   if (!axis_bit) mask_bit_idx += 1u;
mask_bit_idx += ~(axis_bit >> axis_mask_bit_idx) & 0x1u;
}
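// Worked example of ProcessCoordinate: with axis_mask = 0x0E0F and tile_local_coordinate = 0x55,
// running the call for axis_mask_bit_idx = 0..11 deposits coordinate bits into offset bit
// positions 0, 2, 9 and 11, adding 0xA05 to swizzled_offset - the same result as
// _pdep_u32(0x55, 0x0E0F).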
uint32_t ConvertLinearToLegacyYTiled(uint32_t y_offset, uint32_t x_offset, uint32_t pitch) {
static constexpr uint32_t kXMask = 0x0E0Fu;
static constexpr uint32_t kXBits = 7u;
static constexpr uint32_t kYMask = 0x01F0u;
static constexpr uint32_t kYBits = 5u;
static constexpr uint32_t kTotalBits = kXBits + kYBits;
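  // For reference, with these masks the 12 intra-tile offset bits are, from LSB to MSB:
  // x[3:0], y[4:0], x[6:4]. In other words each 4 KiB tile is 128 bytes wide by 32 rows tall and
  // is traversed as 16-byte-wide columns.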
// Ensure the masks are not malformed
static_assert((kXMask + kYMask) == (kXMask | kYMask), "X and/or Y mask is malformed");
static_assert((kXMask | kYMask) < (1 << 16), "Mask can only contain 16 bits");
// Ensure pitch is a multiple of 2^kXBits
ZX_ASSERT(pitch == ((pitch >> kXBits) << kXBits));
  // Figure out the number of tiles per row
uint32_t tiles_per_row = (pitch >> kXBits);
  // First calculate the row and column (in tile units) that contain the target address
uint32_t row = (y_offset >> kYBits);
uint32_t col = (x_offset >> kXBits);
// Next calculate the tile-local coordinates
uint32_t y_coordinate = (y_offset & ((1 << kYBits) - 1));
uint32_t x_coordinate = (x_offset & ((1 << kXBits) - 1));
  // Then calculate the byte offset of the start of the containing tile, given the tile row and column
uint32_t swizzled_offset = ((row * tiles_per_row) + col) << kTotalBits;
  // It would be nice to use the _pdep_u32 intrinsic here, but Fuchsia currently targets baseline
  // x86-64, and BMI2, the extension that provides _pdep_u32, is only guaranteed starting with
  // x86-64-v3. Requiring processors to support BMI2 would exclude too many older-generation
  // processors, per the platform requirements outlined in
  // https://fuchsia.dev/fuchsia-src/contribute/governance/rfcs/0073_x86_64_platform_requirement.
  // If Fuchsia ever raises the baseline to x86-64-v3, the loop below can be replaced with the
  // intrinsic like so:
  //
  //   swizzled_offset += _pdep_u32(x_coordinate, kXMask);
  //   swizzled_offset += _pdep_u32(y_coordinate, kYMask);
  //
  // For a more detailed explanation of pdep, please read the following:
  // https://www.felixcloutier.com/x86/pdep
uint32_t axis_bit_idx = 0u, x_bit_idx = 0u, y_bit_idx = 0u;
while (axis_bit_idx < kTotalBits) {
    // Process the x and y coordinates. kXMask and kYMask share no bits, and every bit position
    // below kTotalBits is set in exactly one of them (but not both), so each pass through the loop
    // deposits a bit from exactly one of x or y into swizzled_offset, and consumes a bit from (as
    // opposed to further shifting) whichever coordinate deposited the bit.
ProcessCoordinate(x_coordinate, x_bit_idx, kXMask, axis_bit_idx, swizzled_offset);
ProcessCoordinate(y_coordinate, y_bit_idx, kYMask, axis_bit_idx, swizzled_offset);
    // Advance to the next bit position of the masks and loop again.
axis_bit_idx += 1u;
}
return swizzled_offset;
}
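// Example use of ConvertLinearToLegacyYTiled (hypothetical caller): copying one byte from a
// linear source image into a Y-tiled destination surface:
//
//   dst_base[ConvertLinearToLegacyYTiled(y, x, dst_pitch)] = src_base[y * src_stride + x];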
void Exit(const char* format, ...) {
// Let's not have a buffer on the stack, not because it couldn't be done
// safely, but because we'd potentially run into stack size vs. message length
// tradeoffs, stack expansion granularity fun, or whatever else.
va_list args;
va_start(args, format);
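  // With a null buffer and zero size, vsnprintf() returns the number of characters the formatted
  // message would occupy (excluding the terminating NUL), so +1 sizes the real buffer.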
size_t buffer_bytes = vsnprintf(nullptr, 0, format, args) + 1;
va_end(args);
// ~buffer never actually runs since this method never returns
std::unique_ptr<char[]> buffer(new char[buffer_bytes]);
va_start(args, format);
size_t buffer_bytes_2 = vsnprintf(buffer.get(), buffer_bytes, format, args) + 1;
(void)buffer_bytes_2;
// sanity check; should match so go ahead and assert that it does.
assert(buffer_bytes == buffer_bytes_2);
va_end(args);
printf("%s - exiting\n", buffer.get());
// If anything goes wrong, exit(-1) is used directly (until we have any reason
// to do otherwise).
exit(-1);
}
// This is obviously not how anyone would really stream a file, but this example
// program isn't about streaming a large media file.
std::unique_ptr<uint8_t[]> read_whole_file(const char* filename, size_t* size) {
std::ifstream file;
  // std::ios::ate means start at the end, so tellg() will report the file size
file.open(filename, std::ios::in | std::ios::binary | std::ios::ate);
if (!file.is_open()) {
Exit("failed to open file %s", filename);
}
  std::streampos input_size = file.tellg();
  if (input_size == -1) {
    Exit("file.tellg() failed");
  }
  *size = input_size;
VLOGF("file size is: %lld", static_cast<long long>(input_size));
auto raw_adts = std::make_unique<uint8_t[]>(input_size);
file.seekg(0, std::ios::beg);
if (!file) {
Exit("file.seekg(0, beg) failed");
}
file.read(reinterpret_cast<char*>(raw_adts.get()), input_size);
if (!file) {
Exit("file.read() failed");
}
file.close();
if (!file) {
Exit("file.close() failed");
}
return raw_adts;
}
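// Example use of read_whole_file (hypothetical path): loading a whole compressed input stream up
// front for decoding:
//
//   size_t input_size;
//   std::unique_ptr<uint8_t[]> input = read_whole_file("/pkg/data/test.adts", &input_size);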
void PostSerial(async_dispatcher_t* dispatcher, fit::closure to_run) {
zx_status_t post_result = async::PostTask(dispatcher, std::move(to_run));
if (post_result != ZX_OK) {
Exit("async::PostTask() failed - post_result: %d", post_result);
}
}
// Fence through any lambdas previously posted with PostSerial().
void FencePostSerial(async_dispatcher_t* dispatcher) {
// If this gets stuck, make sure this isn't being called on the dispatcher's
// own thread, and consider asserting in the caller that this isn't getting
// called on the dispatcher's thread.
OneShotEvent one_shot;
PostSerial(dispatcher, [&one_shot] { one_shot.Signal(); });
// Wait indefinitely.
one_shot.Wait();
}
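// Example use of FencePostSerial (hypothetical caller): ensuring that previously-posted lambdas
// which capture some state have finished running before that state is destroyed:
//
//   FencePostSerial(loop.dispatcher());
//   // Now safe to tear down anything those lambdas captured.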
void SHA256_Update_AudioParameters(SHA256_CTX* sha256_ctx, const fuchsia::media::PcmFormat& pcm) {
uint32_t pcm_mode_le = htole32(pcm.pcm_mode);
if (!SHA256_Update(sha256_ctx, &pcm_mode_le, sizeof(pcm_mode_le))) {
assert(false);
}
uint32_t bits_per_sample_le = htole32(pcm.bits_per_sample);
if (!SHA256_Update(sha256_ctx, &bits_per_sample_le, sizeof(bits_per_sample_le))) {
assert(false);
}
uint32_t frames_per_second_le = htole32(pcm.frames_per_second);
if (!SHA256_Update(sha256_ctx, &frames_per_second_le, sizeof(frames_per_second_le))) {
assert(false);
}
for (fuchsia::media::AudioChannelId channel_id : pcm.channel_map) {
uint32_t channel_id_le = htole32(channel_id);
if (!SHA256_Update(sha256_ctx, &channel_id_le, sizeof(channel_id_le))) {
assert(false);
}
}
}
void SHA256_Update_VideoParameters(SHA256_CTX* sha256_ctx,
const fuchsia::media::VideoUncompressedFormat& video) {
UpdateSha256(sha256_ctx, video.fourcc);
UpdateSha256(sha256_ctx, video.primary_width_pixels);
UpdateSha256(sha256_ctx, video.primary_height_pixels);
UpdateSha256(sha256_ctx, video.secondary_width_pixels);
UpdateSha256(sha256_ctx, video.secondary_height_pixels);
UpdateSha256(sha256_ctx, video.planar);
UpdateSha256(sha256_ctx, video.swizzled);
UpdateSha256(sha256_ctx, video.primary_line_stride_bytes);
UpdateSha256(sha256_ctx, video.secondary_line_stride_bytes);
UpdateSha256(sha256_ctx, video.primary_start_offset);
UpdateSha256(sha256_ctx, video.secondary_start_offset);
UpdateSha256(sha256_ctx, video.tertiary_start_offset);
UpdateSha256(sha256_ctx, video.primary_pixel_stride);
UpdateSha256(sha256_ctx, video.secondary_pixel_stride);
}
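// Hashes |width| bytes from each of |height| rows starting at |start|, advancing |stride| bytes
// between rows, so any per-row padding beyond |width| does not affect the digest.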
void SHA256_Update_VideoPlane(SHA256_CTX* sha256_ctx, uint8_t* start, uint32_t width,
uint32_t stride, uint32_t height) {
uint8_t* src = start;
for (uint32_t row = 0; row < height; ++row) {
SHA256_Update(sha256_ctx, src, width);
src += stride;
}
}
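// Example use of SHA256_Update_VideoPlane (hypothetical NV12 frame): hash the Y plane, then the
// interleaved UV plane, which has half the rows but the same number of payload bytes per row:
//
//   SHA256_Update_VideoPlane(&sha256_ctx, y_base, width, y_stride, height);
//   SHA256_Update_VideoPlane(&sha256_ctx, uv_base, width, uv_stride, height / 2);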