| /* |
| * Copyright (c) 2017-2019, Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included |
| * in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| //! |
| //! \file codechal_vdenc_hevc_g12.cpp |
| //! \brief HEVC VDEnc encoder for GEN12. |
| //! |
| |
| #include "codechal_vdenc_hevc_g12.h" |
| #include "codechal_kernel_header_g12.h" |
| #include "codeckrnheader.h" |
| #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE) |
| #include "igcodeckrn_g12.h" |
| #endif |
| #include "mhw_vdbox_g12_X.h" |
| #include "mhw_vdbox_hcp_g12_X.h" |
| #include "mhw_vdbox_vdenc_g12_X.h" |
| #include "mhw_mi_g12_X.h" |
| #include "mhw_render_g12_X.h" |
| #include "codechal_mmc_encode_hevc_g12.h" |
| #include "media_user_settings_mgr_g12.h" |
| #include "mhw_mmio_g12.h" |
| #include "hal_oca_interface.h" |
| |
// Four-entry table of 32-bit values, initialised from the M_VDBOX_VDENC_REG_BASE macro
// (the macro is defined outside this file).
// NOTE(review): presumably one VDENC MMIO register base per VDBOX instance - confirm
// against the header that defines the macro.
| const uint32_t CodechalVdencHevcStateG12::m_VdboxVDENCRegBase[4] = M_VDBOX_VDENC_REG_BASE; |
| |
| const double CodechalVdencHevcStateG12::m_devThreshIFPNEG[] = { |
| 0.80, 0.60, 0.34, 0.2, |
| }; |
| |
| const double CodechalVdencHevcStateG12::m_devThreshIFPPOS[] = { |
| 0.2, 0.4 , 0.66, 0.9, |
| }; |
| |
| const double CodechalVdencHevcStateG12::m_devThreshPBFPNEG[] = { |
| 0.90, 0.66, 0.46, 0.3, |
| }; |
| |
| const double CodechalVdencHevcStateG12::m_devThreshPBFPPOS[] = { |
| 0.3, 0.46, 0.70, 0.90, |
| }; |
| |
| const double CodechalVdencHevcStateG12::m_devThreshVBRNEG[] = { |
| 0.90, 0.70, 0.50, 0.3, |
| }; |
| |
| const double CodechalVdencHevcStateG12::m_devThreshVBRPOS[] = { |
| 0.4, 0.5, 0.75, 0.90, |
| }; |
| |
| const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshPB[] = { |
| -45, -33, -23, -15, -8, 0, 15, 25, |
| }; |
| const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshVBR[] = { |
| -45, -35, -25, -15, -8, 0, 20, 40, |
| }; |
| const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshI[] = { |
| -40, -30, -17, -10, -5, 0, 10, 20, |
| }; |
| |
| const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszI[][8] = { |
| { 0, 0, -8, -12, -16, -20, -28, -36 }, |
| { 0, 0, -4, -8, -12, -16, -24, -32 }, |
| { 4, 2, 0, -1, -3, -8, -16, -24 }, |
| { 8, 4, 2, 0, -1, -4, -8, -16 }, |
| { 20, 16, 4, 0, -1, -4, -8, -16 }, |
| { 24, 20, 16, 8, 4, 0, -4, -8 }, |
| { 28, 24, 20, 16, 8, 4, 0, -8 }, |
| { 32, 24, 20, 16, 8, 4, 0, -4 }, |
| { 64, 48, 28, 20, 16, 12, 8, 4 }, |
| }; |
| |
| const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszP[][8] = { |
| { -8, -24, -32, -40, -44, -48, -52, -80 }, |
| { -8, -16, -32, -40, -40, -44, -44, -56 }, |
| { 0, 0, -12, -20, -24, -28, -32, -36 }, |
| { 8, 4, 0, 0, -8, -16, -24, -32 }, |
| { 32, 16, 8, 4, -4, -8, -16, -20 }, |
| { 36, 24, 16, 8, 4, -2, -4, -8 }, |
| { 40, 36, 24, 20, 16, 8, 0, -8 }, |
| { 48, 40, 28, 24, 20, 12, 0, -4 }, |
| { 64, 48, 28, 20, 16, 12, 8, 4 }, |
| }; |
| |
| const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszB[][8] = { |
| { 0, -4, -8, -16, -24, -32, -40, -48 }, |
| { 1, 0, -4, -8, -16, -24, -32, -40 }, |
| { 4, 2, 0, -1, -3, -8, -16, -24 }, |
| { 8, 4, 2, 0, -1, -4, -8, -16 }, |
| { 20, 16, 4, 0, -1, -4, -8, -16 }, |
| { 24, 20, 16, 8, 4, 0, -4, -8 }, |
| { 28, 24, 20, 16, 8, 4, 0, -8 }, |
| { 32, 24, 20, 16, 8, 4, 0, -4 }, |
| { 64, 48, 28, 20, 16, 12, 8, 4 }, |
| }; |
| |
// Constant data blob stored as an array of 32-bit words. The words are opaque in this
// file: nothing here interprets individual entries. A long stretch of the table repeats
// the 7-word group { 0x1e000000, 0x070d0e10, 0x00003207, 0, 0, 0, 0 }.
// NOTE(review): presumably copied verbatim into a HuC constant-data buffer whose layout is
// dictated by firmware - confirm against the consumer before editing or reordering any word.
| const uint32_t CodechalVdencHevcStateG12::m_hucConstantData[] = { |
| 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c, 0x012c012c, 0x012c012c, |
| 0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00640064, |
| 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, |
| 0x00640064, 0x00640064, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c, |
| 0x012c012c, 0x012c012c, 0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, |
| 0x00c800c8, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, |
| 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x503c1e04, 0xffc88c78, 0x3c1e0400, 0xc88c7850, |
| 0x140200ff, 0xa0824628, 0x0000ffc8, 0x00000000, 0x04030302, 0x00000000, 0x03030200, 0x0000ff04, |
| 0x02020000, 0xffff0303, 0x01000000, 0xff020202, 0x0000ffff, 0x02020100, 0x00fffffe, 0x01010000, |
| 0xfffffe02, 0x010000ff, 0xfefe0201, 0x0000ffff, 0xfe010100, 0x00fffffe, 0x01010000, 0x00000000, |
| 0x03030200, 0x00000004, 0x03020000, 0x00ff0403, 0x02000000, 0xff030302, 0x000000ff, 0x02020201, |
| 0x00ffffff, 0x02010000, 0xfffffe02, 0x01000000, 0xfffe0201, 0x0000ffff, 0xfe020101, 0x00fffffe, |
| 0x01010000, 0xfffffefe, 0x01000000, 0x00000001, 0x03020000, 0x00000403, 0x02000000, 0xff040303, |
| 0x00000000, 0x03030202, 0x0000ffff, 0x02020100, 0xffffff02, 0x01000000, 0xfffe0202, 0x000000ff, |
| 0xfe020101, 0x00ffffff, 0x02010100, 0xfffffefe, 0x01000000, 0xfffefe01, 0x000000ff, 0xe0e00101, |
| 0xc0d0d0d0, 0xe0e0b0c0, 0xd0d0d0e0, 0xf0f0c0d0, 0xd0e0e0e0, 0x0408d0d0, 0xe8f0f800, 0x1820dce0, |
| 0xf8fc0210, 0x2024ecf0, 0x0008101c, 0x2428f8fc, 0x08101418, 0x2830f800, 0x0c14181c, 0x3040fc00, |
| 0x0c10141c, 0xe8f80408, 0xc8d0d4e0, 0xf0f8b0c0, 0xccd4d8e0, 0x0000c0c8, 0xd8dce4f0, 0x0408d0d4, |
| 0xf0f80000, 0x0808dce8, 0xf0f80004, 0x0810dce8, 0x00080808, 0x0810f8fc, 0x08080808, 0x1010f800, |
| 0x08080808, 0x1020fc00, 0x08080810, 0xfc000408, 0xe0e8f0f8, 0x0001d0d8, 0xe8f0f8fc, 0x0204d8e0, |
| 0xf8fdff00, 0x0408e8f0, 0xfcff0002, 0x1014f0f8, 0xfcff0004, 0x1418f0f8, 0x00040810, 0x181cf8fc, |
| 0x04081014, 0x1820f800, 0x04081014, 0x3040fc00, 0x0c10141c, 0x40300408, 0x80706050, 0x30a0a090, |
| 0x70605040, 0xa0a09080, 0x60504030, 0xa0908070, 0x040201a0, 0x18141008, 0x02012420, 0x0a080604, |
| 0x01101010, 0x0c080402, 0x10101010, 0x05030201, 0x02010106, 0x00000503, 0xff030201, 0x02010000, |
| 0x000000ff, 0xfffefe01, 0xfdfd0100, 0xfb00ffff, 0xfffffefd, 0xfefdfbfa, 0x030201ff, 0x01010605, |
| 0x00050302, 0x03020101, 0x010000ff, 0x0000ff02, 0xffff0100, 0xfe0100ff, 0x00ffffff, 0xfffffefc, |
| 0xfefcfb00, 0x0101ffff, 0x01050402, 0x04020101, 0x01010000, 0x0000ff02, 0x00ff0101, 0xff000000, |
| 0x0100ffff, 0xfffffffe, 0xfffefd00, 0xfcfb00ff, 0x1efffffe, 0x070d0e10, 0x00003207, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, |
| 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, |
| 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, |
| 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, |
| 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, |
| 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, |
| 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, |
| 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, |
| 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, |
| 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, |
| 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, |
| 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, |
| 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, |
| 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, |
| 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, |
| 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
| 0x00000000, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, |
| 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, |
| 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, |
| 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, |
| 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, |
| 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, |
| 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, |
| 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, |
| 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, |
| 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, |
| 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, |
| 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, |
| 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, |
| 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, |
| 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, |
| 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, |
| 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, |
| 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, |
| 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, |
| 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, |
| 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, |
| 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, |
| 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, |
| 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, |
| 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, |
| 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, |
| 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, |
| 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, |
| 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, |
| 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, |
| 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, |
| 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, |
| 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, |
| 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, |
| 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, |
| 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, |
| 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, |
| 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, |
| 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, |
| 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff |
| }; |
| |
| uint32_t CodechalVdencHevcStateG12::GetMaxBtCount() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| uint32_t maxBtCount = 0; |
| |
| #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE) |
| auto btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment(); |
| |
| // DsConversion kernel |
| maxBtCount = 2 * (MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment)); |
| #endif |
| |
| // add ME and stream-in later |
| return maxBtCount; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::InitKernelStateMe() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface); |
| |
| uint32_t kernelSize = m_combinedKernelSize; |
| CODECHAL_KERNEL_HEADER currKrnHeader; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize( |
| m_kernelBinary, |
| VDENC_ME_P, |
| 0, |
| &currKrnHeader, |
| &kernelSize)); |
| |
| auto kernelStatePtr = &m_vdencMeKernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams( |
| VDENC_ME_P, |
| &kernelStatePtr->KernelParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable( |
| VDENC_ME_P, |
| &m_vdencMeKernelBindingTable)); |
| |
| kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); |
| kernelStatePtr->KernelParams.pBinary = |
| m_kernelBinary + |
| (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| kernelStatePtr->KernelParams.iSize = kernelSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested( |
| m_stateHeapInterface, |
| kernelStatePtr->KernelParams.iBTCount, |
| &kernelStatePtr->dwSshSize, |
| &kernelStatePtr->dwBindingTableSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize( |
| m_kernelBinary, |
| VDENC_ME_B, |
| 0, |
| &currKrnHeader, |
| &kernelSize)); |
| |
| kernelStatePtr = &m_vdencMeKernelStateRAB; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams( |
| VDENC_ME_B, |
| &kernelStatePtr->KernelParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable( |
| VDENC_ME_B, |
| &m_vdencStreaminKernelBindingTable)); |
| |
| kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); |
| kernelStatePtr->KernelParams.pBinary = |
| m_kernelBinary + |
| (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| kernelStatePtr->KernelParams.iSize = kernelSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested( |
| m_stateHeapInterface, |
| kernelStatePtr->KernelParams.iBTCount, |
| &kernelStatePtr->dwSshSize, |
| &kernelStatePtr->dwBindingTableSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::InitKernelStateStreamIn() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface); |
| |
| uint32_t kernelSize = m_combinedKernelSize; |
| CODECHAL_KERNEL_HEADER currKrnHeader; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize( |
| m_kernelBinary, |
| VDENC_STREAMIN_HEVC, |
| 0, |
| &currKrnHeader, |
| &kernelSize)); |
| |
| auto kernelStatePtr = &m_vdencStreaminKernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams( |
| VDENC_STREAMIN_HEVC, |
| &kernelStatePtr->KernelParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable( |
| VDENC_STREAMIN_HEVC, |
| &m_vdencStreaminKernelBindingTable)); |
| |
| kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); |
| kernelStatePtr->KernelParams.pBinary = |
| m_kernelBinary + |
| (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| kernelStatePtr->KernelParams.iSize = kernelSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested( |
| m_stateHeapInterface, |
| kernelStatePtr->KernelParams.iBTCount, |
| &kernelStatePtr->dwSshSize, |
| &kernelStatePtr->dwBindingTableSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize( |
| m_kernelBinary, |
| VDENC_STREAMIN_HEVC_RAB, |
| 0, |
| &currKrnHeader, |
| &kernelSize)); |
| |
| kernelStatePtr = &m_vdencStreaminKernelStateRAB; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams( |
| VDENC_STREAMIN_HEVC_RAB, |
| &kernelStatePtr->KernelParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable( |
| VDENC_STREAMIN_HEVC_RAB, |
| &m_vdencStreaminKernelBindingTable)); |
| |
| kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); |
| kernelStatePtr->KernelParams.pBinary = |
| m_kernelBinary + |
| (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| kernelStatePtr->KernelParams.iSize = kernelSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested( |
| m_stateHeapInterface, |
| kernelStatePtr->KernelParams.iBTCount, |
| &kernelStatePtr->dwSshSize, |
| &kernelStatePtr->dwBindingTableSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::InitKernelState() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE) |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe()); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateStreamIn()); |
| #endif |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::DecideEncodingPipeNumber() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| m_numPipePre = m_numPipe; |
| m_numPipe = m_numVdbox; |
| |
| uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| |
| CODECHAL_ENCODE_VERBOSEMESSAGE("Tile Columns = %d.", numTileColumns); |
| |
| if (numTileColumns > m_numPipe) |
| { |
| // Streaming buffer does does work if numTileColumns > m_numPipe |
| if (m_hevcSeqParams->EnableStreamingBufferLLC || m_hevcSeqParams->EnableStreamingBufferDDR) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Streaming buffer does does work if numTileColumns > m_numPipe!"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| m_numPipe = 1; |
| } |
| |
| if (numTileColumns < m_numPipe) |
| { |
| if (numTileColumns >= 1 && numTileColumns <= 4) |
| { |
| m_numPipe = numTileColumns; |
| } |
| else |
| { |
| m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode |
| } |
| } |
| |
| // Tile replay needs scalability enabled, Remove Resolution check for scalability |
| |
| m_useVirtualEngine = true; // always use virtual engine interface for single pipe and scalability mode |
| |
| m_numUsedVdbox = m_numPipe; |
| m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1); |
| |
| if (m_scalabilityState) |
| { |
| // Create/ re-use a GPU context with 2 pipes |
| m_scalabilityState->ucScalablePipeNum = m_numPipe; |
| } |
| |
| CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d, decided pipe num = %d.", m_numVdbox, m_numPipe); |
| |
| return eStatus; |
| } |
| |
| bool CodechalVdencHevcStateG12::CheckSupportedFormat(PMOS_SURFACE surface) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| bool isColorFormatSupported = false; |
| |
| if (nullptr == surface) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer."); |
| return isColorFormatSupported; |
| } |
| |
| switch (surface->Format) |
| { |
| case Format_NV12: |
| case Format_NV21: |
| case Format_P010: // Planar 4:2:0 |
| case Format_YUY2: |
| case Format_YUYV: |
| case Format_YVYU: |
| case Format_UYVY: |
| case Format_VYUY: |
| case Format_A8R8G8B8: |
| case Format_A8B8G8R8: |
| case Format_R10G10B10A2:// Packed RGB 4:4:4 |
| case Format_B10G10R10A2:// Packed RGB 4:4:4 |
| case Format_AYUV: |
| case Format_Y410: // Packed 4:4:4 |
| isColorFormatSupported = true; |
| break; |
| case Format_Y210: // Packed 4:2:2 |
| if (MEDIA_IS_WA(m_waTable, WaHEVCVDEncY210LinearInputNotSupported)) |
| { |
| isColorFormatSupported = surface->TileType == MOS_TILE_Y; |
| } |
| else |
| { |
| isColorFormatSupported = true; |
| } |
| break; |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format); |
| break; |
| } |
| |
| return isColorFormatSupported; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::PlatformCapabilityCheck() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber()); |
| |
| if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState, |
| (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt)); |
| } |
| |
| if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_16K_PIC_WIDTH * ENCODE_HEVC_MAX_16K_PIC_HEIGHT) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 16k not supported"); |
| } |
| |
| //GopRefDist -- 0: All-Intra, 1: LowDelayMode, > 1: Random Access |
| if (m_hevcSeqParams->GopRefDist > 1 && m_hevcSeqParams->TargetUsage == 7) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Random Access B in TU7 not supported"); |
| } |
| |
| // TU configuration for RDOQ |
| if (m_hevcRdoqEnabled) |
| { |
| m_hevcRdoqEnabled = (m_hevcSeqParams->TargetUsage < 7); |
| } |
| |
| // set RDOQ Intra blocks Threshold for Gen11+ |
| m_rdoqIntraTuThreshold = 0; |
| if (m_hevcRdoqEnabled) |
| { |
| if (1 == m_hevcSeqParams->TargetUsage) |
| { |
| m_rdoqIntraTuThreshold = 0xffff; |
| } |
| else if (4 == m_hevcSeqParams->TargetUsage) |
| { |
| m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb; |
| m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff); |
| } |
| } |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_rsvdState->PlatformCapabilityCheck()); |
| } |
| #endif |
| |
| return eStatus; |
| } |
| |
| CodechalVdencHevcStateG12::~CodechalVdencHevcStateG12() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (m_scalabilityState) |
| { |
| MOS_FreeMemAndSetNull(m_scalabilityState); |
| } |
| //Note: virtual engine interface destroy is done in MOS layer |
| |
| CODECHAL_DEBUG_TOOL( |
| DestroyHevcPar(); |
| MOS_Delete(m_encodeParState); |
| ) |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| MOS_Delete(m_rsvdState); |
| m_rsvdState = nullptr; |
| } |
| #endif |
| if(m_gpuCtxCreatOpt) |
| { |
| MOS_Delete(m_gpuCtxCreatOpt); |
| m_gpuCtxCreatOpt = nullptr; |
| } |
| return; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AllocatePakResources() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE; |
| uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE; |
| m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size); |
| |
| const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_LCU_SIZE); //assume smallest LCU to get max width |
| const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_LCU_SIZE); //assume smallest LCU to get max height |
| |
| MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam; |
| MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam)); |
| hcpBufSizeParam.ucMaxBitDepth = m_bitDepth; |
| hcpBufSizeParam.ucChromaFormat = m_chromaFormat; |
| // We should move the buffer allocation to picture level if the size is dependent on LCU size |
| hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size |
| hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE); |
| hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE); |
| |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| |
| // Deblocking Filter Row Store Scratch data surface |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resDeblockingFilterRowStoreScratchBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| // Deblocking Filter Tile Row Store Scratch data surface |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resDeblockingFilterTileRowStoreScratchBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| // Deblocking Filter Column Row Store Scratch data surface |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resDeblockingFilterColumnRowStoreScratchBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| // Metadata Line buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "MetadataLineBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resMetadataLineBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer."); |
| return eStatus; |
| } |
| |
| // Metadata Tile Line buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resMetadataTileLineBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer."); |
| return eStatus; |
| } |
| |
| // Metadata Tile Column buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resMetadataTileColumnBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer."); |
| return eStatus; |
| } |
| |
| // SAO Line buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "SaoLineBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSaoLineBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer."); |
| return eStatus; |
| } |
| |
| // SAO Tile Line buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSaoTileLineBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer."); |
| return eStatus; |
| } |
| |
| // SAO Tile Column buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSaoTileColumnBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer."); |
| return eStatus; |
| } |
| |
| // Lcu ILDB StreamOut buffer |
| allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE; |
| allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resLcuIldbStreamOutBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer."); |
| return eStatus; |
| } |
| |
| // Lcu Base Address buffer |
| // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled. |
| // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame. |
| // Align to page for HUC requirement |
| uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU; |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resLcuBaseAddressBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer."); |
| return eStatus; |
| } |
| |
| // SAO Row Store buffer |
| // Aligned to 4 for each tile column |
| uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE); |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(picWidthInMinLCU + 3 * maxTileColumn, 4) * 16; |
| allocParamsForBufferLinear.pBufName = "SaoRowStoreBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_vdencSAORowStoreBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO row store Buffer."); |
| return eStatus; |
| } |
| |
| // SAO StreamOut buffer |
| uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU; |
| //extra added size to cover tile enabled case, per tile width aligned to 4. 20: max tile column No. |
| size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU; |
| allocParamsForBufferLinear.dwBytes = size; |
| allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSaoStreamOutBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer."); |
| return eStatus; |
| } |
| |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| |
| // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP PipeBufAddr command |
| size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * m_maxTileNumber, CODECHAL_PAGE_SIZE); //Each tile has 9 cache size bytes of data, Align to page is HuC requirement |
| allocParamsForBufferLinear.dwBytes = size; |
| allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resFrameStatStreamOutBuffer), |
| "Failed to create VDENC FrameStatStreamOutBuffer Buffer"); |
| |
| // PAK Statistics buffer |
| size = MOS_ALIGN_CEIL(m_vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource( |
| m_standard, size, 1, pakStats, "pakStats")); |
| |
| // Slice Count buffer 1 DW = 4 Bytes |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE); |
| allocParamsForBufferLinear.pBufName = "Slice Count Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_sliceCountBuffer), |
| "Failed to create VDENC Slice Count Buffer"); |
| |
| // VDEncMode Timer buffer 1 DW = 4 Bytes |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE); |
| allocParamsForBufferLinear.pBufName = "VDEncMode Timer Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_vdencModeTimerBuffer), |
| "Failed to create VDEncMode Timer Buffer"); |
| |
| if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_enableTileReplay) |
| { |
| uint32_t maxTileRow = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE); |
| uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE); |
| |
| allocParamsForBufferLinear.dwBytes = maxTileRow*maxTileColumn*(sizeof(HwCounter)); |
| allocParamsForBufferLinear.pBufName = "HWCounter"; |
| allocParamsForBufferLinear.bIsPersistent = true; |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHwCountTileReplay), |
| "Failed to create tile base HW counter buffer"); |
| allocParamsForBufferLinear.bIsPersistent = false; |
| } |
| |
| uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE); |
| uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE); |
| uint32_t frameWidthInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MAX_LCU_SIZE_G10); |
| uint32_t frameHeightInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MAX_LCU_SIZE_G10); |
| uint32_t maxTileColumns = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE); |
| |
| // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command |
| // One CU has 16-byte. But, each tile needs to be aliged to the cache line |
| size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE); |
| allocParamsForBufferLinear.dwBytes = size; |
| allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resPakcuLevelStreamoutData.sResource)); |
| m_resPakcuLevelStreamoutData.dwSize = size; |
| CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size); |
| |
| // these 2 buffers are not used so far, but put the correct size calculation here |
| // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command |
| // One CU has 16-byte. But, each tile needs to be aliged to the cache line |
| size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE); |
| |
| // PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command |
| // one LCU has one cache line. Use CU as LCU during creation |
| size = frameWidthInLcus * frameHeightInLcus * CODECHAL_CACHELINE_SIZE; |
| |
| // Allocate SSE Source Pixel Row Store Buffer |
| m_sizeOfSseSrcPixelRowStoreBufferPerLcu = CODECHAL_CACHELINE_SIZE * (4 + 4) << 1; |
| allocParamsForBufferLinear.dwBytes = 2 * m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (m_widthAlignedMaxLcu + 3 * maxTileColumns); |
| allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSseSrcPixelRowStoreBuffer), |
| "Failed to create SseSrcPixelRowStoreBuffer"); |
| |
| //HCP scalability Sync buffer |
| allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE; |
| allocParamsForBufferLinear.pBufName = "GEN11 HCP scalability Sync buffer "; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHcpScalabilitySyncBuffer.sResource), |
| "Failed to create GEN11 HCP scalability Sync Buffer"); |
| |
| // create the tile coding state parameters |
| for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++) |
| { |
| m_tileParams[i] = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory( |
| sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)* m_maxTileNumber); |
| } |
| |
| if (m_enableHWSemaphore) |
| { |
| // Create the HW sync objects which will be used by each reference frame and BRC in GEN11 |
| allocParamsForBufferLinear.dwBytes = sizeof(uint32_t); |
| allocParamsForBufferLinear.pBufName = "SemaphoreMemory"; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| uint32_t* data = nullptr; |
| |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_refSync[i].resSemaphoreMem.sResource), |
| "Failed to create HW Semaphore Memory."); |
| m_refSync[i].resSemaphoreMem.dwSize = allocParamsForBufferLinear.dwBytes; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_refSync[i].resSemaphoreMem.sResource, |
| &lockFlagsWriteOnly)); |
| |
| *data = 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_refSync[i].resSemaphoreMem.sResource)); |
| } |
| } |
| |
| // create the HW semaphore buffer to sync up between VDBOXes. This is used to WA HW internal lock issue |
| if (m_enableVdBoxHWSemaphore) |
| { |
| allocParamsForBufferLinear.dwBytes = sizeof(uint32_t); |
| allocParamsForBufferLinear.pBufName = "VDBOX SemaphoreMemory"; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| uint32_t* data = nullptr; |
| |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resVdBoxSemaphoreMem[i].sResource), |
| "Failed to create VDBOX HW Semaphore Memory."); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resVdBoxSemaphoreMem[i].sResource, |
| &lockFlagsWriteOnly)); |
| |
| *data = 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_resVdBoxSemaphoreMem[i].sResource)); |
| } |
| } |
| |
| uint32_t* data = nullptr; |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| allocParamsForBufferLinear.dwBytes = sizeof(uint32_t); |
| allocParamsForBufferLinear.pBufName = "BrcPakSemaphoreMemory"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resBrcPakSemaphoreMem.sResource), |
| "Failed to create BRC PAK Semaphore Memory."); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resBrcPakSemaphoreMem.sResource, |
| &lockFlagsWriteOnly)); |
| |
| *data = 0; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_resBrcPakSemaphoreMem.sResource)); |
| |
| // 3rd level batch buffer |
| // To be moved to a more proper place later |
| MOS_ZeroMemory(&m_thirdLevelBatchBuffer, sizeof(m_thirdLevelBatchBuffer)); |
| m_thirdLevelBatchBuffer.bSecondLevel = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( |
| m_osInterface, |
| &m_thirdLevelBatchBuffer, |
| nullptr, |
| m_thirdLBSize)); |
| |
| if (m_enableTileStitchByHW) |
| { |
| if (Mos_ResourceIsNull(&m_resHucStatus2Buffer)) |
| { |
| // HUC STATUS 2 Buffer for HuC status check in COND_BB_END |
| allocParamsForBufferLinear.dwBytes = sizeof(uint64_t); |
| allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer"; |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( |
| m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHucStatus2Buffer), |
| "%s: Failed to allocate HUC STATUS 2 Buffer\n", |
| __FUNCTION__); |
| } |
| uint8_t *data; |
| for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) |
| { |
| for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++) |
| { |
| // HuC stitching Data buffer |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandDataVdencG12), CODECHAL_PAGE_SIZE); |
| allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer"; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHucStitchDataBuffer[i][j])); |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resHucStitchDataBuffer[i][j], |
| &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(pData); |
| MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]); |
| } |
| } |
| //Second level BB for huc stitching cmd |
| MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer)); |
| m_HucStitchCmdBatchBuffer.bSecondLevel = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( |
| m_osInterface, |
| &m_HucStitchCmdBatchBuffer, |
| nullptr, |
| m_hwInterface->m_HucStitchCmdBatchBufferSize)); |
| } |
| |
| if (m_numDelay) |
| { |
| allocParamsForBufferLinear.dwBytes = sizeof(uint32_t); |
| allocParamsForBufferLinear.pBufName = "DelayMinusMemory"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resDelayMinus), "Failed to allocate delay minus memory."); |
| |
| uint8_t* data; |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| data = (uint8_t*)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resDelayMinus, |
| &lockFlags); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(data, sizeof(uint32_t)); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::FreePakResources() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resSseSrcPixelRowStoreBuffer); |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resHcpScalabilitySyncBuffer.sResource); |
| m_osInterface->pfnFreeResource(m_osInterface, &m_vdencSAORowStoreBuffer); |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource); |
| if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_enableTileReplay) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resHwCountTileReplay); |
| } |
| |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource); |
| } |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource); |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource); |
| |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer); |
| |
| for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++) |
| { |
| for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]); |
| } |
| } |
| |
| if (m_numDelay) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus); |
| } |
| |
| for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++) |
| { |
| MOS_FreeMemory(m_tileParams[i]); |
| } |
| |
| // command buffer for VE, allocated in MOS_STATUS CodechalEncodeHevcBase::VerifyCommandBufferSize() |
| for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++) |
| { |
| for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++) |
| { |
| for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++) |
| { |
| PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k]; |
| |
| if (!Mos_ResourceIsNull(&cmdBuffer->OsResource)) |
| { |
| if (cmdBuffer->pCmdBase) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource); |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource); |
| } |
| } |
| } |
| } |
| |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++) |
| { |
| auto sync = &m_refSync[i]; |
| |
| if (!Mos_ResourceIsNull(&sync->resSyncObject)) |
| { |
| // if this object has been signaled before, we need to wait to ensure singal-wait is in pair. |
| if (sync->uiSemaphoreObjCount || sync->bInUsed) |
| { |
| MOS_SYNC_PARAMS syncParams = g_cInitSyncParams; |
| syncParams.GpuContext = m_renderContext; |
| syncParams.presSyncResource = &sync->resSyncObject; |
| syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount; |
| m_osInterface->pfnEngineWait(m_osInterface, &syncParams); |
| } |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource); |
| } |
| |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resVdBoxSemaphoreMem[i].sResource); |
| } |
| |
| if (m_enableTileStitchByHW) |
| { |
| for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) |
| { |
| for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++) |
| { |
| // HuC stitching Data buffer |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_resHucStitchDataBuffer[i][j]); |
| } |
| } |
| //Second level BB for huc stitching cmd |
| Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr); |
| } |
| |
| Mhw_FreeBb(m_osInterface, &m_thirdLevelBatchBuffer, nullptr); |
| FreeTileLevelBatch(); |
| FreeTileRowLevelBRCBatch(); |
| |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcPakSemaphoreMem.sResource); |
| |
| return CodechalVdencHevcState::FreePakResources(); |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AllocateEncResources() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateEncResources()); |
| |
| if (m_hmeSupported) |
| { |
| HmeParams hmeParams; |
| |
| MOS_ZeroMemory(&hmeParams, sizeof(hmeParams)); |
| hmeParams.b4xMeDistortionBufferSupported = true; |
| hmeParams.ps16xMeMvDataBuffer = &m_s16XMeMvDataBuffer; |
| hmeParams.ps32xMeMvDataBuffer = &m_s32XMeMvDataBuffer; |
| hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer; |
| hmeParams.ps4xMeMvDataBuffer = &m_s4XMeMvDataBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources4xME(&hmeParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources16xME(&hmeParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources32xME(&hmeParams)); |
| } |
| |
| // VDENC tile row store buffer |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) * CODECHAL_CACHELINE_SIZE * 2; |
| allocParamsForBufferLinear.pBufName = "VDENC Tile Row Store Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_vdencTileRowStoreBuffer), |
| "Failed to allocate VDENC Tile Row Store Buffer"); |
| |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForSurface; |
| MOS_ZeroMemory(&allocParamsForSurface, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForSurface.Type = MOS_GFXRES_BUFFER; |
| allocParamsForSurface.TileType = MOS_TILE_LINEAR; |
| allocParamsForSurface.Format = Format_Buffer; |
| allocParamsForSurface.dwBytes = m_numLcu * 4; |
| allocParamsForSurface.pBufName = "VDEnc Cumulative CU Count Streamout Surface"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForSurface, |
| &m_vdencCumulativeCuCountStreamoutSurface), |
| "Failed to allocate VDEnc Cumulative CU Count Streamout Surface"); |
| |
| // Move from CodechalVdencHevcState::AllocateEncResources() |
| |
| // PAK stream-out buffer |
| allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_PAK_STREAMOUT_SIZE; |
| allocParamsForBufferLinear.pBufName = "Pak StreamOut Buffer"; |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resStreamOutBuffer[0]), |
| "Failed to allocate Pak Stream Out Buffer."); |
| |
| // VDENC Intra Row Store Scratch buffer |
| // 1 cacheline per MB |
| // Double the size for Tile Replay |
| uint32_t size = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * CODECHAL_CACHELINE_SIZE * 2 * 2; |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource( |
| m_standard, size, 1, vdencIntraRowStoreScratch, "vdencIntraRowStoreScratch")); |
| |
| // VDENC Statistics buffer |
| // Enabled for BRC only |
| size = MOS_ALIGN_CEIL(m_vdencBrcStatsBufferSize * m_maxTileNumber, CODECHAL_PAGE_SIZE); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource( |
| m_standard, size, 1, vdencStats, "vdencStats")); |
| |
| // end of CodechalVdencHevcState::AllocateEncResources() |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_rsvdState->AllocateEncResources()); |
| } |
| #endif |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::FreeEncResources() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_osInterface->pfnFreeResource(m_osInterface, &m_vdencTileRowStoreBuffer); |
| m_osInterface->pfnFreeResource(m_osInterface, &m_vdencCumulativeCuCountStreamoutSurface); |
| |
| // Free ME resources |
| HmeParams hmeParams; |
| |
| MOS_ZeroMemory(&hmeParams, sizeof(hmeParams)); |
| hmeParams.ps16xMeMvDataBuffer = &m_s16XMeMvDataBuffer; |
| hmeParams.ps32xMeMvDataBuffer = &m_s32XMeMvDataBuffer; |
| hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer; |
| hmeParams.ps4xMeMvDataBuffer = &m_s4XMeMvDataBuffer; |
| DestroyMEResources(&hmeParams); |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_rsvdState->FreeEncResources()); |
| } |
| #endif |
| |
| return CodechalVdencHevcState::FreeEncResources(); |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AllocateBrcResources() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateBrcResources()); |
| |
| uint32_t* data = nullptr; |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = sizeof(uint32_t); |
| allocParamsForBufferLinear.pBufName = "TileRowBRCSyncSemaphore"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resTileRowBRCsyncSemaphore), |
| "Failed to create Tile Row BRC sync Semaphore Memory."); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resTileRowBRCsyncSemaphore, |
| &lockFlagsWriteOnly)); |
| |
| *data = 0; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_resTileRowBRCsyncSemaphore)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::FreeBrcResources() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resTileRowBRCsyncSemaphore); |
| return CodechalVdencHevcState::FreeBrcResources(); |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AllocateTileLevelBatch() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| // Only allocate when the number of tile changed |
| if (m_numTileBatchAllocated >= m_numTiles) |
| { |
| return eStatus; |
| } |
| |
| // Make it simple, free first if need reallocate |
| if (m_numTileBatchAllocated > 0) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeTileLevelBatch()); |
| } |
| |
| // First allocate the batch buffer array |
| for (int32_t idx = 0; idx < CODECHAL_VDENC_BRC_NUM_OF_PASSES; idx++) |
| { |
| if (m_tileLevelBatchBuffer[idx] == nullptr) |
| { |
| m_tileLevelBatchBuffer[idx] = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(sizeof(MHW_BATCH_BUFFER) * m_numTiles); |
| |
| if (nullptr == m_tileLevelBatchBuffer[idx]) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Allocate memory for tile batch buffer failed"); |
| return MOS_STATUS_NO_SPACE; |
| } |
| } |
| |
| // Allocate the batch buffer for each tile |
| uint32_t i = 0; |
| for (i = 0; i < m_numTiles; i++) |
| { |
| MOS_ZeroMemory(&m_tileLevelBatchBuffer[idx][i], sizeof(MHW_BATCH_BUFFER)); |
| m_tileLevelBatchBuffer[idx][i].bSecondLevel = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( |
| m_osInterface, |
| &m_tileLevelBatchBuffer[idx][i], |
| nullptr, |
| m_tileLevelBatchSize)); |
| } |
| } |
| |
| // Record the number of allocated batch buffer for tiles |
| m_numTileBatchAllocated = m_numTiles; |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::FreeTileLevelBatch() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| // Free the batch buffer for each tile |
| uint32_t i = 0; |
| uint32_t j = 0; |
| for (i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++) |
| { |
| for (j = 0; j < m_numTileBatchAllocated; j++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_FreeBb(m_osInterface, &m_tileLevelBatchBuffer[i][j], nullptr)); |
| } |
| |
| MOS_FreeMemory(m_tileLevelBatchBuffer[i]); |
| m_tileLevelBatchBuffer[i] = nullptr; |
| } |
| |
| // Reset the number of tile batch allocated |
| m_numTileBatchAllocated = 0; |
| |
| return eStatus; |
| } |
| |
| //! |
| //! \brief    Allocate the tile row level BRC batch buffers |
| //! \details  Allocates, for each of the CODECHAL_VDENC_BRC_NUM_OF_PASSES passes, an array of |
| //!           m_numTileRows second level batch buffers of m_hwInterface->m_hucCommandBufferSize bytes. |
| //!           Nothing is done while the current allocation already covers m_numTileRows; a smaller |
| //!           allocation is freed first. If any allocation fails, everything allocated by this call is |
| //!           released again so that no partially filled array is left behind for a later call. |
| //! \return   MOS_STATUS |
| //!           MOS_STATUS_SUCCESS if success, MOS_STATUS_NO_SPACE if an array cannot be allocated, |
| //!           else the error returned by FreeTileRowLevelBRCBatch or Mhw_AllocateBb |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::AllocateTileRowLevelBRCBatch() |
| { |
|     CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
|     MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
|     // Only allocate when the number of tile rows grew beyond what is already allocated |
|     if (m_numTileRowBRCBatchAllocated >= m_numTileRows) |
|     { |
|         return eStatus; |
|     } |
| |
|     // Make it simple, free first if need reallocate |
|     if (m_numTileRowBRCBatchAllocated > 0) |
|     { |
|         CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeTileRowLevelBRCBatch()); |
|     } |
| |
|     // Releases what this call allocated so far: all tile rows of the passes before failedPass and the |
|     // first numAllocatedInFailedPass tile rows of failedPass. m_numTileRowBRCBatchAllocated is still 0 at |
|     // that point, so FreeTileRowLevelBRCBatch would not release these buffers. |
|     auto rollback = [this](int32_t failedPass, uint32_t numAllocatedInFailedPass) |
|     { |
|         for (int32_t pass = 0; pass <= failedPass; pass++) |
|         { |
|             if (m_TileRowBRCBatchBuffer[pass] == nullptr) |
|             { |
|                 continue; |
|             } |
| |
|             uint32_t numToFree = (pass < failedPass) ? m_numTileRows : numAllocatedInFailedPass; |
|             for (uint32_t row = 0; row < numToFree; row++) |
|             { |
|                 // Best effort: the allocation error is what gets reported to the caller |
|                 Mhw_FreeBb(m_osInterface, &m_TileRowBRCBatchBuffer[pass][row], nullptr); |
|             } |
| |
|             MOS_FreeMemory(m_TileRowBRCBatchBuffer[pass]); |
|             m_TileRowBRCBatchBuffer[pass] = nullptr; |
|         } |
|     }; |
| |
|     for (int32_t idx = 0; idx < CODECHAL_VDENC_BRC_NUM_OF_PASSES; idx++) |
|     { |
|         // First allocate the batch buffer array |
|         if (m_TileRowBRCBatchBuffer[idx] == nullptr) |
|         { |
|             m_TileRowBRCBatchBuffer[idx] = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(sizeof(MHW_BATCH_BUFFER) * m_numTileRows); |
| |
|             if (nullptr == m_TileRowBRCBatchBuffer[idx]) |
|             { |
|                 CODECHAL_ENCODE_ASSERTMESSAGE("Allocate memory for tile row level BRC batch buffer failed"); |
|                 rollback(idx, 0); |
|                 return MOS_STATUS_NO_SPACE; |
|             } |
|         } |
| |
|         // Allocate the batch buffer for each tile row |
|         for (uint32_t i = 0; i < m_numTileRows; i++) |
|         { |
|             MOS_ZeroMemory(&m_TileRowBRCBatchBuffer[idx][i], sizeof(MHW_BATCH_BUFFER)); |
|             m_TileRowBRCBatchBuffer[idx][i].bSecondLevel = true; |
|             eStatus = Mhw_AllocateBb( |
|                 m_osInterface, |
|                 &m_TileRowBRCBatchBuffer[idx][i], |
|                 nullptr, |
|                 m_hwInterface->m_hucCommandBufferSize); |
|             if (eStatus != MOS_STATUS_SUCCESS) |
|             { |
|                 CODECHAL_ENCODE_ASSERTMESSAGE("Allocate tile row level BRC batch buffer failed"); |
|                 // Entry i itself was not allocated, only the i entries before it |
|                 rollback(idx, i); |
|                 return eStatus; |
|             } |
|         } |
|     } |
| |
|     // Record the number of allocated batch buffer for tile rows |
|     m_numTileRowBRCBatchAllocated = m_numTileRows; |
|     return eStatus; |
| } |
| |
| //! |
| //! \brief    Free all tile row level BRC batch buffers |
| //! \details  For each of the CODECHAL_VDENC_BRC_NUM_OF_PASSES passes, frees the first |
| //!           m_numTileRowBRCBatchAllocated batch buffers of m_TileRowBRCBatchBuffer and then releases |
| //!           the array itself. A failing Mhw_FreeBb does not abort the clean-up: the remaining buffers |
| //!           and arrays are still released and the allocation count is still reset, so a later call |
| //!           cannot walk over already released buffers again. |
| //! \return   MOS_STATUS |
| //!           MOS_STATUS_SUCCESS if every buffer was freed, else the first error returned by Mhw_FreeBb |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::FreeTileRowLevelBRCBatch() |
| { |
|     CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
|     MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
|     for (uint32_t pass = 0; pass < CODECHAL_VDENC_BRC_NUM_OF_PASSES; pass++) |
|     { |
|         // Nothing was allocated for this pass, so there is nothing to index into |
|         if (m_TileRowBRCBatchBuffer[pass] == nullptr) |
|         { |
|             continue; |
|         } |
| |
|         // Free the batch buffer of each tile row |
|         for (uint32_t row = 0; row < m_numTileRowBRCBatchAllocated; row++) |
|         { |
|             MOS_STATUS freeStatus = Mhw_FreeBb(m_osInterface, &m_TileRowBRCBatchBuffer[pass][row], nullptr); |
|             if (freeStatus != MOS_STATUS_SUCCESS) |
|             { |
|                 CODECHAL_ENCODE_ASSERTMESSAGE("Free tile row level BRC batch buffer failed"); |
|                 // Keep releasing the rest, but report the first failure to the caller |
|                 if (eStatus == MOS_STATUS_SUCCESS) |
|                 { |
|                     eStatus = freeStatus; |
|                 } |
|             } |
|         } |
| |
|         MOS_FreeMemory(m_TileRowBRCBatchBuffer[pass]); |
|         m_TileRowBRCBatchBuffer[pass] = nullptr; |
|     } |
| |
|     // Reset the number of tile row BRC batch allocated |
|     m_numTileRowBRCBatchAllocated = 0; |
| |
|     return eStatus; |
| } |
| |
| //! |
| //! \brief    Initialize the encoder state for the picture described by params |
| //! \param    [in] params |
| //!           Encoder parameters, forwarded unchanged to the base class |
| //! \return   MOS_STATUS |
| //!           Status returned by CodechalVdencHevcState::InitializePicture |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::InitializePicture(const EncoderParams& params) |
| { |
|     CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
|     // Only the common initialization of the base class is performed here |
|     const MOS_STATUS commonInitStatus = CodechalVdencHevcState::InitializePicture(params); |
|     return commonInitStatus; |
| } |
| |
| //! |
| //! \brief    Set up the picture level state on top of the base class set-up |
| //! \details  After CodechalVdencHevcState::SetPictureStructs succeeded, a 4:2:2 input with 4:2:2 output |
| //!           requires a YUY2 recon surface of sufficient size, which is then updated through |
| //!           UpdateYUY2SurfaceInfo. m_numPasses is forced to 0 when tile replay is enabled and the |
| //!           last-picture flags are always cleared. |
| //! \return   MOS_STATUS |
| //!           MOS_STATUS_SUCCESS if success, MOS_STATUS_INVALID_PARAMETER for an unusable recon surface, |
| //!           else the error returned by the base class |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::SetPictureStructs() |
| { |
|     CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
|     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetPictureStructs()); |
| |
|     MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
|     const bool input422  = (m_chromaFormat == (uint8_t)HCP_CHROMA_FORMAT_YUV422); |
|     const bool output422 = (m_outputChromaFormat == (uint8_t)HCP_CHROMA_FORMAT_YUV422); |
| |
|     if (input422 && output422) |
|     { |
|         // The recon surface has to be YUY2 ... |
|         if (m_reconSurface.Format != Format_YUY2) |
|         { |
|             eStatus = MOS_STATUS_INVALID_PARAMETER; |
|             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface format is not correct!"); |
|         } |
| |
|         // ... and at least twice the original height and half the original width |
|         const bool reconTooSmall = |
|             (m_reconSurface.dwHeight < m_oriFrameHeight * 2) || |
|             (m_reconSurface.dwWidth < m_oriFrameWidth / 2); |
|         if (reconTooSmall) |
|         { |
|             eStatus = MOS_STATUS_INVALID_PARAMETER; |
|             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface allocation size is not correct!"); |
|         } |
| |
|         // update Recon surface to Variant format |
|         CodechalEncodeHevcBase::UpdateYUY2SurfaceInfo(&m_reconSurface, m_is10BitHevc); |
|     } |
| |
|     // With tile replay the frame level BRC runs a single pass |
|     if (m_enableTileReplay) |
|     { |
|         m_numPasses = 0; |
|     } |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
|     if (m_rsvdState) |
|     { |
|         m_rsvdState->SetPictureStructs(); |
|     } |
| #endif |
| |
|     // WA: EOS is not working on GEN12, so both last-picture flags are always cleared |
|     m_lastPicInStream = false; |
|     m_lastPicInSeq    = false; |
| |
|     return eStatus; |
| } |
| |
| //! |
| //! \brief    Build the encode status report of one frame |
| //! \details  Frames encoded on a single VDBOX without tile replay (or with tile replay and a single tile) |
| //!           are handed to CodechalVdencHevcState::GetStatusReport. For every other frame the size, offset |
| //!           and position of each tile are reported through encodeStatusReport->pHEVCTileinfo using the |
| //!           tile size record written by HW, the frame size and average QP are accumulated over all tiles |
| //!           and, unless HW stitching or content protection is in use, the tile bitstreams are packed back |
| //!           to back by the driver. Every resource locked here is unlocked again on all exit paths. |
| //! \param    [in, out] encodeStatus |
| //!           Encode status of the frame; the slice report is read and the cumulative frame delta QP is reset |
| //! \param    [in, out] encodeStatusReport |
| //!           Status report to fill |
| //! \return   MOS_STATUS |
| //!           MOS_STATUS_SUCCESS if the report was filled or the frame is not complete yet, else fail reason |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::GetStatusReport( |
|     EncodeStatus *encodeStatus, |
|     EncodeStatusReport *encodeStatusReport) |
| { |
|     CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
|     MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
|     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus); |
|     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport); |
| |
|     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpVdencOutputs())); |
| |
|     // With tile replay and more than one tile the tile sizes have to be reported because the bit stream is not continuous; |
|     // a single VDBOX frame without that constraint is fully handled by the base class |
|     if ((encodeStatusReport->UsedVdBoxNumber == 1) && (!m_enableTileReplay || (m_enableTileReplay && encodeStatusReport->NumberTilesInFrame == 1))) |
|     { |
|         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::GetStatusReport(encodeStatus, encodeStatusReport)); |
|         return eStatus; |
|     } |
| |
|     // Allocate the tile size report memory, replacing any buffer still attached to the report |
|     encodeStatusReport->SizeOfTileInfoBuffer = encodeStatusReport->NumberTilesInFrame * sizeof(CodechalTileInfo); |
|     if (encodeStatusReport->pHEVCTileinfo) |
|     { |
|         MOS_FreeMemory(encodeStatusReport->pHEVCTileinfo); |
|         encodeStatusReport->pHEVCTileinfo = nullptr; |
|     } |
|     encodeStatusReport->pHEVCTileinfo = (CodechalTileInfo *)MOS_AllocAndZeroMemory(encodeStatusReport->SizeOfTileInfoBuffer); |
|     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport->pHEVCTileinfo); |
| |
|     // In case of CQP, PAK integration kernel is not called, so used tile size record from HW |
|     // PAK integration kernel does not handle stitching for single pipe mode |
|     PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx]; |
|     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[encodeStatusReport->CurrOriginalPic.FrameIdx]; |
|     // tileParams is indexed per tile below, so it must be checked before anything is locked |
|     CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams); |
| |
|     MOS_LOCK_PARAMS lockFlags; |
|     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
|     lockFlags.ReadOnly = 1; |
|     HCPPakHWTileSizeRecord_G12* tileStatusReport = (HCPPakHWTileSizeRecord_G12*)m_osInterface->pfnLockResource( |
|         m_osInterface, |
|         &tileSizeStatusReport->sResource, |
|         &lockFlags); |
|     CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport); |
| |
|     // True while encodeStatus->sliceReport.pSliceSize is locked, so that it gets unlocked on every exit path |
|     bool sliceSizeLocked = false; |
| |
|     // All work done while the tile size record is locked lives in this helper: whatever way it returns, |
|     // the unlock calls after it are reached. |
|     auto fillTileReport = [&]() -> MOS_STATUS |
|     { |
|         encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL; |
|         encodeStatusReport->PanicMode = false; |
|         encodeStatusReport->AverageQp = 0; |
|         encodeStatusReport->QpY = 0; |
|         encodeStatusReport->SuggestedQpYDelta = 0; |
|         encodeStatusReport->NumberPasses = 1; |
|         encodeStatusReport->bitstreamSize = 0; |
|         encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0; |
|         encodeStatusReport->NumberSlices = 0; |
| |
|         uint32_t* sliceSize = nullptr; |
| |
|         // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call |
|         if (encodeStatus->sliceReport.pSliceSize) |
|         { |
|             sliceSize = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize, &lockFlags); |
|             CODECHAL_ENCODE_CHK_NULL_RETURN(sliceSize); |
|             sliceSizeLocked = true; |
|         } |
| |
|         uint32_t totalCU = 0; |
|         uint32_t sliceCount = 0; |
|         double sumQp = 0.0; |
|         for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++) |
|         { |
|             // A zero length means the record of this tile is not available yet: report the frame as incomplete |
|             if (tileStatusReport[i].Length == 0) |
|             { |
|                 encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE; |
|                 return MOS_STATUS_SUCCESS; |
|             } |
|             //update tile info with HW counter |
|             if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_enableTileReplay) |
|             { |
|                 MOS_LOCK_PARAMS LockFlagsNoOverWrite; |
|                 MOS_ZeroMemory(&LockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS)); |
|                 LockFlagsNoOverWrite.WriteOnly = 1; |
|                 LockFlagsNoOverWrite.NoOverWrite = 1; |
| |
|                 uint8_t* dataHWCountTileReplay = (uint8_t*)m_osInterface->pfnLockResource( |
|                     m_osInterface, |
|                     &m_resHwCountTileReplay, |
|                     &LockFlagsNoOverWrite); |
| |
|                 CODECHAL_ENCODE_CHK_NULL_RETURN(dataHWCountTileReplay); |
|                 uint64_t *pAddress2Counter = (uint64_t *)(dataHWCountTileReplay + i * sizeof(HwCounter)); |
|                 encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count = *pAddress2Counter; |
|                 encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count = SwapEndianness(encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count); //Report back in Big endian |
|                 encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV = *(++pAddress2Counter); |
|                 encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV = SwapEndianness(encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV); //Report back in Big endian |
|                 CODECHAL_ENCODE_NORMALMESSAGE("tile = %d, hwCounterValue.Count = 0x%llx, hwCounterValue.IV = 0x%llx", i, encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count, encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV); |
|                 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHwCountTileReplay); |
|             } |
|             encodeStatusReport->pHEVCTileinfo[i].TileSizeInBytes = tileStatusReport[i].Length; |
|             // The offset only valid if there is no stream stitching |
|             encodeStatusReport->pHEVCTileinfo[i].TileBitStreamOffset = tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE; |
|             encodeStatusReport->pHEVCTileinfo[i].TileRowNum = i / tileParams[i].NumOfTileColumnsInFrame; |
|             encodeStatusReport->pHEVCTileinfo[i].TileColNum = i % tileParams[i].NumOfTileColumnsInFrame; |
|             encodeStatusReport->NumTileReported = i + 1; |
|             encodeStatusReport->bitstreamSize += tileStatusReport[i].Length; |
|             totalCU += (tileParams[i].TileHeightInMinCbMinus1 + 1) * (tileParams[i].TileWidthInMinCbMinus1 + 1); |
|             sumQp += tileStatusReport[i].Hcp_Qp_Status_Count; |
| |
|             if (sliceSize) |
|             { |
|                 encodeStatusReport->pSliceSizes = (uint16_t*)sliceSize; |
|                 encodeStatusReport->NumberSlices += (uint8_t)tileStatusReport[i].Hcp_Slice_Count_Tile; |
|                 uint16_t prevCumulativeSliceSize = 0; |
|                 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App |
|                 for (uint32_t idx = 0; idx < tileStatusReport[i].Hcp_Slice_Count_Tile; idx++) |
|                 { |
|                     // PAK output the sliceSize at 16DW intervals. |
|                     // Convert cumulative slice size to individual, first slice may have PPS/SPS |
|                     uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16]; |
|                     encodeStatusReport->pSliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize; |
|                     prevCumulativeSliceSize += encodeStatusReport->pSliceSizes[sliceCount]; |
|                     sliceCount++; |
|                 } |
|             } |
|         } |
| |
|         if (sliceSize) |
|         { |
|             encodeStatusReport->SizeOfSliceSizesBuffer = sizeof(uint16_t) * encodeStatusReport->NumberSlices; |
|             encodeStatusReport->SliceSizeOverflow = (encodeStatus->sliceReport.SliceSizeOverflow >> 16) & 1; |
|             m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize); |
|             sliceSizeLocked = false; |
|         } |
| |
|         CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport)); |
| |
|         if (encodeStatusReport->bitstreamSize == 0 || |
|             encodeStatusReport->bitstreamSize > m_bitstreamUpperBound) |
|         { |
|             encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR; |
|             encodeStatusReport->bitstreamSize = 0; |
|             return MOS_STATUS_INVALID_FILE_SIZE; |
|         } |
| |
|         if (totalCU == 0) |
|         { |
|             return MOS_STATUS_INVALID_PARAMETER; |
|         } |
|         encodeStatusReport->QpY = encodeStatusReport->AverageQp = |
|             (uint8_t)((sumQp / (double)totalCU) / 4.0);  // due to TU is 4x4 and there are 4 TUs in one CU |
| |
|         //Driver stitching is only done without HW stitching and is not allowed for secure encode case |
|         if (!m_enableTileStitchByHW && !m_osInterface->osCpInterface->IsCpEnabled()) |
|         { |
|             PCODEC_REF_LIST currRefList = encodeStatus->encodeStatusReport.pCurrRefList; |
|             CODECHAL_ENCODE_CHK_NULL_RETURN(currRefList); |
| |
|             uint8_t *tempBsBuffer = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize); |
|             CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer); |
|             uint8_t *bufPtr = tempBsBuffer; |
| |
|             // NOTE(review): the bitstream is written below although it is locked with ReadOnly set - confirm this is intended |
|             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
|             lockFlags.ReadOnly = 1; |
|             uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource( |
|                 m_osInterface, |
|                 &currRefList->resBitstreamBuffer, |
|                 &lockFlags); |
|             if (bitstream == nullptr) |
|             { |
|                 // Do not leak the staging buffer when the bitstream cannot be locked |
|                 MOS_FreeMemory(tempBsBuffer); |
|                 CODECHAL_ENCODE_CHK_NULL_RETURN(bitstream); |
|             } |
| |
|             // Gather the tiles back to back in the staging buffer ... |
|             for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++) |
|             { |
|                 uint32_t offset = tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE; |
|                 uint32_t len = tileStatusReport[i].Length; |
| |
|                 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len); |
|                 bufPtr += len; |
|             } |
| |
|             // ... then write them to the start of the bitstream buffer and clear what follows |
|             MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize); |
|             MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize], m_bitstreamUpperBound - encodeStatusReport->bitstreamSize); |
| |
|             m_osInterface->pfnUnlockResource(m_osInterface, &currRefList->resBitstreamBuffer); |
| |
|             MOS_FreeMemory(tempBsBuffer); |
|         } |
| |
|         // clean-up the tile status report buffer, only done once the frame was reported successfully |
|         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame); |
| |
|         return MOS_STATUS_SUCCESS; |
|     }; |
| |
|     eStatus = fillTileReport(); |
| |
|     // Single release point for everything that may still be locked |
|     if (sliceSizeLocked) |
|     { |
|         m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize); |
|     } |
|     m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource); |
| |
|     return eStatus; |
| } |
| |
| //! |
| //! \brief    Validate the reference lists of a slice and clamp the number of active references |
| //! \details  A B slice whose L0 and L1 lists differ in active count or in any entry is treated as random |
| //!           access, which is rejected together with rolling intra refresh. The active reference counts |
| //!           are limited to 2 (L0) and 1 (L1) for random access, else to m_numMaxVdencL0Ref and m_numMaxVdencL1Ref. |
| //! \param    [in, out] slcParams |
| //!           Slice parameters; num_ref_idx_l0/l1_active_minus1 may be lowered |
| //! \return   MOS_STATUS |
| //!           MOS_STATUS_SUCCESS if success, else fail reason |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams) |
| { |
|     CODECHAL_ENCODE_CHK_NULL_RETURN(slcParams); |
| |
|     MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
|     // Random access: a B slice with L0 and L1 not being identical |
|     bool isRandomAccess = false; |
|     if (slcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE) |
|     { |
|         isRandomAccess = (slcParams->num_ref_idx_l0_active_minus1 != slcParams->num_ref_idx_l1_active_minus1); |
| |
|         // One differing entry is enough, no need to look further once found |
|         for (auto refIdx = 0; !isRandomAccess && refIdx < CODEC_MAX_NUM_REF_FRAME_HEVC; refIdx++) |
|         { |
|             isRandomAccess = (slcParams->RefPicList[0][refIdx].PicEntry != slcParams->RefPicList[1][refIdx].PicEntry); |
|         } |
|     } |
| |
|     // Rolling intra refresh cannot be combined with random access |
|     if (isRandomAccess && m_hevcPicParams->bEnableRollingIntraRefresh) |
|     { |
|         CODECHAL_ENCODE_ASSERT(false); |
|         eStatus = MOS_STATUS_INVALID_PARAMETER; |
|     } |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
|     if (m_rsvdState) |
|     { |
|         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_rsvdState->ValidateRefFrameData(isRandomAccess)); |
|     } |
| #endif |
| |
|     uint8_t maxNumRef0 = isRandomAccess ? 2 : m_numMaxVdencL0Ref; |
|     uint8_t maxNumRef1 = isRandomAccess ? 1 : m_numMaxVdencL1Ref; |
| |
|     // Clamp L0 to the supported number of references |
|     const bool tooManyL0 = slcParams->num_ref_idx_l0_active_minus1 > maxNumRef0 - 1; |
|     if (tooManyL0) |
|     { |
|         CODECHAL_ENCODE_ASSERT(false); |
|         slcParams->num_ref_idx_l0_active_minus1 = maxNumRef0 - 1; |
|     } |
| |
|     // Clamp L1 to the supported number of references |
|     const bool tooManyL1 = slcParams->num_ref_idx_l1_active_minus1 > maxNumRef1 - 1; |
|     if (tooManyL1) |
|     { |
|         CODECHAL_ENCODE_ASSERT(false); |
|         slcParams->num_ref_idx_l1_active_minus1 = maxNumRef1 - 1; |
|     } |
| |
|     return eStatus; |
| } |
| |
| //! |
| //! \brief    Report user feature keys |
| //! \details  Runs the base class report first. Debug and release-internal builds additionally write |
| //!           the VE debug override value, the number of pipes and the VE context based scheduling support. |
| //! \return   MOS_STATUS |
| //!           MOS_STATUS_SUCCESS if success, else the error returned by the base class |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::UserFeatureKeyReport() |
| { |
|     CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
|     // Keys common to all VDEnc HEVC encoders; bail out if they cannot be reported |
|     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::UserFeatureKeyReport()); |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
|     // GEN12 specific keys |
|     CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value); |
|     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe); |
|     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface)); |
| #endif |
| |
|     return MOS_STATUS_SUCCESS; |
| } |
| |
| //! |
| //! \brief    Run the kernels needed before VDEnc for the current frame |
| //! \details  Only compiled in when ENABLE_KERNELS is defined and _FULL_OPEN_SOURCE is not; otherwise the |
| //!           function just returns MOS_STATUS_SUCCESS. When compiled in, it runs the CSC/downscaling |
| //!           kernel if m_16xMeSupported is set and the 32x (optional), 16x and 4x ME kernels if |
| //!           m_b16XMeEnabled is set. Debug builds additionally dump the raw input, the L0/L1 reference |
| //!           surfaces and the ME / stream-in outputs. |
| //! \return   MOS_STATUS |
| //!           MOS_STATUS_SUCCESS if success, else fail reason |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::EncodeKernelFunctions() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE) |
| // Debug only: dump the raw surface that is going to be encoded |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| m_rawSurfaceToEnc, |
| CodechalDbgAttr::attrEncodeRawInputSurface, |
| "SrcSurf"))); |
| |
| // Debug only: dump every valid active L0 reference and, unless low delay, every valid active L1 reference; |
| // the dump name carries the value of iFieldOrderCnt[0] of the reference |
| CODECHAL_DEBUG_TOOL( |
| PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0]; |
| for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++) |
| { |
| CODEC_PICTURE refPic = l0RefFrameList[refIdx]; |
| |
| if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid) |
| { |
| // L0 references |
| uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx; |
| m_debugInterface->m_refIndex = (uint16_t)m_refList[refPicIdx]->iFieldOrderCnt[0]; |
| std::string refSurfName = "RefSurf_List0_POC" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| &m_refList[refPicIdx]->sRefBuffer, |
| CodechalDbgAttr::attrReferenceSurfaces, |
| refSurfName.data())) |
| } |
| } |
| |
| if (!m_lowDelay) |
| { |
| PCODEC_PICTURE l1RefFrameList = m_hevcSliceParams->RefPicList[LIST_1]; |
| for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++) |
| { |
| CODEC_PICTURE refPic = l1RefFrameList[refIdx]; |
| |
| if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid) |
| { |
| // L1 references |
| uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx; |
| m_debugInterface->m_refIndex = (uint16_t)m_refList[refPicIdx]->iFieldOrderCnt[0]; |
| std::string refSurfName = "RefSurf_List1_POC" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| &m_refList[refPicIdx]->sRefBuffer, |
| CodechalDbgAttr::attrReferenceSurfaces, |
| refSurfName.data())) |
| } |
| } |
| }); |
| |
| // NOTE(review): every CODECHAL_ENCODE_CHK_STATUS_RETURN between here and the restore further down returns |
| // without putting m_singleTaskPhaseSupported back - confirm callers do not rely on it after a failure |
| auto singleTaskPhaseSupported = m_singleTaskPhaseSupported; // local variable to save current setting before overwriting |
| |
| if (m_16xMeSupported) |
| { |
| // disable SingleTaskPhase for now with SHME |
| m_singleTaskPhaseSupported = false; |
| |
| CodechalEncodeCscDs::KernelParams cscScalingKernelParams; |
| MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams)); |
| |
| // CSC and 4x DS are never the last task; 16x DS is the last one only when neither 32x ME is supported |
| // nor HME is enabled, 32x DS only when HME is disabled |
| cscScalingKernelParams.bLastTaskInPhaseCSC = |
| cscScalingKernelParams.bLastTaskInPhase4xDS = false; |
| cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled); |
| cscScalingKernelParams.bLastTaskInPhase32xDS = !m_hmeEnabled; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams)); |
| } |
| |
| // ME kernels are issued from the coarsest to the finest level: 32x (if enabled), 16x, then 4x |
| if (m_b16XMeEnabled) |
| { |
| if (m_b32XMeEnabled) |
| { |
| //HME_P kernel for 32xME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_32x)); |
| } |
| |
| //HME_P kernel for 16xME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_16x)); |
| |
| //StreamIn kernel, 4xME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_4x)); |
| } |
| |
| // retrieve SingleTaskPhase setting (SAO will need STP enabled setting) |
| m_singleTaskPhaseSupported = singleTaskPhaseSupported; |
| |
| // Debug only: dump the 4x MV data and distortion, the 16x / 32x MV data when those levels are enabled, |
| // and the VDEnc stream-in buffer when stream-in is enabled |
| CODECHAL_DEBUG_TOOL( |
| if (m_hmeEnabled) { |
| CODECHAL_ME_OUTPUT_PARAMS meOutputParams; |
| |
| MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams)); |
| meOutputParams.psMeMvBuffer = &m_s4XMeMvDataBuffer; |
| meOutputParams.psMeBrcDistortionBuffer = nullptr; |
| meOutputParams.psMeDistortionBuffer = &m_s4XMeDistortionBuffer; |
| meOutputParams.b16xMeInUse = false; |
| meOutputParams.b32xMeInUse = false; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &meOutputParams.psMeMvBuffer->OsResource, |
| CodechalDbgAttr::attrOutput, |
| "MvData", |
| meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch, |
| CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledFrameFieldHeightInMb4x * 4) : 0, |
| CODECHAL_MEDIA_STATE_4X_ME)); |
| |
| //CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| // &meOutputParams.psMeBrcDistortionBuffer->OsResource, |
| // CodechalDbgAttr::attrOutput, |
| // "BrcDist", |
| // meOutputParams.psMeBrcDistortionBuffer->dwHeight *meOutputParams.psMeBrcDistortionBuffer->dwPitch, |
| // CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4), 8) : 0, |
| // CODECHAL_MEDIA_STATE_4X_ME)); |
| if (meOutputParams.psMeDistortionBuffer) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &meOutputParams.psMeDistortionBuffer->OsResource, |
| CodechalDbgAttr::attrOutput, |
| "MeDist", |
| meOutputParams.psMeDistortionBuffer->dwHeight *meOutputParams.psMeDistortionBuffer->dwPitch, |
| CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4 * 10), 8) : 0, |
| CODECHAL_MEDIA_STATE_4X_ME)); |
| } |
| if (m_b16XMeEnabled) |
| { |
| MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams)); |
| meOutputParams.psMeMvBuffer = &m_s16XMeMvDataBuffer; |
| meOutputParams.psMeBrcDistortionBuffer = nullptr; |
| meOutputParams.psMeDistortionBuffer = nullptr; |
| meOutputParams.b16xMeInUse = true; |
| meOutputParams.b32xMeInUse = false; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_debugInterface->DumpBuffer( |
| &meOutputParams.psMeMvBuffer->OsResource, |
| CodechalDbgAttr::attrOutput, |
| "MvData", |
| meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch, |
| CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledFrameFieldHeightInMb16x * 4) : 0, |
| CODECHAL_MEDIA_STATE_16X_ME)); |
| } |
| if (m_b32XMeEnabled) |
| { |
| MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams)); |
| meOutputParams.psMeMvBuffer = &m_s32XMeMvDataBuffer; |
| meOutputParams.psMeBrcDistortionBuffer = nullptr; |
| meOutputParams.psMeDistortionBuffer = nullptr; |
| meOutputParams.b16xMeInUse = false; |
| meOutputParams.b32xMeInUse = true; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_debugInterface->DumpBuffer( |
| &meOutputParams.psMeMvBuffer->OsResource, |
| CodechalDbgAttr::attrOutput, |
| "MvData", |
| meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch, |
| CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) * (m_downscaledFrameFieldHeightInMb32x * 4) : 0, |
| CODECHAL_MEDIA_STATE_32X_ME)); |
| } |
| |
| // NOTE(review): meOutputParams is filled in again here, but the stream-in dump below reads |
| // m_resVdencStreamInBuffer directly and nothing else uses it afterwards - looks like leftover set-up |
| MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams)); |
| meOutputParams.pResVdenStreamInBuffer = &(m_resVdencStreamInBuffer[m_currRecycledBufIdx]); |
| meOutputParams.psMeMvBuffer = &m_s4XMeMvDataBuffer; |
| meOutputParams.psMeDistortionBuffer = &m_s4XMeDistortionBuffer; |
| meOutputParams.b16xMeInUse = false; |
| meOutputParams.bVdencStreamInInUse = true; |
| if (m_vdencStreamInEnabled) { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_resVdencStreamInBuffer[m_currRecycledBufIdx], |
| CodechalDbgAttr::attrOutput, |
| "StreaminData", |
| m_picWidthInMb * m_picHeightInMb * CODECHAL_CACHELINE_SIZE, |
| 0, |
| CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN)); |
| } |
| }) |
| #endif |
| |
| return eStatus; |
| } |
| |
| //! |
| //! \brief    Set up the slice size report of the current frame |
| //! \details  Single pipe mode is handled by CodechalVdencHevcState::ReadSliceSize. With several pipes the |
| //!           slice report in the encode status buffer is cleared when SliceSizeControl is off; otherwise |
| //!           the first pipe creates / clears the slice report buffer in the first pass and adds commands |
| //!           copying the slice sizes and the slice size overflow DW into it and into the status buffer. |
| //! \param    [in] cmdBuffer |
| //!           Command buffer the copy commands are added to |
| //! \return   MOS_STATUS |
| //!           MOS_STATUS_SUCCESS if success, else fail reason |
| //! |
| MOS_STATUS CodechalVdencHevcStateG12::ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
|     CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
|     MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
|     // Use FrameStats buffer if in single pipe mode. |
|     if (m_numPipe == 1) |
|     { |
|         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ReadSliceSize(cmdBuffer)); |
|         return eStatus; |
|     } |
| |
|     // In multi-tile multi-pipe mode, use PAK integration kernel output |
|     // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report |
|     MOS_LOCK_PARAMS writeLockFlags; |
|     MOS_ZeroMemory(&writeLockFlags, sizeof(MOS_LOCK_PARAMS)); |
|     writeLockFlags.WriteOnly = true; |
| |
|     // encodeStatus is offset by 2 DWs in the resource |
|     uint32_t statusOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + sizeof(uint32_t) * 2); |
| |
|     // Slice sizes are only reported when slice size control is on; otherwise just clear the slice report in EncodeStatus |
|     if (!m_hevcSeqParams->SliceSizeControl) |
|     { |
|         uint8_t* statusBase = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_encodeStatusBuf.resStatusBuffer, &writeLockFlags); |
|         CODECHAL_ENCODE_CHK_NULL_RETURN(statusBase); |
|         EncodeStatus* currStatus = (EncodeStatus*)(statusBase + statusOffset); |
|         MOS_ZeroMemory(&(currStatus->sliceReport), sizeof(EncodeStatusSliceReport)); |
|         m_osInterface->pfnUnlockResource(m_osInterface, &m_encodeStatusBuf.resStatusBuffer); |
| |
|         return eStatus; |
|     } |
| |
|     // Only the first pipe sets up the report |
|     if (!IsFirstPipe()) |
|     { |
|         return eStatus; |
|     } |
| |
|     uint32_t sliceReportBytes = MOS_ALIGN_CEIL(m_numLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
|     auto sliceReportRes = &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]; |
| |
|     if (IsFirstPass()) |
|     { |
|         // Create/ Initialize slice report buffer once per frame, to be used across passes |
|         if (Mos_ResourceIsNull(sliceReportRes)) |
|         { |
|             MOS_ALLOC_GFXRES_PARAMS linearBufferParams; |
|             MOS_ZeroMemory(&linearBufferParams, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
|             linearBufferParams.Type = MOS_GFXRES_BUFFER; |
|             linearBufferParams.TileType = MOS_TILE_LINEAR; |
|             linearBufferParams.Format = Format_Buffer; |
|             linearBufferParams.dwBytes = sliceReportBytes; |
| |
|             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
|                 m_osInterface, |
|                 &linearBufferParams, |
|                 sliceReportRes), |
|                 "Failed to create HEVC VDEnc Slice Report Buffer "); |
|         } |
| |
|         // Clear slice size structure to be sent in EncodeStatusReport buffer |
|         uint8_t* sliceReportData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, sliceReportRes, &writeLockFlags); |
|         CODECHAL_ENCODE_CHK_NULL_RETURN(sliceReportData); |
|         MOS_ZeroMemory(sliceReportData, sliceReportBytes); |
|         m_osInterface->pfnUnlockResource(m_osInterface, sliceReportRes); |
| |
|         // Set slice size pointer in slice size structure |
|         uint8_t* statusBase = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_encodeStatusBuf.resStatusBuffer, &writeLockFlags); |
|         CODECHAL_ENCODE_CHK_NULL_RETURN(statusBase); |
|         EncodeStatus* currStatus = (EncodeStatus*)(statusBase + statusOffset); |
|         currStatus->sliceReport.pSliceSize = sliceReportRes; |
|         m_osInterface->pfnUnlockResource(m_osInterface, &m_encodeStatusBuf.resStatusBuffer); |
|     } |
| |
|     // Copy slice size data buffer from PAK to be sent back to App |
|     CODECHAL_ENCODE_CHK_STATUS_RETURN(CopyDataBlock(cmdBuffer, |
|         &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource, |
|         m_hevcTileStatsOffset.uiHevcSliceStreamout, |
|         sliceReportRes, |
|         0, |
|         sliceReportBytes)); |
| |
|     // Slice size overflow: DW0[16] of the aggregated PAK statistics is copied to DW0 of EncodeStatusSliceReport |
|     MHW_MI_COPY_MEM_MEM_PARAMS overflowCopyParams; |
|     MOS_ZeroMemory(&overflowCopyParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS)); |
|     overflowCopyParams.presSrc     = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; |
|     overflowCopyParams.dwSrcOffset = m_hevcFrameStatsOffset.uiHevcPakStatistics; |
|     overflowCopyParams.presDst     = &m_encodeStatusBuf.resStatusBuffer; |
|     overflowCopyParams.dwDstOffset = statusOffset + m_encodeStatusBuf.dwSliceReportOffset; |
|     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &overflowCopyParams)); |
| |
|     return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::ExecutePictureLevel() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| int32_t currentPass = GetCurrentPass(); |
| int32_t currentPipe = GetCurrentPipe(); |
| |
| if (IsFirstPipe() && IsFirstPass()) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams[m_virtualEngineBbIndex])); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics()); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRoundingValues()); |
| } |
| |
| if (m_hevcPicParams->bUsedAsRef || (m_brcEnabled && !m_hevcSeqParams->ParallelBRC)) |
| { |
| if (m_currRefSync == nullptr) |
| { |
| m_currRefSync = &m_refSync[m_currMbCodeIdx]; |
| } |
| } |
| else |
| { |
| m_currRefSync = nullptr; |
| } |
| |
| if (m_lookaheadPass && !m_lookaheadUpdate) |
| { |
| m_lookaheadUpdate = (m_currLaDataIdx >= m_lookaheadDepth - 1); |
| } |
| |
| m_firstTaskInPhase = m_singleTaskPhaseSupported ? IsFirstPass() : false; |
| m_lastTaskInPhase = m_singleTaskPhaseSupported ? IsLastPass() : true; |
| |
| // Per frame maximum HuC kernels is 5 - BRC Init, BRC Update, PAK Int, BRC Update, PAK Int |
| m_hucCommandsSize = m_hwInterface->m_hucCommandBufferSize * 5; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE); |
| |
| if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) \ |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum"); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize()); |
| |
| if (!m_singleTaskPhaseSupportedInPak) |
| { |
| // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together |
| m_firstTaskInPhase = true; |
| m_lastTaskInPhase = true; |
| } |
| |
| if (m_lookaheadUpdate) |
| { |
| m_lastTaskInPhase = false; |
| } |
| |
| // PAK pass type for each pass: VDEnc+PAK vs. PAK-only |
| SetPakPassType(); |
| |
| bool pakOnlyMultipassEnable; |
| |
| // "PAK-Only Multi-Pass Enable" will be decided by HUC kernel for BRC |
| if (m_numPipe >= 2) |
| { |
| pakOnlyMultipassEnable = false; |
| } |
| else |
| { |
| pakOnlyMultipassEnable = false; |
| } |
| |
| bool panicEnabled = (m_brcEnabled) && (m_panicEnable) && (IsLastPass()) && !m_pakOnlyPass; |
| |
| uint32_t rollingILimit = (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_ROW) ? MOS_ROUNDUP_DIVIDE(m_frameHeight, 32) : (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_COLUMN) ? MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) : 0; |
| |
| m_refList[m_currReconstructedPic.FrameIdx]->rollingIntraRefreshedPosition = |
| CodecHal_Clip3(0, rollingILimit, m_hevcPicParams->IntraInsertionLocation + m_hevcPicParams->IntraInsertionSize); |
| |
| // For ACQP / BRC, update pic params rolling intra reference location here before cmd buffer is prepared. |
| PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0]; |
| for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++) |
| { |
| CODEC_PICTURE refPic = l0RefFrameList[refIdx]; |
| |
| if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid) |
| { |
| uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx; |
| m_hevcPicParams->RollingIntraReferenceLocation[refIdx] = m_refList[refPicIdx]->rollingIntraRefreshedPosition; |
| } |
| } |
| |
| if (IsFirstPass()) |
| { |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| MHW_MI_MMIOREGISTERS mmioRegister; |
| bool validMmio = m_hwInterface->GetMfxInterface()->ConvertToMiRegister(m_vdboxIndex, mmioRegister); |
| if (validMmio) |
| { |
| HalOcaInterface::On1stLevelBBStart( |
| cmdBuffer, |
| *m_hwInterface->GetOsInterface()->pOsContext, |
| m_hwInterface->GetOsInterface()->CurrentGpuContextHandle, |
| *m_hwInterface->GetMiInterface(), |
| mmioRegister); |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| } |
| |
| if (m_numPipe >= 2) |
| { |
| // Send Cmd Buffer Header for VE in last pipe only |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking)); |
| |
| MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams; |
| MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS)); |
| forceWakeupParams.bMFXPowerWellControl = true; |
| forceWakeupParams.bMFXPowerWellControlMask = true; |
| forceWakeupParams.bHEVCPowerWellControl = true; |
| forceWakeupParams.bHEVCPowerWellControlMask = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd( |
| &cmdBuffer, |
| &forceWakeupParams)); |
| |
| // clean-up per VDBOX semaphore memory, only in the first BRC pass. Same semaphore is re-used across BRC passes for stitch command |
| if (IsFirstPass()) |
| { |
| if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| SetSemaphoreMem( |
| &m_resVdBoxSemaphoreMem[currentPipe].sResource, |
| &cmdBuffer, |
| 0)); |
| } |
| |
| // Do not clear BRC PAK semaphore because of timing issue with =0 on 1st pipe and +1 on 2nd pipe |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| } |
| else if (IsFirstPass()) |
| { |
| // Send force wake command for VDBOX |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams; |
| MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS)); |
| forceWakeupParams.bMFXPowerWellControl = true; |
| forceWakeupParams.bMFXPowerWellControlMask = true; |
| forceWakeupParams.bHEVCPowerWellControl = true; |
| forceWakeupParams.bHEVCPowerWellControlMask = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd( |
| &cmdBuffer, |
| &forceWakeupParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| } |
| |
| if (m_vdencHucUsed && IsFirstPipe()) |
| { |
| // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs |
| perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE : |
| CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, perfTag.CallType); |
| |
| m_resVdencBrcUpdateDmemBufferPtr[0] = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo); |
| |
| // Invoke BRC init/reset FW |
| if (m_brcInit || m_brcReset) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset()); |
| } |
| |
| if (!m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE); |
| } |
| |
| // Invoke BRC update FW |
| // When tile replay is enabled, BRC update is also called at tile row level |
| if (m_enableTileReplay) |
| { |
| m_FrameLevelBRCForTileRow = true; |
| m_TileRowLevelBRC = false; |
| } |
| else |
| { |
| m_FrameLevelBRCForTileRow = false; |
| m_TileRowLevelBRC = false; |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate()); |
| |
| m_brcInit = m_brcReset = false; |
| } |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_hevcVdencAcqpEnabled)) && (m_numPipe == 1)) |
| { |
| // Send command buffer header at the beginning (OS dependent) |
| // frame tracking tag is only added in the last command buffer header |
| bool requestFrameTracking = m_singleTaskPhaseSupported ? |
| m_firstTaskInPhase : |
| m_lastTaskInPhase; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking)); |
| } |
| |
| // clean-up per VDBOX semaphore memory |
| if (currentPipe < 0) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| // Ensure the previous BRC Update is done, before executing PAK |
| if (m_vdencHucUsed && (m_numPipe >= 2)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resBrcPakSemaphoreMem.sResource, 1, MHW_MI_ATOMIC_INC, &cmdBuffer)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand( |
| &m_resBrcPakSemaphoreMem.sResource, |
| &cmdBuffer, |
| m_numPipe)); |
| |
| // Program some placeholder cmds to resolve the hazard between pipe sync |
| MHW_MI_STORE_DATA_PARAMS dataParams; |
| dataParams.pOsResource = &m_resDelayMinus; |
| dataParams.dwResourceOffset = 0; |
| dataParams.dwValue = 0xDE1A; |
| for (uint32_t i = 0; i < m_numDelay; i++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| &cmdBuffer, |
| &dataParams)); |
| } |
| |
| //clean HW semaphore memory |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resBrcPakSemaphoreMem.sResource, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer)); |
| } |
| |
| if ((!IsFirstPass()) && m_vdencHuCConditional2ndPass) |
| { |
| MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams; |
| |
| // Insert conditional batch buffer end |
| MOS_ZeroMemory( |
| &miConditionalBatchBufferEndParams, |
| sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)); |
| |
| // VDENC uses HuC FW generated semaphore for conditional 2nd pass |
| miConditionalBatchBufferEndParams.presSemaphoreBuffer = |
| &m_resPakMmioBuffer; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd( |
| &cmdBuffer, |
| &miConditionalBatchBufferEndParams)); |
| |
| auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex); |
| |
| uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource |
| |
| // Write back the HCP image control register for RC6 may clean it out |
| MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams; |
| MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams)); |
| miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer; |
| miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset; |
| miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams)); |
| |
| MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams; |
| MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); |
| miStoreRegMemParams.presStoreBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite]; |
| miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS); |
| miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams)); |
| |
| MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); |
| miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer; |
| miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset; |
| miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams)); |
| } |
| |
| if (IsFirstPass() && m_osInterface->bTagResourceSync) |
| { |
| // This is a short term WA to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB |
| // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning |
| // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine |
| // as long as Dec/VP/Enc won't depend on this PAK so soon. |
| |
| MOS_RESOURCE globalGpuContextSyncTagBuffer; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource( |
| m_osInterface, |
| &globalGpuContextSyncTagBuffer)); |
| |
| MHW_MI_STORE_DATA_PARAMS params; |
| params.pOsResource = &globalGpuContextSyncTagBuffer; |
| params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal); |
| uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal); |
| params.dwValue = (value > 0) ? (value - 1) : 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms)); |
| } |
| |
| if (IsFirstPipe()) |
| { |
| if (IsFirstPass()) |
| { |
| // Check other dependent VDBOXs if they are ready |
| // The inter frame sync method was changed, remove this first, to be tuned |
| // CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForVDBOX(&cmdBuffer)); |
| |
| // clean-up HW semaphore memory |
| if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource)) |
| { |
| // Ensure this semaphore is not used before. If yes, wait until it is done. |
| // The inter frame sync method was changed, remove this first, to be tuned |
| // CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| // SendHWWaitCommand(&pCurrRefSync->resSemaphoreMem.sResource, &cmdBuffer, 1)); |
| |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_currRefSync->resSemaphoreMem.sResource; |
| storeDataParams.dwResourceOffset = 0; |
| storeDataParams.dwValue = 0; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| &cmdBuffer, |
| &storeDataParams)); |
| } |
| } |
| |
| if (!m_lookaheadUpdate) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES)); |
| } |
| } |
| |
| PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams()); |
| |
| SetHcpPipeModeSelectParams(*pipeModeSelectParams); |
| |
| // HCP_PIPE_SELECT can not be generated by FW in BRC mode for GEN11+ |
| { |
| MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS vdencControlStateParams; |
| MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams; |
| |
| //set up VDENC_CONTROL_STATE command |
| { |
| MOS_ZeroMemory(&vdencControlStateParams, sizeof(MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS)); |
| vdencControlStateParams.bVdencInitialization = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| static_cast<MhwVdboxVdencInterfaceG12X*>(m_vdencInterface)->AddVdencControlStateCmd(&cmdBuffer, &vdencControlStateParams)); |
| } |
| |
| //set up VD_CONTROL_STATE command |
| { |
| MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS)); |
| vdControlStateParams.initialization = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams)); |
| } |
| |
| MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams; |
| SetHcpSrcSurfaceParams(srcSurfaceParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &srcSurfaceParams)); |
| |
| MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams; |
| SetHcpReconSurfaceParams(reconSurfaceParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams)); |
| |
| // Add the surface state for reference picture, GEN12 HW change |
| reconSurfaceParams.ucSurfaceStateId = CODECHAL_HCP_REF_SURFACE_ID; |
| *m_pipeBufAddrParams = {}; |
| SetHcpPipeBufAddrParams(*m_pipeBufAddrParams); |
| |
| #ifdef _MMC_SUPPORTED |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| m_rsvdState->SetHcpReconSurfaceParams(reconSurfaceParams, m_slotForRecNotFiltered); |
| } |
| #endif |
| #endif |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeBufAddrCmd(&cmdBuffer)); |
| |
| MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams; |
| SetHcpIndObjBaseAddrParams(indObjBaseAddrParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams)); |
| |
| MHW_VDBOX_QM_PARAMS fqmParams, qmParams; |
| SetHcpQmStateParams(fqmParams, qmParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams)); |
| |
| SetVdencPipeModeSelectParams(*pipeModeSelectParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams)); |
| |
| MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2]; |
| SetVdencSurfaceStateParams(srcSurfaceParams, reconSurfaceParams, dsSurfaceParams[0], dsSurfaceParams[1]); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &srcSurfaceParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &reconSurfaceParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2)); |
| |
| SetVdencPipeBufAddrParams(*m_pipeBufAddrParams); |
| m_pipeBufAddrParams->pRawSurfParam = &srcSurfaceParams; |
| m_pipeBufAddrParams->pDecodedReconParam = &reconSurfaceParams; |
| #ifdef _MMC_SUPPORTED |
| m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams); |
| #endif |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams)); |
| |
| MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams; |
| SetHcpPicStateParams(picStateParams); |
| |
| if (m_vdencHucUsed && (!m_hevcPicParams->tiles_enabled_flag)) |
| { |
| // 2nd level batch buffer |
| m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize; |
| HalOcaInterface::OnSubLevelBBStart(cmdBuffer, *m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx])); |
| |
| // save offset for next 2nd level batch buffer usage |
| m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBuffer2ndGroupSize; |
| } |
| // When tile is enabled, below commands are needed for each tile instead of each picture |
| else if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| SetAddCommands(CODECHAL_CMD1, &cmdBuffer, true, m_roundInterValue, m_roundIntraValue, m_lowDelay); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&cmdBuffer, &picStateParams)); |
| |
| SetAddCommands(CODECHAL_CMD2, &cmdBuffer, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered); |
| } |
| |
| // Send HEVC_VP9_RDOQ_STATE command |
| if (m_hevcRdoqEnabled && !m_hevcPicParams->tiles_enabled_flag) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| |
| m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::ExecuteSliceLevel() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ExecuteSliceLevel()); |
| |
| if (m_lookaheadPass) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AnalyzeLookaheadStats()); |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_vdencLaStatsBuffer, |
| CodechalDbgAttr::attrVdencOutput, |
| "_LookaheadStats", |
| m_brcLooaheadStatsBufferSize, |
| 0, |
| CODECHAL_NUM_MEDIA_STATES))); |
| } |
| } |
| else |
| { |
| if (m_vdencHucUsed && m_enableTileReplay) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncWithTileRowLevelBRC()); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel()); |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::EncTileLevel() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| int32_t currentPipe = GetCurrentPipe(); |
| int32_t currentPass = GetCurrentPass(); |
| |
| if (currentPipe < 0 || currentPass < 0) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| // Currently this implementation is only for CQP, single pass |
| // Allocate more tile batch when try multiple passes |
| if (IsFirstPass() && IsFirstPipe() && (!m_osInterface->bUsesPatchList)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileLevelBatch()); |
| } |
| |
| PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams()); |
| |
| SetHcpPipeModeSelectParams(*pipeModeSelectParams); |
| SetVdencPipeModeSelectParams(*pipeModeSelectParams); |
| |
| MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState; |
| SetHcpSliceStateCommonParams(sliceState); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams; |
| uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams; |
| |
| // Construct The third level batch buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructTLB(&m_thirdLevelBatchBuffer)); |
| |
| for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++) |
| { |
| for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++) |
| { |
| PCODEC_ENCODER_SLCDATA slcData = m_slcData; |
| uint32_t slcCount, idx, sliceNumInTile = 0; |
| |
| idx = tileRow * numTileColumns + tileCol; |
| |
| if ((m_numPipe > 1) && (tileCol != currentPipe)) |
| { |
| continue; |
| } |
| |
| MOS_COMMAND_BUFFER tileBatchBuf; |
| PMOS_COMMAND_BUFFER tempCmdBuf = &cmdBuffer; |
| uint8_t *data = nullptr; |
| |
| // Move tile level commands to first level command buffer when use patch list. |
| if (!m_osInterface->bUsesPatchList) |
| { |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = true; |
| |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_tileLevelBatchBuffer[currentPass][idx].OsResource), &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(&tileBatchBuf, sizeof(tileBatchBuf)); |
| tileBatchBuf.pCmdBase = tileBatchBuf.pCmdPtr = (uint32_t *)data; |
| tileBatchBuf.iRemaining = m_tileLevelBatchSize; |
| |
| HalOcaInterface::OnSubLevelBBStart(cmdBuffer, *m_osInterface->pOsContext, &m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource, 0, true, 0); |
| // Add batch buffer start for tile |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_tileLevelBatchBuffer[currentPass][idx])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer)); |
| |
| if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_enableTileReplay) |
| { |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface()); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->ReadEncodeCounterFromHW( |
| m_osInterface, |
| &tileBatchBuf, |
| &m_resHwCountTileReplay, |
| (uint16_t)idx)); |
| } |
| |
| tempCmdBuf = &tileBatchBuf; |
| } |
| |
| // Construct the tile batch |
| // To be moved to one sub function later |
| // HCP Lock for multiple pipe mode |
| if (m_numPipe > 1) |
| { |
| MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS)); |
| vdControlStateParams.scalableModePipeLock = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(tempCmdBuf, &vdControlStateParams)); |
| } |
| // VDENC_PIPE_MODE_SELECT |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(tempCmdBuf, pipeModeSelectParams)); |
| // HCP_PIPE_MODE_SELECT |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(tempCmdBuf, pipeModeSelectParams)); |
| |
| // 3rd level batch buffer |
| if (m_hevcVdencAcqpEnabled || m_brcEnabled) |
| { |
| HalOcaInterface::OnSubLevelBBStart(cmdBuffer, *m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0); |
| m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(tempCmdBuf, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx])); |
| if (m_hevcRdoqEnabled) |
| { |
| MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams; |
| SetHcpPicStateParams(picStateParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(tempCmdBuf, &picStateParams)); |
| } |
| } |
| else |
| { |
| HalOcaInterface::OnSubLevelBBStart(cmdBuffer, *m_osInterface->pOsContext, &m_thirdLevelBatchBuffer.OsResource, 0, true, 0); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(tempCmdBuf, &m_thirdLevelBatchBuffer)); |
| } |
| |
| // HCP_TILE_CODING commmand |
| // Set Tile replay related parameters |
| tileParams[idx].IsFirstPass = IsFirstPass(); |
| tileParams[idx].IsLastPass = IsLastPass(); |
| tileParams[idx].bTileReplayEnable = m_enableTileReplay; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG12*>(m_hcpInterface)->AddHcpTileCodingCmd(tempCmdBuf, &tileParams[idx])); |
| |
| for (slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| bool lastSliceInTile = false, sliceInTile = false; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount, |
| &tileParams[idx], |
| &sliceInTile, |
| &lastSliceInTile)); |
| |
| if (!sliceInTile) |
| { |
| continue; |
| } |
| |
| if (m_hevcVdencAcqpEnabled || m_brcEnabled) |
| { |
| // save offset for next 2nd level batch buffer usage |
| // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice |
| // dwVdencBatchBufferPerSliceConstSize: constant size for each slice |
| // m_vdencBatchBufferPerSliceVarSize: variable size for each slice |
| |
| // starting location for executing slice level cmds |
| // To do: Improvize to only add current slice wSlcCount |
| m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize; |
| |
| for (uint32_t j = 0; j < slcCount; j++) |
| { |
| m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset |
| += (m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[j]); |
| } |
| |
| } |
| |
| SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(tempCmdBuf, &sliceState)); |
| |
| // Send VD_PIPELINE_FLUSH command for each slice |
| MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams)); |
| vdPipelineFlushParams.Flags.bWaitDoneMFX = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1; |
| vdPipelineFlushParams.Flags.bFlushVDENC = 1; |
| vdPipelineFlushParams.Flags.bFlushHEVC = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(tempCmdBuf, &vdPipelineFlushParams)); |
| |
| sliceNumInTile++; |
| } // end of slice |
| |
| if (0 == sliceNumInTile) |
| { |
| // One tile must have at least one slice |
| CODECHAL_ENCODE_ASSERT(false); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| break; |
| } |
| |
| if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| //HCP unLock for multiple pipe mode |
| if (m_numPipe > 1) |
| { |
| MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS)); |
| vdControlStateParams.scalableModePipeUnlock = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(tempCmdBuf, &vdControlStateParams)); |
| } |
| |
| // Send VD_PIPELINE_FLUSH command |
| MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams)); |
| vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1; |
| vdPipelineFlushParams.Flags.bFlushHEVC = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(tempCmdBuf, &vdPipelineFlushParams)); |
| |
| // Send MI_FLUSH command |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(tempCmdBuf, &flushDwParams)); |
| |
| // Update head pointer for capture mode |
| if (m_CaptureModeEnable && IsLastPipe()) |
| { |
| MHW_MI_LOAD_REGISTER_IMM_PARAMS registerImmParams; |
| MOS_ZeroMemory(®isterImmParams, sizeof(registerImmParams)); |
| registerImmParams.dwData = 1; |
| registerImmParams.dwRegister = m_VdboxVDENCRegBase[currentPipe] + 0x90; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(tempCmdBuf, ®isterImmParams)); |
| } |
| |
| if (!m_osInterface->bUsesPatchList) |
| { |
| // Add batch buffer end at the end of each tile batch, 2nd level batch buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(tempCmdBuf, nullptr)); |
| |
| std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]_TILELEVEL"; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| tempCmdBuf, |
| CODECHAL_NUM_MEDIA_STATES, |
| pakPassName.data()));) |
| |
| if (data) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &(m_tileLevelBatchBuffer[currentPass][idx].OsResource)); |
| } |
| } |
| |
| } // end of row tile |
| } // end of column tile |
| |
| m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams); |
| |
| // Insert end of sequence/stream if set |
| // To be moved to slice level? |
| if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe()) |
| { |
| MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams; |
| MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams)); |
| pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq; |
| pakInsertObjectParams.bLastPicInStream = m_lastPicInStream; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams)); |
| } |
| |
| // Send VD_CONTROL_STATE (Memory Implict Flush) |
| MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS)); |
| vdControlStateParams.memoryImplicitFlush = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams)); |
| |
| |
| // Send VD_PIPELINE_FLUSH command |
| MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams)); |
| vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1; |
| vdPipelineFlushParams.Flags.bFlushHEVC = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams)); |
| |
| // Send MI_FLUSH command |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| // Set the HW semaphore to indicate current pipe done |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource)) |
| { |
| flushDwParams.pOsResource = &m_resVdBoxSemaphoreMem[currentPipe].sResource; |
| flushDwParams.dwDataDW1 = currentPass + 1; |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| if (IsFirstPipe()) |
| { |
| // first pipe needs to ensure all other pipes are ready |
| for (uint32_t i = 0; i < m_numPipe; i++) |
| { |
| if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[i].sResource)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource, |
| &cmdBuffer, |
| currentPass + 1)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource, |
| &cmdBuffer, |
| 0x0)); |
| } |
| } |
| |
| // Whenever ACQP/ BRC is enabled with tiling, PAK Integration kernel is needed. |
| // ACQP/ BRC need PAK integration kernel to aggregate statistics |
| if (m_vdencHucUsed) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer)); |
| } |
| |
| // Use HW stitch commands only in the scalable mode |
| // For single pipe with tile replay, stitch also needed |
| if (m_enableTileStitchByHW) |
| { |
| if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP && !m_hevcVdencAcqpEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrateStitch(&cmdBuffer)); |
| } |
| // 2nd level BB buffer for stitching cmd |
| // current location to add cmds in 2nd level batch buffer |
| m_HucStitchCmdBatchBuffer.iCurrent = 0; |
| // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass |
| m_HucStitchCmdBatchBuffer.dwOffset = 0; |
| HalOcaInterface::OnSubLevelBBStart(cmdBuffer, *m_osInterface->pOsContext, &m_HucStitchCmdBatchBuffer.OsResource, 0, true, 0); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer)); |
| // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSliceSize(&cmdBuffer)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES)); |
| |
| if (m_numPipe <= 1) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer)); |
| |
| // BRC PAK statistics different for each pass |
| if (m_brcEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer)); |
| } |
| } |
| |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| // Signal HW semaphore for the reference frame dependency (i.e., current coding frame waits for the reference frame being ready) |
| if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource)) |
| { |
| // the reference frame semaphore must be set in each pass because of the conditional BRC batch buffer. Some BRC passes could be skipped. |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_currRefSync->resSemaphoreMem.sResource; |
| storeDataParams.dwResourceOffset = 0; |
| storeDataParams.dwValue = 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd( |
| &cmdBuffer, |
| &storeDataParams)); |
| } |
| } |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); |
| } |
| |
| std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]"; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| CODECHAL_NUM_MEDIA_STATES, |
| pakPassName.data()));) |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| bool nullRendering = m_videoContextUsesNullHw; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers()); |
| if (m_mmcState) |
| { |
| m_mmcState->UpdateUserFeatureKey(&m_reconSurface); |
| } |
| ) |
| |
| if (IsFirstPipe() && |
| IsLastPass() && |
| m_signalEnc && |
| m_currRefSync && |
| !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse)) |
| { |
| // signal semaphore |
| MOS_SYNC_PARAMS syncParams; |
| syncParams = g_cInitSyncParams; |
| syncParams.GpuContext = m_videoContext; |
| syncParams.presSyncResource = &m_currRefSync->resSyncObject; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams)); |
| m_currRefSync->uiSemaphoreObjCount++; |
| m_currRefSync->bInUsed = true; |
| } |
| } |
| |
| // Reset parameters for next PAK execution |
| if (IsLastPipe() && |
| IsLastPass()) |
| { |
| if (!m_singleTaskPhaseSupported) |
| { |
| m_osInterface->pfnResetPerfBufferID(m_osInterface); |
| } |
| |
| m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS; |
| |
| if (m_hevcSeqParams->ParallelBRC) |
| { |
| m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite = |
| (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; |
| } |
| |
| m_newPpsHeader = 0; |
| m_newSeqHeader = 0; |
| m_frameNum++; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::EncWithTileRowLevelBRC() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| int32_t currentPipe = GetCurrentPipe(); |
| int32_t currentPass = GetCurrentPass(); |
| |
| if (currentPipe < 0 || currentPass < 0) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| // Revisit the buffer reuse for multiple frames later |
| if (IsFirstPass() && IsFirstPipe()) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileLevelBatch()); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileRowLevelBRCBatch()); |
| } |
| |
| PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams()); |
| |
| SetHcpPipeModeSelectParams(*pipeModeSelectParams); |
| SetVdencPipeModeSelectParams(*pipeModeSelectParams); |
| |
| MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState; |
| SetHcpSliceStateCommonParams(sliceState); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams; |
| uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams; |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| |
| m_FrameLevelBRCForTileRow = false; |
| m_TileRowLevelBRC = true; |
| |
| for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++) |
| { |
| for (m_tileRowPass = 0; m_tileRowPass < m_NumPassesForTileReplay; m_tileRowPass++) |
| { |
| for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++) |
| { |
| PCODEC_ENCODER_SLCDATA slcData = m_slcData; |
| uint32_t slcCount, idx, sliceNumInTile = 0; |
| |
| idx = tileRow * numTileColumns + tileCol; |
| |
| if ((m_numPipe > 1) && (tileCol != currentPipe)) |
| { |
| continue; |
| } |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = true; |
| |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource), &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_COMMAND_BUFFER tileBatchBuf; |
| MOS_ZeroMemory(&tileBatchBuf, sizeof(tileBatchBuf)); |
| tileBatchBuf.pCmdBase = tileBatchBuf.pCmdPtr = (uint32_t *)data; |
| tileBatchBuf.iRemaining = m_tileLevelBatchSize; |
| |
| // Add batch buffer start for tile |
| HalOcaInterface::OnSubLevelBBStart(cmdBuffer, *m_osInterface->pOsContext, &m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource, 0, true, 0); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_tileLevelBatchBuffer[m_tileRowPass][idx])); |
| |
| if (m_numPipe > 1) |
| { |
| //wait for last tile row BRC update completion |
| if ((!IsFirstPipe()) && (!IsFirstPassForTileReplay())) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(&m_resTileRowBRCsyncSemaphore, &tileBatchBuf, 0xFF)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(&m_resTileRowBRCsyncSemaphore, &tileBatchBuf, 0x0)); |
| } |
| } |
| |
| // Add conditional batch buffer end before tile row level second pass |
| // To unify the single pipe and multiple pipe cases, add this for each tile |
| |
| // To add the sync logic here to make sure the previous tile row BRC update is done |
| |
| if (!IsFirstPassForTileReplay()) |
| { |
| MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS miEnhancedConditionalBatchBufferEndParams; |
| |
| MOS_ZeroMemory( |
| &miEnhancedConditionalBatchBufferEndParams, |
| sizeof(MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS)); |
| |
| // VDENC uses HuC FW generated semaphore for conditional 2nd pass |
| miEnhancedConditionalBatchBufferEndParams.presSemaphoreBuffer = |
| &m_resPakMmioBuffer; |
| |
| miEnhancedConditionalBatchBufferEndParams.dwParamsType = MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS::ENHANCED_PARAMS; |
| miEnhancedConditionalBatchBufferEndParams.enableEndCurrentBatchBuffLevel = true; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd( |
| &tileBatchBuf, |
| (PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)(&miEnhancedConditionalBatchBufferEndParams))); |
| } |
| |
| // counter should be read after conditional batch buffer |
| // in case second pass is not executed then counter should not be read |
| if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_enableTileReplay) |
| { |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface()); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->ReadEncodeCounterFromHW( |
| m_osInterface, |
| &tileBatchBuf, |
| &m_resHwCountTileReplay, |
| (uint16_t)idx)); |
| } |
| |
| // Construct the tile batch |
| // To be moved to one sub function later |
| // HCP Lock for multiple pipe mode |
| if (m_numPipe > 1) |
| { |
| MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS)); |
| vdControlStateParams.scalableModePipeLock = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&tileBatchBuf, &vdControlStateParams)); |
| } |
| |
| // VDENC_PIPE_MODE_SELECT |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&tileBatchBuf, pipeModeSelectParams)); |
| // HCP_PIPE_MODE_SELECT |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&tileBatchBuf, pipeModeSelectParams)); |
| |
| // 3nd level batch buffer |
| if (m_hevcVdencAcqpEnabled || m_brcEnabled) |
| { |
| m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize; |
| HalOcaInterface::OnSubLevelBBStart(cmdBuffer, *m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&tileBatchBuf, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx])); |
| |
| if (m_hevcRdoqEnabled) |
| { |
| MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams; |
| SetHcpPicStateParams(picStateParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&tileBatchBuf, &picStateParams)); |
| } |
| } |
| |
| // HCP_TILE_CODING commmand |
| // Set Tile replay related parameters |
| tileParams[idx].IsFirstPass = IsFirstPassForTileReplay(); |
| tileParams[idx].IsLastPass = IsLastPassForTileReplay(); |
| tileParams[idx].bTileReplayEnable = m_enableTileReplay; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG12*>(m_hcpInterface)->AddHcpTileCodingCmd(&tileBatchBuf, &tileParams[idx])); |
| |
| for (slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| bool lastSliceInTile = false, sliceInTile = false; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount, |
| &tileParams[idx], |
| &sliceInTile, |
| &lastSliceInTile)); |
| |
| if (!sliceInTile) |
| { |
| continue; |
| } |
| |
| if (m_hevcVdencAcqpEnabled || m_brcEnabled) |
| { |
| // save offset for next 2nd level batch buffer usage |
| // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice |
| // dwVdencBatchBufferPerSliceConstSize: constant size for each slice |
| // m_vdencBatchBufferPerSliceVarSize: variable size for each slice |
| |
| // starting location for executing slice level cmds |
| // To do: Improvize to only add current slice wSlcCount |
| m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize; |
| |
| for (uint32_t j = 0; j < slcCount; j++) |
| { |
| m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset |
| += (m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[j]); |
| } |
| } |
| |
| SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&tileBatchBuf, &sliceState)); |
| |
| // Send VD_PIPELINE_FLUSH command for each slice |
| MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams)); |
| vdPipelineFlushParams.Flags.bWaitDoneMFX = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1; |
| vdPipelineFlushParams.Flags.bFlushVDENC = 1; |
| vdPipelineFlushParams.Flags.bFlushHEVC = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileBatchBuf, &vdPipelineFlushParams)); |
| |
| sliceNumInTile++; |
| } // end of slice |
| |
| if (0 == sliceNumInTile) |
| { |
| // One tile must have at least one slice |
| CODECHAL_ENCODE_ASSERT(false); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| break; |
| } |
| |
| if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| //HCP unLock for multiple pipe mode |
| if (m_numPipe > 1) |
| { |
| MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS)); |
| vdControlStateParams.scalableModePipeUnlock = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&tileBatchBuf, &vdControlStateParams)); |
| } |
| |
| // Send VD_PIPELINE_FLUSH command |
| MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams)); |
| vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1; |
| vdPipelineFlushParams.Flags.bFlushHEVC = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileBatchBuf, &vdPipelineFlushParams)); |
| |
| // Send MI_FLUSH command |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&tileBatchBuf, &flushDwParams)); |
| |
| // Add batch buffer end at the end of each tile batch, 2nd level batch buffer |
| (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->iCurrent = tileBatchBuf.iOffset; |
| (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->iRemaining = tileBatchBuf.iRemaining; |
| (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->pData = data; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &m_tileLevelBatchBuffer[m_tileRowPass][idx])); |
| |
| if (data) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &(m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource)); |
| } |
| } // end of row tile |
| |
| // Set the semaphore for tile row BRC update |
| if ((m_numPipe > 1) && (!IsFirstPipe()) && (!IsLastPassForTileReplay())) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| SetSemaphoreMem( |
| &m_resVdBoxSemaphoreMem[currentPipe].sResource, |
| &cmdBuffer, |
| 0xFF)); |
| } |
| |
| //turn on protection again in case conditionalbatchbufferexit turns off the protection |
| if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_enableTileReplay) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer)); |
| } |
| |
| // Run tile row based BRC on pipe 0 |
| if (IsFirstPipe() && (!IsLastPassForTileReplay())) |
| { |
| m_CurrentTileRow = tileRow; |
| m_CurrentPassForTileReplay = m_tileRowPass; |
| m_CurrentPassForOverAll++; |
| |
| // Before tile row BRC update, make sure all pipes are complete |
| if (m_numPipe > 1) |
| { |
| for (uint32_t i = 1; i < m_numPipe; i++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0xFF)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0x0)); |
| } |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcTileRowUpdate(&cmdBuffer)); |
| } |
| |
| //turn on protection again in case conditionalbatchbufferexit turns off the protection |
| if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHWCounterAutoIncrementEnforced(m_osInterface) && m_enableTileReplay) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer)); |
| } |
| |
| //Refresh counter after every tilerowpass |
| if (m_tileRowPass < m_NumPassesForTileReplay - 1) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->RefreshCounter(m_osInterface, &cmdBuffer)); |
| } |
| } |
| |
| // Update head pointer for capture mode |
| if (m_CaptureModeEnable && IsLastPipe()) |
| { |
| MHW_MI_LOAD_REGISTER_IMM_PARAMS registerImmParams; |
| MOS_ZeroMemory(®isterImmParams, sizeof(registerImmParams)); |
| registerImmParams.dwData = 1; |
| registerImmParams.dwRegister = m_VdboxVDENCRegBase[currentPipe] + 0x90; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(&cmdBuffer, ®isterImmParams)); |
| } |
| |
| //refresh encode counter after every rowpass |
| if (tileRow < numTileRows - 1) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->RefreshCounter(m_osInterface, &cmdBuffer)); |
| } |
| } |
| |
| m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams); |
| |
| // Insert end of sequence/stream if se |
| // To be moved to slice level? |
| if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe()) |
| { |
| MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams; |
| MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams)); |
| pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq; |
| pakInsertObjectParams.bLastPicInStream = m_lastPicInStream; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams)); |
| } |
| |
| // Send VD_CONTROL_STATE (Memory Implict Flush) |
| MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS)); |
| vdControlStateParams.memoryImplicitFlush = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams)); |
| |
| // Send VD_PIPELINE_FLUSH command |
| MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams)); |
| vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1; |
| vdPipelineFlushParams.Flags.bFlushHEVC = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams)); |
| |
| // Send MI_FLUSH command |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| // Set the HW semaphore to indicate current pipe done |
| if (m_numPipe > 1) |
| { |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource)) |
| { |
| flushDwParams.pOsResource = &m_resVdBoxSemaphoreMem[currentPipe].sResource; |
| flushDwParams.dwDataDW1 = 0xFF; |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| } |
| |
| if (IsFirstPipe()) |
| { |
| // first pipe needs to ensure all other pipes are ready |
| if (m_numPipe > 1) |
| { |
| for (uint32_t i = 0; i < m_numPipe; i++) |
| { |
| if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[i].sResource)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0xFF)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0x0)); |
| } |
| } |
| } |
| |
| // Whenever ACQP/ BRC is enabled with tiling, PAK Integration kernel is needed. |
| // ACQP/ BRC need PAK integration kernel to aggregate statistics |
| if (m_vdencHucUsed) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer)); |
| } |
| |
| // Use HW stitch commands only in the scalable mode |
| // For single pipe with tile replay, stitch also needed |
| if (m_enableTileStitchByHW) |
| { |
| // 2nd level BB buffer for stitching cmd |
| // current location to add cmds in 2nd level batch buffer |
| m_HucStitchCmdBatchBuffer.iCurrent = 0; |
| // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass |
| m_HucStitchCmdBatchBuffer.dwOffset = 0; |
| HalOcaInterface::OnSubLevelBBStart(cmdBuffer, *m_osInterface->pOsContext, &m_HucStitchCmdBatchBuffer.OsResource, 0, true, 0); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer)); |
| // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES)); |
| |
| if (m_numPipe <= 1) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer)); |
| |
| // BRC PAK statistics different for each pass |
| if (m_brcEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer)); |
| } |
| } |
| |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| // Signal HW semaphore for the reference frame dependency (i.e., current coding frame waits for the reference frame being ready) |
| if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource)) |
| { |
| // the reference frame semaphore must be set in each pass because of the conditional BRC batch buffer. Some BRC passes could be skipped. |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_currRefSync->resSemaphoreMem.sResource; |
| storeDataParams.dwResourceOffset = 0; |
| storeDataParams.dwValue = 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd( |
| &cmdBuffer, |
| &storeDataParams)); |
| } |
| } |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase || (m_numPipe >= 2)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| bool nullRendering = m_videoContextUsesNullHw; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering)); |
| |
| CODECHAL_DEBUG_TOOL( |
| if (m_mmcState) |
| { |
| m_mmcState->UpdateUserFeatureKey(&m_reconSurface); |
| } |
| ) |
| |
| if (IsFirstPipe() && |
| IsLastPass() && |
| m_signalEnc && |
| m_currRefSync && |
| !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse)) |
| { |
| // signal semaphore |
| MOS_SYNC_PARAMS syncParams; |
| syncParams = g_cInitSyncParams; |
| syncParams.GpuContext = m_videoContext; |
| syncParams.presSyncResource = &m_currRefSync->resSyncObject; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams)); |
| m_currRefSync->uiSemaphoreObjCount++; |
| m_currRefSync->bInUsed = true; |
| } |
| } |
| |
| // Reset parameters for next PAK execution |
| if (IsLastPipe() && |
| IsLastPass()) |
| { |
| if (!m_singleTaskPhaseSupported) |
| { |
| m_osInterface->pfnResetPerfBufferID(m_osInterface); |
| } |
| |
| m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS; |
| |
| if (m_hevcSeqParams->ParallelBRC) |
| { |
| m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite = |
| (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; |
| } |
| |
| m_newPpsHeader = 0; |
| m_newSeqHeader = 0; |
| m_frameNum++; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer); |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = true; |
| |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_COMMAND_BUFFER constructedCmdBuf; |
| MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf)); |
| constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data; |
| constructedCmdBuf.iRemaining = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE); |
| |
| // 1st Group : PIPE_MODE_SELECT |
| // set PIPE_MODE_SELECT command |
| // This is not needed for GEN11/GEN12 as single pass SAO is supported |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| pipeModeSelectParams.bVdencEnabled = true; |
| pipeModeSelectParams.bAdvancedRateControlEnable = true; |
| pipeModeSelectParams.bRdoqEnable = m_hevcRdoqEnabled; |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY; |
| pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY; |
| pipeModeSelectParams.bStreamOutEnabled = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&constructedCmdBuf, &pipeModeSelectParams)); |
| |
| MHW_BATCH_BUFFER TempBatchBuffer; |
| MOS_ZeroMemory(&TempBatchBuffer, sizeof(MHW_BATCH_BUFFER)); |
| TempBatchBuffer.iSize = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE); |
| TempBatchBuffer.pData = data; |
| |
| // set MI_BATCH_BUFFER_END command |
| int32_t cmdBufOffset = constructedCmdBuf.iOffset; |
| |
| TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset; |
| TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer)); |
| constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4; |
| constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent; |
| constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining; |
| |
| m_miBatchBufferEndCmdSize = constructedCmdBuf.iOffset - cmdBufOffset; |
| CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer1stGroupSize == constructedCmdBuf.iOffset); |
| |
| SetAddCommands(CODECHAL_CMD1, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay); |
| m_picStateCmdStartInBytes = constructedCmdBuf.iOffset; |
| |
| // set HCP_PIC_STATE command |
| MHW_VDBOX_HEVC_PIC_STATE_G12 hevcPicState; |
| SetHcpPicStateParams(hevcPicState); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &hevcPicState)); |
| m_cmd2StartInBytes = constructedCmdBuf.iOffset; |
| |
| SetAddCommands(CODECHAL_CMD2, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered); |
| |
| // set MI_BATCH_BUFFER_END command |
| TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset; |
| TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer)); |
| constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4; |
| constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent; |
| constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining; |
| |
| CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer2ndGroupSize + m_hwInterface->m_vdencBatchBuffer1stGroupSize |
| == constructedCmdBuf.iOffset); |
| |
| // 3rd Group : HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE |
| MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState; |
| SetHcpSliceStateCommonParams(sliceState); |
| |
| // slice level cmds for each slice |
| PCODEC_ENCODER_SLCDATA slcData = m_slcData; |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| |
| for (uint32_t startLCU = 0, slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| bool lastSliceInTile = false, sliceInTile = false; |
| |
| if (IsFirstPass()) |
| { |
| slcData[slcCount].CmdOffset = startLCU * (m_hcpInterface->GetHcpPakObjSize()) * sizeof(uint32_t); |
| } |
| |
| uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| uint32_t idx = 0; |
| for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++) |
| { |
| for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++) |
| { |
| idx = tileRow * numTileColumns + tileCol; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount, |
| &tileParams[idx], |
| &sliceInTile, |
| &lastSliceInTile)); |
| |
| if (sliceInTile) |
| { |
| break; |
| } |
| } |
| if (sliceInTile) |
| { |
| break; |
| } |
| } |
| |
| SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx); |
| |
| m_vdencBatchBufferPerSliceVarSize[slcCount] = 0; |
| |
| // set HCP_WEIGHTOFFSET_STATE command |
| // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one. |
| // If zero, then this command is not issued. |
| if (m_hevcVdencWeightedPredEnabled) |
| { |
| MHW_VDBOX_HEVC_WEIGHTOFFSET_PARAMS hcpWeightOffsetParams; |
| MOS_ZeroMemory(&hcpWeightOffsetParams, sizeof(hcpWeightOffsetParams)); |
| // HuC based WP ignores App based weights |
| if (!m_hevcPicParams->bEnableGPUWeightedPrediction) |
| { |
| for (auto k = 0; k < 2; k++) // k=0: LIST_0, k=1: LIST_1 |
| { |
| // Luma, Chroma Offset |
| for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++) |
| { |
| hcpWeightOffsetParams.LumaOffsets[k][i] = (int16_t)m_hevcSliceParams->luma_offset[k][i]; |
| // Cb, Cr |
| for (auto j = 0; j < 2; j++) |
| { |
| hcpWeightOffsetParams.ChromaOffsets[k][i][j] = (int16_t)m_hevcSliceParams->chroma_offset[k][i][j]; |
| } |
| } |
| |
| // Luma Weight |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy( |
| &hcpWeightOffsetParams.LumaWeights[k], |
| sizeof(hcpWeightOffsetParams.LumaWeights[k]), |
| &m_hevcSliceParams->delta_luma_weight[k], |
| sizeof(m_hevcSliceParams->delta_luma_weight[k]))); |
| // Chroma Weight |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy( |
| &hcpWeightOffsetParams.ChromaWeights[k], |
| sizeof(hcpWeightOffsetParams.ChromaWeights[k]), |
| &m_hevcSliceParams->delta_chroma_weight[k], |
| sizeof(m_hevcSliceParams->delta_chroma_weight[k]))); |
| } |
| } |
| |
| // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B |
| if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE) |
| { |
| hcpWeightOffsetParams.ucList = LIST_0; |
| |
| cmdBufOffset = constructedCmdBuf.iOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams)); |
| m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset; |
| // 1st HcpWeightOffset cmd is not always inserted (except weighted prediction + P, B slices) |
| m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize; |
| } |
| |
| // 2nd HCP_WEIGHTOFFSET_STATE cmd - B only |
| if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE) |
| { |
| hcpWeightOffsetParams.ucList = LIST_1; |
| |
| cmdBufOffset = constructedCmdBuf.iOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams)); |
| m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset; |
| // 2nd HcpWeightOffset cmd is not always inserted (except weighted prediction + B slices) |
| m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize; |
| } |
| } |
| |
| // set HCP_SLICE_STATE command |
| cmdBufOffset = constructedCmdBuf.iOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSliceStateCmd(&constructedCmdBuf, &sliceState)); |
| m_hcpSliceStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset; |
| |
| // set 1st HCP_PAK_INSERT_OBJECT command |
| // insert AU, SPS, PPS headers before first slice header |
| if (sliceState.bInsertBeforeSliceHeaders) |
| { |
| uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4; // 12 bits for DwordLength field in PAK_INSERT_OBJ cmd |
| m_1stPakInsertObjectCmdSize = 0; |
| |
| for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++) |
| { |
| uint32_t nalUnitPosiSize = sliceState.ppNalUnitParams[i]->uiSize; |
| uint32_t nalUnitPosiOffset = sliceState.ppNalUnitParams[i]->uiOffset; |
| |
| while (nalUnitPosiSize > 0) |
| { |
| uint32_t bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalUnitPosiSize * 8); |
| uint32_t offSet = nalUnitPosiOffset; |
| |
| MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams; |
| MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams)); |
| pakInsertObjectParams.bEmulationByteBitsInsert = sliceState.ppNalUnitParams[i]->bInsertEmulationBytes; |
| pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.ppNalUnitParams[i]->uiSkipEmulationCheckCount; |
| pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer; |
| pakInsertObjectParams.dwBitSize = bitSize; |
| pakInsertObjectParams.dwOffset = offSet; |
| |
| if (nalUnitPosiSize > maxBytesInPakInsertObjCmd) |
| { |
| nalUnitPosiSize -= maxBytesInPakInsertObjCmd; |
| nalUnitPosiOffset += maxBytesInPakInsertObjCmd; |
| } |
| else |
| { |
| nalUnitPosiSize = 0; |
| } |
| |
| cmdBufOffset = constructedCmdBuf.iOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&constructedCmdBuf, &pakInsertObjectParams)); |
| |
| // this info needed again in BrcUpdate HuC FW const |
| m_1stPakInsertObjectCmdSize += (constructedCmdBuf.iOffset - cmdBufOffset); |
| } |
| } |
| // 1st PakInsertObject cmd is not always inserted for each slice |
| m_vdencBatchBufferPerSliceVarSize[slcCount] += m_1stPakInsertObjectCmdSize; |
| } |
| |
| // set 2nd HCP_PAK_INSERT_OBJECT command |
| // Insert slice header |
| MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams; |
| MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams)); |
| pakInsertObjectParams.bLastHeader = true; |
| pakInsertObjectParams.bEmulationByteBitsInsert = true; |
| |
| // App does the slice header packing, set the skip count passed by the app |
| pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.uiSkipEmulationCheckCount; |
| pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer; |
| pakInsertObjectParams.dwBitSize = sliceState.dwLength; |
| pakInsertObjectParams.dwOffset = sliceState.dwOffset; |
| |
| // For HEVC VDEnc Dynamic Slice |
| if (m_hevcSeqParams->SliceSizeControl) |
| { |
| pakInsertObjectParams.bLastHeader = false; |
| pakInsertObjectParams.bEmulationByteBitsInsert = false; |
| pakInsertObjectParams.dwBitSize = m_hevcSliceParams->BitLengthSliceHeaderStartingPortion; |
| pakInsertObjectParams.bResetBitstreamStartingPos = true; |
| } |
| |
| uint32_t byteSize = (pakInsertObjectParams.dwBitSize + 7) >> 3; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject( |
| &constructedCmdBuf, |
| &pakInsertObjectParams)); |
| |
| // 2nd PakInsertObject cmd is always inserted for each slice |
| // so already reflected in dwVdencBatchBufferPerSliceConstSize |
| m_vdencBatchBufferPerSliceVarSize[slcCount] += (MOS_ALIGN_CEIL(byteSize, sizeof(uint32_t))) / sizeof(uint32_t) * 4; |
| |
| // set 3rd HCP_PAK_INSERT_OBJECT command |
| if (m_hevcSeqParams->SliceSizeControl) |
| { |
| // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header. |
| pakInsertObjectParams.bLastHeader = true; |
| pakInsertObjectParams.dwBitSize = sliceState.dwLength - m_hevcSliceParams->BitLengthSliceHeaderStartingPortion; |
| pakInsertObjectParams.dwOffset += ((m_hevcSliceParams->BitLengthSliceHeaderStartingPortion + 7) / 8); // Skips the first 5 bytes which is Start Code + Nal Unit Header |
| pakInsertObjectParams.bResetBitstreamStartingPos = true; |
| |
| cmdBufOffset = constructedCmdBuf.iOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject( |
| &constructedCmdBuf, |
| &pakInsertObjectParams)); |
| // 3rd PakInsertObject cmd is not always inserted for each slice |
| m_vdencBatchBufferPerSliceVarSize[slcCount] += (constructedCmdBuf.iOffset - cmdBufOffset); |
| } |
| |
| // set VDENC_WEIGHT_OFFSETS_STATE command |
| MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams; |
| MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams)); |
| vdencWeightOffsetParams.bWeightedPredEnabled = m_hevcVdencWeightedPredEnabled; |
| vdencWeightOffsetParams.isLowDelay = m_lowDelay; |
| |
| if (vdencWeightOffsetParams.bWeightedPredEnabled) |
| { |
| uint8_t lumaLog2WeightDenom = m_hevcPicParams->bEnableGPUWeightedPrediction ? 6 : m_hevcSliceParams->luma_log2_weight_denom; |
| vdencWeightOffsetParams.dwDenom = 1 << lumaLog2WeightDenom; |
| |
| if (!m_hevcPicParams->bEnableGPUWeightedPrediction) |
| { |
| // Luma Offsets |
| for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++) |
| { |
| vdencWeightOffsetParams.LumaOffsets[0][i] = (int16_t)m_hevcSliceParams->luma_offset[0][i]; |
| vdencWeightOffsetParams.LumaOffsets[1][i] = (int16_t)m_hevcSliceParams->luma_offset[1][i]; |
| } |
| |
| // Luma Weights |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy( |
| &vdencWeightOffsetParams.LumaWeights[0], |
| sizeof(vdencWeightOffsetParams.LumaWeights[0]), |
| &m_hevcSliceParams->delta_luma_weight[0], |
| sizeof(m_hevcSliceParams->delta_luma_weight[0])), |
| "Failed to copy luma weight 0 memory."); |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy( |
| &vdencWeightOffsetParams.LumaWeights[1], |
| sizeof(vdencWeightOffsetParams.LumaWeights[1]), |
| &m_hevcSliceParams->delta_luma_weight[1], |
| sizeof(m_hevcSliceParams->delta_luma_weight[1])), |
| "Failed to copy luma weight 1 memory."); |
| } |
| } |
| |
| cmdBufOffset = constructedCmdBuf.iOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd( |
| &constructedCmdBuf, |
| nullptr, |
| &vdencWeightOffsetParams)); |
| m_vdencWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset; |
| |
| // set MI_BATCH_BUFFER_END command |
| TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset; |
| TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer)); |
| constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4; |
| constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent; |
| constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining; |
| |
| m_vdencBatchBufferPerSliceVarSize[slcCount] += ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4; |
| for (auto i = 0; i < ENCODE_VDENC_HEVC_PADDING_DW_SIZE ; i++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiNoop(&constructedCmdBuf, nullptr)); |
| } |
| startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice; |
| } |
| |
| if (data) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::ConstructTLB(PMHW_BATCH_BUFFER thirdLevelBatchBuffer) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(thirdLevelBatchBuffer); |
| |
| MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams; |
| SetHcpPicStateParams(picStateParams); |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = true; |
| |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(thirdLevelBatchBuffer->OsResource), &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_COMMAND_BUFFER constructedCmdBuf; |
| MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf)); |
| constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data; |
| constructedCmdBuf.iRemaining = m_thirdLBSize; |
| |
| SetAddCommands(CODECHAL_CMD1, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay); |
| |
| // HCP_PIC_STATE |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &picStateParams)); |
| |
| SetAddCommands(CODECHAL_CMD2, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered); |
| |
| // Send HEVC_VP9_RDOQ_STATE command |
| if (m_hevcRdoqEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&constructedCmdBuf, &picStateParams)); |
| } |
| |
| thirdLevelBatchBuffer->iCurrent = constructedCmdBuf.iOffset; |
| thirdLevelBatchBuffer->iRemaining = constructedCmdBuf.iRemaining; |
| thirdLevelBatchBuffer->pData = data; |
| // set MI_BATCH_BUFFER_END command |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, thirdLevelBatchBuffer)); |
| |
| std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]_TLB"; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &constructedCmdBuf, |
| CODECHAL_NUM_MEDIA_STATES, |
| pakPassName.data()));) |
| |
| if (data) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &(thirdLevelBatchBuffer->OsResource)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCBrcInitReset() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| // Setup BrcInit DMEM |
| auto hucVdencBrcInitDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12)m_osInterface->pfnLockResource( |
| m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(hucVdencBrcInitDmem); |
| MOS_ZeroMemory(hucVdencBrcInitDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12)); |
| |
| hucVdencBrcInitDmem->BRCFunc_U32 = (m_enableTileReplay ? 1 : 0) << 7; //bit0 0: Init; 1: Reset, bit7 0: frame-based; 1: tile-based |
| hucVdencBrcInitDmem->UserMaxFrame = GetProfileLevelMaxFrameSize(); |
| hucVdencBrcInitDmem->InitBufFull_U32 = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit); |
| hucVdencBrcInitDmem->BufSize_U32 = m_hevcSeqParams->VBVBufferSizeInBit; |
| hucVdencBrcInitDmem->TargetBitrate_U32 = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; // map DDI params(in Kbits) to huc (in bits) |
| hucVdencBrcInitDmem->MaxRate_U32 = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS; |
| hucVdencBrcInitDmem->MinRate_U32 = 0; |
| hucVdencBrcInitDmem->FrameRateM_U32 = m_hevcSeqParams->FrameRate.Numerator; |
| hucVdencBrcInitDmem->FrameRateD_U32 = m_hevcSeqParams->FrameRate.Denominator; |
| hucVdencBrcInitDmem->ACQP_U32 = 0; |
| if (m_hevcSeqParams->UserMaxPBFrameSize > 0) |
| { |
| //Backup CodingType as need to set it as B_Tpye to get MaxFrameSize for P/B frames. |
| auto CodingTypeTemp = m_hevcPicParams->CodingType; |
| m_hevcPicParams->CodingType = B_TYPE; |
| hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = GetProfileLevelMaxFrameSize(); |
| m_hevcPicParams->CodingType = CodingTypeTemp; |
| } |
| else |
| { |
| hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = hucVdencBrcInitDmem->UserMaxFrame; |
| } |
| |
| if (m_brcEnabled) |
| { |
| switch (m_hevcSeqParams->RateControlMethod) |
| { |
| case RATECONTROL_ICQ: |
| hucVdencBrcInitDmem->BRCFlag = 0; |
| break; |
| case RATECONTROL_CBR: |
| hucVdencBrcInitDmem->BRCFlag = 1; |
| break; |
| case RATECONTROL_VBR: |
| hucVdencBrcInitDmem->BRCFlag = 2; |
| hucVdencBrcInitDmem->ACQP_U32 = 0; |
| break; |
| case RATECONTROL_VCM: |
| hucVdencBrcInitDmem->BRCFlag = 3; |
| break; |
| case RATECONTROL_QVBR: |
| hucVdencBrcInitDmem->BRCFlag = 2; |
| hucVdencBrcInitDmem->ACQP_U32 = m_hevcSeqParams->ICQQualityFactor;; |
| break; |
| default: |
| break; |
| } |
| |
| // Low Delay BRC |
| if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) |
| { |
| hucVdencBrcInitDmem->BRCFlag = 5; |
| } |
| |
| switch (m_hevcSeqParams->MBBRC) |
| { |
| case mbBrcInternal: |
| case mbBrcEnabled: |
| hucVdencBrcInitDmem->CuQpCtrl_U8 = 3; |
| break; |
| case mbBrcDisabled: |
| hucVdencBrcInitDmem->CuQpCtrl_U8 = 0; |
| break; |
| default: |
| break; |
| } |
| } |
| else if (m_hevcVdencAcqpEnabled) |
| { |
| hucVdencBrcInitDmem->BRCFlag = 0; |
| |
| // 0=No CUQP; 1=CUQP for I-frame; 2=CUQP for P/B-frame |
| // bit operation, bit 1 for I-frame, bit 2 for P/B frame |
| // In VDENC mode, the field "Cu_Qp_Delta_Enabled_Flag" should always be set to 1. |
| if (m_hevcSeqParams->QpAdjustment) |
| { |
| hucVdencBrcInitDmem->CuQpCtrl_U8 = 3; // wPictureCodingType I:0, P:1, B:2 |
| } |
| else |
| { |
| hucVdencBrcInitDmem->CuQpCtrl_U8 = 0; // wPictureCodingType I:0, P:1, B:2 |
| } |
| } |
| |
| hucVdencBrcInitDmem->SSCFlag = m_hevcSeqParams->SliceSizeControl; |
| |
| // LDB case, NumP=0 & NumB=100, but GopP=100 & GopB=0 |
| |
| hucVdencBrcInitDmem->GopP_U16 = m_hevcSeqParams->GopPicSize - m_hevcSeqParams->NumOfBInGop[0] - 1; |
| hucVdencBrcInitDmem->GopB_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[0]; |
| |
| hucVdencBrcInitDmem->FrameWidth_U16 = (uint16_t)m_frameWidth; |
| hucVdencBrcInitDmem->FrameHeight_U16 = (uint16_t)m_frameHeight; |
| |
| hucVdencBrcInitDmem->GopB1_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[1]; |
| hucVdencBrcInitDmem->GopB2_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[2]; |
| |
| hucVdencBrcInitDmem->MinQP_U8 = m_hevcPicParams->BRCMinQp < 10 ? 10 : m_hevcPicParams->BRCMinQp; // Setting values from arch spec |
| hucVdencBrcInitDmem->MaxQP_U8 = m_hevcPicParams->BRCMaxQp < 10 ? 51 : (m_hevcPicParams->BRCMaxQp > 51 ? 51 : m_hevcPicParams->BRCMaxQp); // Setting values from arch spec |
| |
| hucVdencBrcInitDmem->MaxBRCLevel_U8 = 1; |
| hucVdencBrcInitDmem->BRCPyramidEnable_U8 = 0; |
| //QP modulation settings |
| if (m_hevcSeqParams->HierarchicalFlag) |
| { |
| // Low delay P/B max support Gop 4, layer 3; RA max support Gop 8, layer 4 |
| hucVdencBrcInitDmem->MaxBRCLevel_U8 = m_hevcSeqParams->LowDelayMode ? 3 : 4; |
| hucVdencBrcInitDmem->BRCPyramidEnable_U8 = 1; |
| } |
| |
| hucVdencBrcInitDmem->LumaBitDepth_U8 = m_hevcSeqParams->bit_depth_luma_minus8 + 8; |
| hucVdencBrcInitDmem->ChromaBitDepth_U8 = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; |
| |
| if (m_hevcSeqParams->SourceBitDepth == ENCODE_HEVC_BIT_DEPTH_10) |
| { |
| hucVdencBrcInitDmem->LumaBitDepth_U8 = 10; |
| hucVdencBrcInitDmem->ChromaBitDepth_U8 = 10; |
| } |
| |
| if ((hucVdencBrcInitDmem->LowDelayMode_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW))) |
| { |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshPB0_S8, 8 * sizeof(int8_t), (void *)m_lowdelayDevThreshPB, 8 * sizeof(int8_t)); |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshVBR0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshVBR, 8 * sizeof(int8_t)); |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshI0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshI, 8 * sizeof(int8_t)); |
| } |
| else |
| { |
| static int8_t DevThreshPB0_S8[8]; |
| static int8_t DevThreshVBR0_S8[8]; |
| static int8_t DevThreshI0_S8[8]; |
| |
| uint64_t inputbitsperframe = uint64_t(hucVdencBrcInitDmem->MaxRate_U32*100. / (hucVdencBrcInitDmem->FrameRateM_U32 * 100.0 / hucVdencBrcInitDmem->FrameRateD_U32)); |
| if (m_brcEnabled && !hucVdencBrcInitDmem->BufSize_U32) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("VBV BufSize should not be 0 for BRC case\n"); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| } |
| uint64_t vbvsz = hucVdencBrcInitDmem->BufSize_U32; |
| double bps_ratio = inputbitsperframe / (vbvsz / m_devStdFPS); |
| if (bps_ratio < m_bpsRatioLow) bps_ratio = m_bpsRatioLow; |
| if (bps_ratio > m_bpsRatioHigh) bps_ratio = m_bpsRatioHigh; |
| |
| for (int i = 0; i < m_numDevThreshlds / 2; i++) { |
| DevThreshPB0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshPBFPNEG[i], bps_ratio)); |
| DevThreshPB0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshPBFPPOS[i], bps_ratio)); |
| |
| DevThreshI0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshIFPNEG[i], bps_ratio)); |
| DevThreshI0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshIFPPOS[i], bps_ratio)); |
| |
| DevThreshVBR0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshVBRNEG[i], bps_ratio)); |
| DevThreshVBR0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_posMultVBR*pow(m_devThreshVBRPOS[i], bps_ratio)); |
| } |
| |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshPB0_S8, 8 * sizeof(int8_t), (void*)DevThreshPB0_S8, 8 * sizeof(int8_t)); |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshVBR0_S8, 8 * sizeof(int8_t), (void*)DevThreshVBR0_S8, 8 * sizeof(int8_t)); |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshI0_S8, 8 * sizeof(int8_t), (void*)DevThreshI0_S8, 8 * sizeof(int8_t)); |
| } |
| |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshP0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshP0, 4 * sizeof(int8_t)); |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshB0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshB0, 4 * sizeof(int8_t)); |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshI0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshI0, 4 * sizeof(int8_t)); |
| |
| if (m_brcEnabled) |
| { |
| // initQPIP, initQPB values will be used for BRC in the future |
| int32_t initQPIP = 0, initQPB = 0; |
| ComputeVDEncInitQP(initQPIP, initQPB); |
| hucVdencBrcInitDmem->InitQPIP_U8 = (uint8_t)initQPIP; |
| hucVdencBrcInitDmem->InitQPB_U8 = (uint8_t)initQPB; |
| } |
| else |
| { |
| hucVdencBrcInitDmem->InitQPIP_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta; |
| hucVdencBrcInitDmem->InitQPB_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta; |
| } |
| |
| hucVdencBrcInitDmem->TopFrmSzThrForAdapt2Pass_U8 = 32; |
| hucVdencBrcInitDmem->BotFrmSzThrForAdapt2Pass_U8 = 24; |
| |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshP0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshP0, 7 * sizeof(uint8_t)); |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshB0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshB0, 7 * sizeof(uint8_t)); |
| MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshI0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshI0, 7 * sizeof(uint8_t)); |
| |
| if (m_vdencStreamInEnabled && m_hevcPicParams->NumROI && !m_vdencNativeROIEnabled) |
| { |
| hucVdencBrcInitDmem->StreamInROIEnable_U8 = 1; |
| hucVdencBrcInitDmem->StreamInSurfaceEnable_U8 = 1; |
| } |
| |
| hucVdencBrcInitDmem->TopQPDeltaThrForAdapt2Pass_U8 = 2; |
| hucVdencBrcInitDmem->BotQPDeltaThrForAdapt2Pass_U8 = 1; |
| |
| if ((m_hevcSeqParams->SlidingWindowSize != 0) && (m_hevcSeqParams->MaxBitRatePerSlidingWindow != 0)) |
| { |
| hucVdencBrcInitDmem->SlidingWindow_Size_U32 = m_hevcSeqParams->SlidingWindowSize; |
| hucVdencBrcInitDmem->SLIDINGWINDOW_MaxRateRatio = m_hevcSeqParams->MaxBitRatePerSlidingWindow * 100 / m_hevcSeqParams->TargetBitRate; |
| } |
| else |
| { |
| if (m_hevcSeqParams->FrameRate.Denominator == 0) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("FrameRate.Deminator is zero!"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| uint32_t framerate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator; |
| hucVdencBrcInitDmem->SlidingWindow_Size_U32 = MOS_MIN(framerate, 60); |
| hucVdencBrcInitDmem->SLIDINGWINDOW_MaxRateRatio = 120; |
| } |
| |
| // Tile Row based BRC |
| if (m_enableTileReplay) |
| { |
| uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3; |
| uint32_t residual = (1 << shift) - 1; |
| hucVdencBrcInitDmem->SlideWindowRC = 0; //Reserved for now |
| hucVdencBrcInitDmem->MaxLogCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| hucVdencBrcInitDmem->FrameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift; |
| hucVdencBrcInitDmem->FrameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift; |
| } |
| |
| // Long term reference |
| hucVdencBrcInitDmem->LongTermRefEnable_U8 = true; |
| hucVdencBrcInitDmem->LongTermRefMsdk_U8 = true; |
| hucVdencBrcInitDmem->IsLowDelay_U8 = m_lowDelay; |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx]); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetConstDataHuCBrcUpdate() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| auto hucConstData = (PCODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G12)m_osInterface->pfnLockResource( |
| m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(hucConstData); |
| |
| MOS_SecureMemcpy(hucConstData->SLCSZ_THRDELTAI_U16, sizeof(m_hucConstantData), m_hucConstantData, sizeof(m_hucConstantData)); |
| |
| MOS_SecureMemcpy(hucConstData->RDQPLambdaI, sizeof(m_rdQpLambdaI), m_rdQpLambdaI, sizeof(m_rdQpLambdaI)); |
| MOS_SecureMemcpy(hucConstData->RDQPLambdaP, sizeof(m_rdQpLambdaP), m_rdQpLambdaP, sizeof(m_rdQpLambdaP)); |
| |
| if (m_hevcVisualQualityImprovement) |
| { |
| MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI_VQI, sizeof(m_sadQpLambdaI_VQI)); |
| MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode_VQI, sizeof(m_penaltyForIntraNonDC32x32PredMode_VQI)); |
| } |
| else |
| { |
| MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI, sizeof(m_sadQpLambdaI)); |
| MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode)); |
| } |
| |
| MOS_SecureMemcpy(hucConstData->SADQPLambdaP, sizeof(m_sadQpLambdaP), m_sadQpLambdaP, sizeof(m_sadQpLambdaP)); |
| |
| if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) |
| { |
| const int numEstrateThreshlds = 7; |
| |
| for (int i = 0; i < numEstrateThreshlds + 1; i++) |
| { |
| for (int j = 0; j < m_numDevThreshlds + 1; j++) |
| { |
| hucConstData->FrmSzAdjTabI_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszI[j][i]; |
| hucConstData->FrmSzAdjTabP_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszP[j][i]; |
| hucConstData->FrmSzAdjTabB_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszB[j][i]; |
| } |
| } |
| } |
| |
| // ModeCosts depends on frame type |
| if (m_pictureCodingType == I_TYPE) |
| { |
| MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsIFrame), m_hucModeCostsIFrame, sizeof(m_hucModeCostsIFrame)); |
| } |
| else |
| { |
| MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsPbFrame), m_hucModeCostsPbFrame, sizeof(m_hucModeCostsPbFrame)); |
| } |
| |
| // starting location in batch buffer for each slice |
| uint32_t baseLocation = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize; |
| uint32_t currentLocation = baseLocation; |
| |
| auto slcData = m_slcData; |
| // HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE |
| for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| auto hevcSlcParams = &m_hevcSliceParams[slcCount]; |
| // HuC FW require unit in Bytes |
| hucConstData->Slice[slcCount].SizeOfCMDs |
| = (uint16_t)(m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[slcCount]); |
| |
| // HCP_WEIGHTOFFSET_STATE cmd |
| if (m_hevcVdencWeightedPredEnabled) |
| { |
| // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B |
| if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE) |
| { |
| hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET_L0 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB |
| currentLocation += m_hcpWeightOffsetStateCmdSize; |
| } |
| |
| // 2nd HCP_WEIGHTOFFSET_STATE cmd - B |
| if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE) |
| { |
| hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET_L1 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB |
| currentLocation += m_hcpWeightOffsetStateCmdSize; |
| } |
| } |
| else |
| { |
| // 0xFFFF means unavailable in SLB |
| hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = 0xFFFF; |
| hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = 0xFFFF; |
| } |
| |
| // HCP_SLICE_STATE cmd |
| hucConstData->Slice[slcCount].SliceState_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET is not needed |
| currentLocation += m_hcpSliceStateCmdSize; |
| |
| // VDENC_WEIGHT_OFFSETS_STATE cmd |
| hucConstData->Slice[slcCount].VdencWeightOffset_StartInBytes // VdencWeightOffset cmd is the last one expect BatchBufferEnd cmd |
| = (uint16_t)(baseLocation + hucConstData->Slice[slcCount].SizeOfCMDs - m_vdencWeightOffsetStateCmdSize - m_miBatchBufferEndCmdSize - ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4); |
| |
| // logic from PakInsertObject cmd |
| uint32_t bitSize = (m_hevcSeqParams->SliceSizeControl) ? (hevcSlcParams->BitLengthSliceHeaderStartingPortion) : slcData[slcCount].BitSize; // 40 for HEVC VDEnc Dynamic Slice |
| uint32_t byteSize = (bitSize + 7) >> 3; |
| uint32_t sliceHeaderSizeInBytes = (bitSize + 7) >> 3; |
| // 1st PakInsertObject cmd with AU, SPS, PPS headers only exists for the first slice |
| if (slcCount == 0) |
| { |
| // assumes that there is no 3rd PakInsertObject cmd for SSC |
| currentLocation += m_1stPakInsertObjectCmdSize; |
| } |
| |
| hucConstData->Slice[slcCount].SliceHeaderPIO_StartInBytes = (uint16_t)currentLocation; |
| |
| // HuC FW requires true slice header size in bits without byte alignment |
| hucConstData->Slice[slcCount].SliceHeader_SizeInBits = (uint16_t)(sliceHeaderSizeInBytes * 8); |
| if (!IsFirstPass()) |
| { |
| PBSBuffer bsBuffer = &m_bsBuffer; |
| CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer->pBase); |
| uint8_t *sliceHeaderLastByte = (uint8_t*)(bsBuffer->pBase + slcData[slcCount].SliceOffset + sliceHeaderSizeInBytes - 1); |
| for (auto i = 0; i < 8; i++) |
| { |
| uint8_t mask = 1 << i; |
| if (*sliceHeaderLastByte & mask) |
| { |
| hucConstData->Slice[slcCount].SliceHeader_SizeInBits -= (i + 1); |
| break; |
| } |
| } |
| } |
| |
| if (m_hevcVdencWeightedPredEnabled) |
| { |
| hucConstData->Slice[slcCount].WeightTable_StartInBits = (uint16_t)hevcSlcParams->PredWeightTableBitOffset; |
| hucConstData->Slice[slcCount].WeightTable_EndInBits = (uint16_t)(hevcSlcParams->PredWeightTableBitOffset + (hevcSlcParams->PredWeightTableBitLength)); |
| } |
| else |
| { |
| // number of bits from beginning of slice header, 0xffff means not awailable |
| hucConstData->Slice[slcCount].WeightTable_StartInBits = 0xFFFF; |
| hucConstData->Slice[slcCount].WeightTable_EndInBits = 0xFFFF; |
| } |
| |
| baseLocation += hucConstData->Slice[slcCount].SizeOfCMDs; |
| currentLocation = baseLocation; |
| } |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx]); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCBrcUpdate() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| uint32_t currentPass = m_enableTileReplay ? m_CurrentPassForOverAll : GetCurrentPass(); |
| |
| // Program update DMEM |
| auto hucVdencBrcUpdateDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12)m_osInterface->pfnLockResource( |
| m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(hucVdencBrcUpdateDmem); |
| MOS_ZeroMemory(hucVdencBrcUpdateDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12)); |
| |
| hucVdencBrcUpdateDmem->TARGETSIZE_U32 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)? m_hevcSeqParams->InitVBVBufferFullnessInBit : |
| MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit); |
| hucVdencBrcUpdateDmem->FrameID_U32 = m_storeData; // frame number |
| MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjFrame_U16, 4 * sizeof(uint16_t), (void*)m_startGAdjFrame, 4 * sizeof(uint16_t)); |
| hucVdencBrcUpdateDmem->TargetSliceSize_U16 = (uint16_t)m_hevcPicParams->MaxSliceSizeInBytes; |
| auto slbSliceSize = (m_hwInterface->m_vdenc2ndLevelBatchBufferSize - m_hwInterface->m_vdencBatchBuffer1stGroupSize - |
| m_hwInterface->m_vdencBatchBuffer2ndGroupSize) / ENCODE_HEVC_VDENC_NUM_MAX_SLICES; |
| hucVdencBrcUpdateDmem->SLB_Data_SizeInBytes = (uint16_t)(slbSliceSize * m_numSlices + |
| m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize); |
| hucVdencBrcUpdateDmem->PIPE_MODE_SELECT_StartInBytes = 0xFFFF; // HuC need not need to modify the pipe mode select command in Gen11+ |
| hucVdencBrcUpdateDmem->CMD1_StartInBytes = (uint16_t)m_hwInterface->m_vdencBatchBuffer1stGroupSize; |
| hucVdencBrcUpdateDmem->PIC_STATE_StartInBytes = (uint16_t)m_picStateCmdStartInBytes; |
| hucVdencBrcUpdateDmem->CMD2_StartInBytes = (uint16_t)m_cmd2StartInBytes; |
| |
| if (m_prevStoreData != m_storeData) |
| { |
| m_prevStoreData = m_storeData; |
| |
| int32_t oldestIdx = -1; |
| int32_t selectedSlot = -1; |
| uint32_t oldestAge = 0; |
| for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++) |
| { |
| if (slotInfo[i].isUsed == true && slotInfo[i].isRef) |
| { |
| slotInfo[i].age++; |
| if (slotInfo[i].age >= oldestAge) |
| { |
| oldestAge = slotInfo[i].age; |
| oldestIdx = i; |
| } |
| } |
| if ((selectedSlot == -1) && (slotInfo[i].isUsed == false || !slotInfo[i].isRef)) |
| { |
| selectedSlot = i; |
| } |
| } |
| |
| if (selectedSlot == -1) |
| { |
| selectedSlot = oldestIdx; |
| } |
| |
| slotInfo[selectedSlot].age = 0; |
| slotInfo[selectedSlot].poc = m_hevcPicParams->CurrPicOrderCnt; |
| slotInfo[selectedSlot].isUsed = true; |
| slotInfo[selectedSlot].isRef = m_hevcPicParams->bUsedAsRef; |
| |
| m_curPicSlot = selectedSlot; |
| } |
| |
| hucVdencBrcUpdateDmem->Current_Data_Offset = m_curPicSlot * m_weightHistSize; |
| |
| for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][refIdx]; |
| auto refPOC = m_hevcPicParams->RefFramePOCList[refPic.FrameIdx]; |
| for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++) |
| { |
| if (slotInfo[i].poc == refPOC) |
| { |
| hucVdencBrcUpdateDmem->Ref_Data_Offset[refIdx] = i * m_weightHistSize; |
| break; |
| } |
| } |
| } |
| |
| for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][refIdx]; |
| auto refPOC = m_hevcPicParams->RefFramePOCList[refPic.FrameIdx]; |
| for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++) |
| { |
| if (slotInfo[i].poc == refPOC) |
| { |
| hucVdencBrcUpdateDmem->Ref_Data_Offset[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] = i * m_weightHistSize; |
| break; |
| } |
| } |
| } |
| |
| hucVdencBrcUpdateDmem->MaxNumSliceAllowed_U16 = (uint16_t)GetMaxAllowedSlices(m_hevcSeqParams->Level); |
| |
| if (m_FrameLevelBRCForTileRow) |
| { |
| hucVdencBrcUpdateDmem->OpMode_U8 = 0x4; |
| } |
| else if (m_TileRowLevelBRC) |
| { |
| hucVdencBrcUpdateDmem->OpMode_U8 = 0x8; |
| } |
| else |
| { |
| hucVdencBrcUpdateDmem->OpMode_U8 // 1: BRC (including ACQP), 2: Weighted prediction (should not be enabled in first pass) |
| = (m_hevcVdencWeightedPredEnabled && m_hevcPicParams->bEnableGPUWeightedPrediction && !IsFirstPass()) ? 3 : 1; // 01: BRC, 10: WP never used, 11: BRC + WP |
| } |
| |
| if (m_pictureCodingType == I_TYPE) |
| { |
| hucVdencBrcUpdateDmem->CurrentFrameType_U8 = HEVC_BRC_FRAME_TYPE_I; |
| } |
| else if (m_hevcSeqParams->HierarchicalFlag) |
| { |
| if (m_hevcPicParams->HierarchLevelPlus1 > 0) |
| { |
| std::map<int, HEVC_BRC_FRAME_TYPE> hierchLevelPlus1_to_brclevel{ |
| {1, HEVC_BRC_FRAME_TYPE_P_OR_LB}, |
| {2, HEVC_BRC_FRAME_TYPE_B}, |
| {3, HEVC_BRC_FRAME_TYPE_B1}, |
| {4, HEVC_BRC_FRAME_TYPE_B2}}; |
| hucVdencBrcUpdateDmem->CurrentFrameType_U8 = hierchLevelPlus1_to_brclevel.count(m_hevcPicParams->HierarchLevelPlus1) ? hierchLevelPlus1_to_brclevel[m_hevcPicParams->HierarchLevelPlus1] : HEVC_BRC_FRAME_TYPE_INVALID; |
| //Invalid HierarchLevelPlus1 or LBD frames at level 3 eror check. |
| if ((hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_INVALID) || |
| (m_hevcSeqParams->LowDelayMode && hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_B2)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("HEVC_BRC_FRAME_TYPE_INVALID or LBD picture doesn't support Level 4\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| else if(!m_hevcSeqParams->LowDelayMode) //RA |
| { |
| //if L0/L1 both points to previous frame, then its LBD otherwise its is level 1 RA B. |
| auto B_or_LDB_brclevel = m_lowDelay ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B; |
| std::map<int, HEVC_BRC_FRAME_TYPE> codingtype_to_brclevel{ |
| {P_TYPE, HEVC_BRC_FRAME_TYPE_P_OR_LB}, |
| {B_TYPE, B_or_LDB_brclevel}, |
| {B1_TYPE, HEVC_BRC_FRAME_TYPE_B1}, |
| {B2_TYPE, HEVC_BRC_FRAME_TYPE_B2}}; |
| hucVdencBrcUpdateDmem->CurrentFrameType_U8 = codingtype_to_brclevel.count(m_pictureCodingType) ? codingtype_to_brclevel[m_pictureCodingType] : HEVC_BRC_FRAME_TYPE_INVALID; |
| //Invalid CodingType. |
| if (hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_INVALID) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid CodingType\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| else //LDB |
| { |
| hucVdencBrcUpdateDmem->CurrentFrameType_U8 = HEVC_BRC_FRAME_TYPE_P_OR_LB; //No Hierarchical info for LDB, treated as flat case |
| } |
| } |
| else |
| { |
| hucVdencBrcUpdateDmem->CurrentFrameType_U8 = HEVC_BRC_FRAME_TYPE_P_OR_LB; |
| } |
| |
| // Num_Ref_L1 should be always same as Num_Ref_L0 |
| hucVdencBrcUpdateDmem->Num_Ref_L0_U8 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1; |
| hucVdencBrcUpdateDmem->Num_Ref_L1_U8 = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1; |
| hucVdencBrcUpdateDmem->Num_Slices = (uint8_t)m_hevcPicParams->NumSlices; |
| |
| // CQP_QPValue_U8 setting is needed since ACQP is also part of ICQ |
| hucVdencBrcUpdateDmem->CQP_QPValue_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta; |
| hucVdencBrcUpdateDmem->CQP_FracQP_U8 = 0; |
| if (m_hevcPicParams->BRCPrecision == 1) |
| { |
| hucVdencBrcUpdateDmem->MaxNumPass_U8 = 1; |
| } |
| else |
| { |
| hucVdencBrcUpdateDmem->MaxNumPass_U8 = CODECHAL_VDENC_BRC_NUM_OF_PASSES; |
| } |
| |
| MOS_SecureMemcpy(hucVdencBrcUpdateDmem->gRateRatioThreshold_U8, 7 * sizeof(uint8_t), (void*)m_rateRatioThreshold, 7 * sizeof(uint8_t)); |
| MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjMult_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjMult, 5 * sizeof(uint8_t)); |
| MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjDiv_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjDiv, 5 * sizeof(uint8_t)); |
| MOS_SecureMemcpy(hucVdencBrcUpdateDmem->gRateRatioThresholdQP_U8, 8 * sizeof(uint8_t), (void*)m_rateRatioThresholdQP, 8 * sizeof(uint8_t)); |
| |
| hucVdencBrcUpdateDmem->IPAverageCoeff_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) ? 0 : 64; |
| hucVdencBrcUpdateDmem->CurrentPass_U8 = (uint8_t)currentPass; |
| |
| if ((m_hevcVdencAcqpEnabled && m_hevcSeqParams->QpAdjustment) || (m_brcEnabled && (m_hevcSeqParams->MBBRC != 2))) |
| { |
| hucVdencBrcUpdateDmem->DeltaQPForSadZone0_S8 = -5; |
| hucVdencBrcUpdateDmem->DeltaQPForSadZone1_S8 = -2; |
| hucVdencBrcUpdateDmem->DeltaQPForSadZone2_S8 = 2; |
| hucVdencBrcUpdateDmem->DeltaQPForSadZone3_S8 = 5; |
| hucVdencBrcUpdateDmem->DeltaQPForMvZero_S8 = -4; |
| hucVdencBrcUpdateDmem->DeltaQPForMvZone0_S8 = -2; |
| hucVdencBrcUpdateDmem->DeltaQPForMvZone1_S8 = 0; |
| hucVdencBrcUpdateDmem->DeltaQPForMvZone2_S8 = 2; |
| } |
| |
| if (m_hevcVdencWeightedPredEnabled) |
| { |
| hucVdencBrcUpdateDmem->LumaLog2WeightDenom_S8 = 6; |
| hucVdencBrcUpdateDmem->ChromaLog2WeightDenom_S8 = 6; |
| } |
| |
| // chroma weights are not confirmed to be supported from HW team yet |
| hucVdencBrcUpdateDmem->DisabledFeature_U8 = 0; // bit mask, 1 (bit0): disable chroma weight setting |
| |
| hucVdencBrcUpdateDmem->SlidingWindow_Enable_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW); |
| hucVdencBrcUpdateDmem->LOG_LCU_Size_U8 = 6; |
| hucVdencBrcUpdateDmem->ReEncodePositiveQPDeltaThr_S8 = 4; |
| hucVdencBrcUpdateDmem->ReEncodeNegativeQPDeltaThr_S8 = -5; |
| hucVdencBrcUpdateDmem->SceneChgPrevIntraPctThreshold_U8 = 96; |
| hucVdencBrcUpdateDmem->SceneChgCurIntraPctThreshold_U8 = 192; |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_rsvdState->SetDmemHuCBrcUpdate(hucVdencBrcUpdateDmem)); |
| } |
| #endif |
| |
| // reset skip frame statistics |
| m_numSkipFrames = 0; |
| m_sizeSkipFrames = 0; |
| |
| // For tile row based BRC |
| if (m_TileRowLevelBRC) |
| { |
| hucVdencBrcUpdateDmem->MaxNumTileHuCCallMinus1 = m_hevcPicParams->num_tile_rows_minus1; |
| hucVdencBrcUpdateDmem->TileHucCallIndex = (uint8_t)m_CurrentTileRow; |
| hucVdencBrcUpdateDmem->TileHuCCallPassIndex = m_CurrentPassForTileReplay + 1; |
| hucVdencBrcUpdateDmem->TileHuCCallPassMax = m_NumPassesForTileReplay; |
| |
| // Need change App to pass real max bit rate rather than to enlarge it with 1000 |
| if (m_hevcSeqParams->FrameRate.Numerator) |
| { |
| hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS * |
| m_hevcSeqParams->FrameRate.Denominator + (m_hevcSeqParams->FrameRate.Numerator >> 1)) / |
| m_hevcSeqParams->FrameRate.Numerator); |
| } |
| else |
| { |
| hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS + 15) / 30); |
| } |
| |
| uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint32_t startIdx = m_CurrentTileRow * numTileColumns; |
| uint32_t endIdx = startIdx + numTileColumns - 1; |
| uint32_t LCUsInTile = 0; |
| |
| for (uint32_t idx = 0; idx < numTileColumns; idx ++) |
| { |
| LCUsInTile += m_hevcPicParams->tile_row_height[m_CurrentTileRow] * m_hevcPicParams->tile_column_width[idx]; |
| } |
| |
| hucVdencBrcUpdateDmem->StartTileIdx = (uint8_t)startIdx; |
| hucVdencBrcUpdateDmem->EndTileIdx = (uint8_t)endIdx; |
| hucVdencBrcUpdateDmem->TileSizeInLCU = (uint16_t)LCUsInTile; |
| } |
| else if (m_FrameLevelBRCForTileRow) |
| { |
| hucVdencBrcUpdateDmem->MaxNumTileHuCCallMinus1 = m_hevcPicParams->num_tile_rows_minus1; |
| hucVdencBrcUpdateDmem->TileHucCallIndex = 0; |
| hucVdencBrcUpdateDmem->TileHuCCallPassIndex = 0; |
| hucVdencBrcUpdateDmem->TileHuCCallPassMax = m_NumPassesForTileReplay; |
| |
| // Need change App to pass real max bit rate rather than to enlarge it with 1000 |
| if (m_hevcSeqParams->FrameRate.Numerator) |
| { |
| hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS * |
| m_hevcSeqParams->FrameRate.Denominator + (m_hevcSeqParams->FrameRate.Numerator >> 1)) / |
| m_hevcSeqParams->FrameRate.Numerator); |
| } |
| else |
| { |
| hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS + 15) / 30); |
| } |
| } |
| |
| // Long term reference |
| hucVdencBrcUpdateDmem->IsLongTermRef = CodecHal_PictureIsLongTermRef(m_currReconstructedPic); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetRegionsHuCBrcUpdate(virtualAddrParams)); |
| |
| // With multiple tiles, ensure that HuC BRC kernel is fed with vdenc frame level statistics from HuC PAK Int kernel |
| // Applicable for scalable/ non-scalable mode |
| if (m_hevcPicParams->tiles_enabled_flag) |
| { |
| virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 VDEnc Statistics Buffer (Input) - VDENC_HEVC_VP9_FRAME_BASED_STATISTICS_STREAMOUT |
| virtualAddrParams->regionParams[1].dwOffset = m_hevcFrameStatsOffset.uiVdencStatistics; |
| } |
| |
| if (m_numPipe > 1) |
| { |
| virtualAddrParams->regionParams[2].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 2 PAK Statistics Buffer (Input) - MFX_PAK_FRAME_STATISTICS |
| virtualAddrParams->regionParams[2].dwOffset = m_hevcFrameStatsOffset.uiHevcPakStatistics; |
| virtualAddrParams->regionParams[7].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 7 Slice Stat Streamout (Input) |
| virtualAddrParams->regionParams[7].dwOffset = m_hevcFrameStatsOffset.uiHevcSliceStreamout; |
| // In scalable-mode, use PAK Integration kernel output to get bistream size |
| virtualAddrParams->regionParams[8].presRegion = &m_resBrcDataBuffer; |
| } |
| |
| // Tile reset case, use previous frame BRC data |
| if ((m_numPipe != m_numPipePre) && IsFirstPass()) |
| { |
| if (m_numPipePre > 1) |
| { |
| virtualAddrParams->regionParams[8].presRegion = &m_resBrcDataBuffer; |
| } |
| else |
| { |
| virtualAddrParams->regionParams[8].presRegion = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo); |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCTileRowBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetRegionsHuCBrcUpdate(virtualAddrParams)); |
| |
| // For tile replay, the tile based statistics is directly passed to HUC kernel |
| virtualAddrParams->regionParams[1].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 1 � VDEnc Statistics Buffer (Input) |
| virtualAddrParams->regionParams[1].dwOffset = m_hevcTileStatsOffset.uiVdencStatistics; |
| |
| virtualAddrParams->regionParams[2].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 2 � PAK Statistics Buffer (Input) |
| virtualAddrParams->regionParams[2].dwOffset = m_hevcTileStatsOffset.uiHevcPakStatistics; |
| |
| virtualAddrParams->regionParams[7].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 7 � Slice Stat Streamout (Input) |
| virtualAddrParams->regionParams[7].dwOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout; |
| |
| virtualAddrParams->regionParams[12].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 12 � Tile encoded information (Input) |
| |
| return eStatus; |
| } |
| |
| void CodechalVdencHevcStateG12::SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE& sliceStateParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalVdencHevcState::SetHcpSliceStateCommonParams(sliceStateParams); |
| |
| static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceStateParams).dwNumPipe = m_numPipe; |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| m_rsvdState->SetHcpSliceStateCommonParams(sliceStateParams, m_slotForRecNotFiltered); |
| } |
| #endif |
| } |
| |
| void CodechalVdencHevcStateG12::SetHcpSliceStateParams( |
| MHW_VDBOX_HEVC_SLICE_STATE& sliceState, |
| PCODEC_ENCODER_SLCDATA slcData, |
| uint16_t slcCount, |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams, |
| bool lastSliceInTile, |
| uint32_t idx) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalEncodeHevcBase::SetHcpSliceStateParams(sliceState, slcData, slcCount); |
| |
| sliceState.bLastSliceInTile = lastSliceInTile ? true : false; |
| sliceState.bLastSliceInTileColumn = (lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn) ? true : false; |
| static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12&>(sliceState).pTileCodingParams = tileCodingParams + idx; |
| static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12&>(sliceState).dwTileID = idx; |
| |
| // update pass status |
| if (m_enableTileReplay && m_FrameLevelBRCForTileRow) |
| { |
| sliceState.bFirstPass = true; |
| sliceState.bLastPass = false; |
| } |
| else if (m_enableTileReplay && m_TileRowLevelBRC) |
| { |
| sliceState.bFirstPass = IsFirstPassForTileReplay(); |
| sliceState.bLastPass = IsLastPassForTileReplay(); |
| } |
| } |
| |
| void CodechalVdencHevcStateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams); |
| |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParams = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(vdboxPipeModeSelectParams); |
| |
| if (m_numPipe > 1) |
| { |
| // Running in the multiple VDBOX mode |
| if (IsFirstPipe()) |
| { |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT; |
| } |
| else if (IsLastPipe()) |
| { |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT; |
| } |
| else |
| { |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE; |
| } |
| pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE; |
| } |
| else |
| { |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY; |
| pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY; |
| } |
| |
| // In single pipe mode, if TileBasedReplayMode is enabled, the bit stream for each tile will not be continuous |
| if (m_hevcPicParams->tiles_enabled_flag) |
| { |
| pipeModeSelectParams.bTileBasedReplayMode = m_enableTileReplay; |
| } |
| else |
| { |
| pipeModeSelectParams.bTileBasedReplayMode = 0; |
| } |
| |
| // To enable VDENC/PAK statistics stream out for BRC only |
| // Is stream out needed for ACQP? check this out! |
| pipeModeSelectParams.bBRCEnabled = m_hevcVdencAcqpEnabled || m_vdencBrcEnabled; |
| } |
| |
| void CodechalVdencHevcStateG12::SetVdencPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalVdencHevcState::SetVdencPipeModeSelectParams(vdboxPipeModeSelectParams); |
| |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParams = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(vdboxPipeModeSelectParams); |
| |
| // Enable RGB encoding |
| pipeModeSelectParams.bRGBEncodingMode = m_RGBEncodingEnable; |
| |
| // Capture mode enable |
| pipeModeSelectParams.bWirelessEncodeEnabled = m_CaptureModeEnable; |
| pipeModeSelectParams.ucWirelessSessionId = 0; |
| |
| // Set random access flag |
| pipeModeSelectParams.bIsRandomAccess = !m_lowDelay; |
| |
| // Set lookahead pass flag |
| pipeModeSelectParams.bLookaheadPass = m_lookaheadPass; |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| m_rsvdState->SetVdencPipeModeSelectParams(pipeModeSelectParams); |
| } |
| #endif |
| } |
| |
| void CodechalVdencHevcStateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams); |
| |
| PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex]; |
| if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1)) |
| { |
| pipeBufAddrParams.presLcuBaseAddressBuffer = &tileStatisticsBuffer->sResource; |
| pipeBufAddrParams.dwLcuStreamOutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout; |
| pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource; |
| pipeBufAddrParams.dwFrameStatStreamOutOffset = m_hevcTileStatsOffset.uiHevcPakStatistics; |
| } |
| |
| // SAO Row Store is GEN12 specific |
| pipeBufAddrParams.presSaoRowStoreBuffer = &m_vdencSAORowStoreBuffer; |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| m_rsvdState->SetHcpPipeBufAddrParams(pipeBufAddrParams, m_slotForRecNotFiltered); |
| } |
| #endif |
| |
| } |
| |
| void CodechalVdencHevcStateG12::SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE& picStateParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalEncodeHevcBase::SetHcpPicStateParams(picStateParams); |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| m_rsvdState->SetHcpPicStateParams(picStateParams); |
| } |
| #endif |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AddHcpRefIdxCmd( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| PMHW_BATCH_BUFFER batchBuffer, |
| PMHW_VDBOX_HEVC_SLICE_STATE params) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams); |
| |
| if (cmdBuffer == nullptr && batchBuffer == nullptr) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to."); |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams; |
| PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams; |
| |
| if ((hevcPicParams->pps_curr_pic_ref_enabled_flag) || (hevcSlcParams->slice_type != CODECHAL_ENCODE_HEVC_I_SLICE)) |
| { |
| MHW_VDBOX_HEVC_REF_IDX_PARAMS_G12 refIdxParams; |
| |
| refIdxParams.CurrPic = hevcPicParams->CurrReconstructedPic; |
| refIdxParams.isEncode = true; |
| refIdxParams.ucList = LIST_0; |
| refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l0_active_minus1 + 1; |
| eStatus = MOS_SecureMemcpy(&refIdxParams.RefPicList, sizeof(refIdxParams.RefPicList), |
| &hevcSlcParams->RefPicList, sizeof(hevcSlcParams->RefPicList)); |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory."); |
| return eStatus; |
| } |
| |
| refIdxParams.hevcRefList = (void**)m_refList; |
| refIdxParams.poc_curr_pic = hevcPicParams->CurrPicOrderCnt; |
| for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++) |
| { |
| refIdxParams.poc_list[i] = hevcPicParams->RefFramePOCList[i]; |
| } |
| |
| refIdxParams.pRefIdxMapping = params->pRefIdxMapping; |
| refIdxParams.RefFieldPicFlag = 0; // there is no interlaced support in encoder |
| refIdxParams.RefBottomFieldFlag = 0; // there is no interlaced support in encoder |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| m_rsvdState->SetRefIdxParams(refIdxParams); |
| } |
| #endif |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams)); |
| |
| if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE) |
| { |
| refIdxParams.ucList = LIST_1; |
| refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l1_active_minus1 + 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams)); |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| void CodechalVdencHevcStateG12::SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalVdencHevcState::SetVdencPipeBufAddrParams(pipeBufAddrParams); |
| |
| PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex]; |
| if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource)) |
| { |
| pipeBufAddrParams.presVdencStreamOutBuffer = &tileStatisticsBuffer->sResource; |
| pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_hevcTileStatsOffset.uiVdencStatistics; |
| } |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| m_rsvdState->SetVdencPipeBufAddrParams(pipeBufAddrParams); |
| } |
| #endif |
| |
| pipeBufAddrParams.presVdencTileRowStoreBuffer = &m_vdencTileRowStoreBuffer; |
| pipeBufAddrParams.presVdencCumulativeCuCountStreamoutSurface = &m_vdencCumulativeCuCountStreamoutSurface; |
| pipeBufAddrParams.isLowDelayB = m_lowDelay; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetKernelParams( |
| EncOperation operation, |
| MHW_KERNEL_PARAM *kernelParams) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams); |
| |
| auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment(); |
| |
| kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads; |
| kernelParams->iIdCount = 1; |
| |
| switch (operation) |
| { |
| case VDENC_ME_P: |
| case VDENC_ME_B: |
| case VDENC_STREAMIN: |
| case VDENC_STREAMIN_HEVC: |
| case VDENC_STREAMIN_HEVC_RAB: |
| kernelParams->iBTCount = CODECHAL_VDENC_HME_END_G12 - CODECHAL_VDENC_HME_BEGIN_G12; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_HEVC_VP9_VDENC_ME_CURBE_G12), (size_t)curbeAlignment); |
| kernelParams->iBlockWidth = 32; |
| kernelParams->iBlockHeight = 32; |
| break; |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested"); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetBindingTable( |
| EncOperation operation, |
| PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable); |
| |
| MOS_ZeroMemory(bindingTable, sizeof(*bindingTable)); |
| |
| switch (operation) |
| { |
| case VDENC_ME_P: |
| case VDENC_ME_B: |
| case VDENC_STREAMIN: |
| case VDENC_STREAMIN_HEVC: |
| case VDENC_STREAMIN_HEVC_RAB: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_VDENC_HME_END_G12 - CODECHAL_VDENC_HME_BEGIN_G12; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_VDENC_HME_BEGIN_G12; |
| break; |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++) |
| { |
| bindingTable->dwBindingTableEntries[i] = i; |
| } |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::EncodeMeKernel(HmeLevel hmeLevel) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| PMHW_KERNEL_STATE kernelState = nullptr; |
| if(hmeLevel == HME_LEVEL_4x) |
| { |
| kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB; |
| } |
| else |
| { |
| kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB; |
| } |
| auto encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME : |
| (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME; |
| |
| // If Single Task Phase is not enabled, use BT count for the kernel state. |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| uint32_t maxBtCount = m_singleTaskPhaseSupported ? |
| m_maxBtCount : kernelState->KernelParams.iBTCount; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf( |
| m_stateHeapInterface, |
| maxBtCount)); |
| m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| |
| // Set up the DSH/SSH as normal |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; |
| MOS_ZeroMemory(&idParams, sizeof(idParams)); |
| idParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( |
| m_stateHeapInterface, |
| 1, |
| &idParams)); |
| |
| //Setup curbe for StreamIn Kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbe(hmeLevel)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_DSH_TYPE, |
| kernelState)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe( |
| encFunctionType, |
| kernelState)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_ISH_TYPE, |
| kernelState)); |
| ) |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); |
| |
| SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams(); |
| sendKernelCmdsParams.EncFunctionType = encFunctionType; |
| sendKernelCmdsParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams)); |
| |
| // Add binding table |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(hmeLevel, &cmdBuffer)); |
| |
| // Dump SSH for ME kernel |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_SSH_TYPE, |
| kernelState))); |
| |
| uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x : |
| (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x; |
| |
| uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor); |
| uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor); |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| walkerCodecParams.dwResolutionX = resolutionX; |
| walkerCodecParams.dwResolutionY = resolutionY; |
| walkerCodecParams.bNoDependency = true; |
| walkerCodecParams.bMbaff = false; |
| walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported; |
| walkerCodecParams.ucGroupId = m_groupId; |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId( |
| m_stateHeapInterface)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); |
| } |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| encFunctionType, |
| nullptr))); |
| |
| m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase); |
| |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); |
| |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw); |
| m_lastTaskInPhase = false; |
| } |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetMeCurbe(HmeLevel hmeLevel) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_VDENC_HEVC_ME_CURBE_G12 curbe; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy( |
| &curbe, |
| sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G12), |
| ME_CURBE_INIT_G12, |
| sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G12))); |
| |
| PMHW_KERNEL_STATE kernelState = nullptr; |
| if(hmeLevel == HME_LEVEL_4x) |
| { |
| kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB; |
| } |
| else |
| { |
| kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB; |
| } |
| |
| bool useMvFromPrevStep; |
| bool writeDistortions; |
| uint32_t scaleFactor; |
| uint32_t mvShiftFactor = 0; |
| uint32_t prevMvReadPosFactor = 0; |
| |
| switch (hmeLevel) |
| { |
| case HME_LEVEL_32x: |
| useMvFromPrevStep = false; |
| writeDistortions = false; |
| scaleFactor = SCALE_FACTOR_32x; |
| mvShiftFactor = 1; |
| prevMvReadPosFactor = 0; |
| break; |
| case HME_LEVEL_16x: |
| useMvFromPrevStep = (m_b32XMeEnabled) ? true : false; |
| writeDistortions = false; |
| scaleFactor = SCALE_FACTOR_16x; |
| mvShiftFactor = 2; |
| prevMvReadPosFactor = 1; |
| break; |
| case HME_LEVEL_4x: |
| useMvFromPrevStep = (m_b16XMeEnabled) ? true : false; |
| writeDistortions = true; |
| scaleFactor = SCALE_FACTOR_4x; |
| mvShiftFactor = 2; |
| prevMvReadPosFactor = 0; |
| break; |
| default: |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| break; |
| } |
| |
| curbe.DW3.SubPelMode = 3; |
| curbe.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1; |
| curbe.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor); |
| curbe.DW5.QpPrimeY = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta; |
| curbe.DW6.WriteDistortions = writeDistortions; |
| curbe.DW6.UseMvFromPrevStep = useMvFromPrevStep; |
| curbe.DW6.SuperCombineDist = 5;//SuperCombineDist_Generic[pHevcSeqParams->TargetUsage]; Harded coded in KCM |
| curbe.DW6.MaxVmvR = 511 * 4; |
| curbe.DW15.MvShiftFactor = mvShiftFactor; |
| curbe.DW15.PrevMvReadPosFactor = prevMvReadPosFactor; |
| |
| if (m_pictureCodingType == B_TYPE) |
| { |
| // This field is irrelevant since we are not using the bi-direct search. |
| curbe.DW1.BiWeight = m_bframeMeBidirectionalWeight; |
| curbe.DW13.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1; |
| } |
| |
| if (m_pictureCodingType == P_TYPE || m_pictureCodingType == B_TYPE) |
| { |
| curbe.DW13.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1; |
| } |
| |
| if (hmeLevel == HME_LEVEL_4x) |
| { |
| curbe.DW30.ActualMBHeight = m_frameHeight; |
| curbe.DW30.ActualMBWidth = m_frameWidth; |
| } |
| else |
| { |
| curbe.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight); |
| curbe.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth); |
| } |
| |
| curbe.DW13.RefStreaminCost = 0; |
| // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not |
| curbe.DW13.ROIEnable = 0; |
| |
| uint8_t meMethod = (m_pictureCodingType == B_TYPE) ? m_bMeMethodGeneric[m_hevcSeqParams->TargetUsage] : m_meMethodGeneric[m_hevcSeqParams->TargetUsage]; |
| uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe.SPDelta), 14 * sizeof(uint32_t), |
| m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t))); |
| |
| if (hmeLevel == HME_LEVEL_4x) |
| { |
| //StreamIn CURBE |
| curbe.DW6.LCUSize = 1;//Only LCU64 supported by the VDEnc HW |
| // Kernel should use driver-prepared stream-in surface during ROI/ Dirty-Rect |
| curbe.DW6.InputStreamInEn = (m_hevcPicParams->NumROI || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType))); |
| curbe.DW31.MaxCuSize = 3; |
| curbe.DW31.MaxTuSize = 3; |
| switch (m_hevcSeqParams->TargetUsage) |
| { |
| case 1: |
| case 4: |
| curbe.DW36.NumMergeCandCu64x64 = 4; |
| curbe.DW36.NumMergeCandCu32x32 = 3; |
| curbe.DW36.NumMergeCandCu16x16 = 2; |
| curbe.DW36.NumMergeCandCu8x8 = 1; |
| curbe.DW31.NumImePredictors = m_imgStateImePredictors; |
| break; |
| case 7: |
| curbe.DW36.NumMergeCandCu64x64 = 2; |
| curbe.DW36.NumMergeCandCu32x32 = 2; |
| curbe.DW36.NumMergeCandCu16x16 = 2; |
| curbe.DW36.NumMergeCandCu8x8 = 0; |
| curbe.DW31.NumImePredictors = 4; |
| break; |
| } |
| } |
| |
| curbe.DW40._4xMeMvOutputDataSurfIndex = CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G12; |
| curbe.DW41._16xOr32xMeMvInputDataSurfIndex = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G12 : CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G12; |
| curbe.DW42._4xMeOutputDistSurfIndex = CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G12; |
| curbe.DW43._4xMeOutputBrcDistSurfIndex = CODECHAL_VDENC_HME_BRC_DISTORTION_CM_G12; |
| curbe.DW44.VMEFwdInterPredictionSurfIndex = CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G12; |
| curbe.DW45.VMEBwdInterPredictionSurfIndex = CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G12; |
| curbe.DW46.VDEncStreamInOutputSurfIndex = CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G12; |
| curbe.DW47.VDEncStreamInInputSurfIndex = CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G12; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData( |
| &curbe, |
| kernelState->dwCurbeOffset, |
| sizeof(curbe))); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SendMeSurfaces(HmeLevel hmeLevel, PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| MOS_SURFACE *meMvDataBuffer; |
| uint32_t downscaledWidthInMb; |
| uint32_t downscaledHeightInMb; |
| |
| if (hmeLevel == HME_LEVEL_32x) |
| { |
| meMvDataBuffer = &m_s32XMeMvDataBuffer; |
| downscaledWidthInMb = m_downscaledWidthInMb32x; |
| downscaledHeightInMb = m_downscaledHeightInMb32x; |
| } |
| else if (hmeLevel == HME_LEVEL_16x) |
| { |
| meMvDataBuffer = &m_s16XMeMvDataBuffer; |
| downscaledWidthInMb = m_downscaledWidthInMb16x; |
| downscaledHeightInMb = m_downscaledHeightInMb16x; |
| } |
| else |
| { |
| meMvDataBuffer = &m_s4XMeMvDataBuffer; |
| downscaledWidthInMb = m_downscaledWidthInMb4x; |
| downscaledHeightInMb = m_downscaledHeightInMb4x; |
| } |
| |
| auto width = MOS_ALIGN_CEIL(downscaledWidthInMb * 32, 64); |
| auto height = downscaledHeightInMb * 4 * 10; |
| // Force the values |
| meMvDataBuffer->dwWidth = width; |
| meMvDataBuffer->dwHeight = height; |
| meMvDataBuffer->dwPitch = width; |
| |
| PMHW_KERNEL_STATE kernelState = nullptr; |
| if(hmeLevel == HME_LEVEL_4x) |
| { |
| kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB; |
| } |
| else |
| { |
| kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB; |
| } |
| auto bindingTable = (hmeLevel == HME_LEVEL_4x) ? |
| &m_vdencStreaminKernelBindingTable : &m_vdencMeKernelBindingTable; |
| uint32_t meMvBottomFieldOffset = 0; |
| |
| CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams; |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.bIs2DSurface = true; |
| surfaceCodecParams.bMediaBlockRW = true; |
| surfaceCodecParams.psSurface = meMvDataBuffer; |
| surfaceCodecParams.dwOffset = meMvBottomFieldOffset; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G12]; |
| surfaceCodecParams.bIsWritable = true; |
| surfaceCodecParams.bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| if (hmeLevel == HME_LEVEL_16x && m_b32XMeEnabled) |
| { |
| // Pass 32x MV to 16x ME operation |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.bIs2DSurface = true; |
| surfaceCodecParams.bMediaBlockRW = true; |
| surfaceCodecParams.psSurface = &m_s32XMeMvDataBuffer; |
| surfaceCodecParams.dwOffset = 0; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G12]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| else if (!(hmeLevel == HME_LEVEL_32x) && m_b16XMeEnabled) |
| { |
| // Pass 16x MV to 4x ME operation |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.bIs2DSurface = true; |
| surfaceCodecParams.bMediaBlockRW = true; |
| surfaceCodecParams.psSurface = &m_s16XMeMvDataBuffer; |
| surfaceCodecParams.dwOffset = 0; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G12]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.bIs2DSurface = true; |
| surfaceCodecParams.bMediaBlockRW = true; |
| surfaceCodecParams.psSurface = &m_s4XMeDistortionBuffer; |
| surfaceCodecParams.dwOffset = 0; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G12]; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value; |
| surfaceCodecParams.bIsWritable = true; |
| surfaceCodecParams.bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| PMOS_SURFACE currScaledSurface = (hmeLevel == HME_LEVEL_4x) ? m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER) : |
| ((hmeLevel == HME_LEVEL_16x) ? m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER) : m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER)); |
| MOS_SURFACE refScaledSurface = *currScaledSurface; |
| bool currFieldPicture = CodecHal_PictureIsField(m_currOriginalPic) ? true : false; |
| bool currBottomField = CodecHal_PictureIsBottomField(m_currOriginalPic) ? true : false; |
| |
| uint8_t currVDirection = (!currFieldPicture) ? CODECHAL_VDIRECTION_FRAME : |
| ((currBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD); |
| uint32_t currScaledBottomFieldOffset = (hmeLevel == HME_LEVEL_4x) ? |
| (uint32_t)m_scaledBottomFieldOffset : ((hmeLevel == HME_LEVEL_16x) ? (uint32_t)m_scaled16xBottomFieldOffset : (uint32_t)m_scaled32xBottomFieldOffset); |
| |
| // Setup references 1...n |
| // LIST 0 references |
| for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][refIdx]; |
| |
| if (!CodecHal_PictureIsInvalid(refPic)) |
| { |
| if (refIdx == 0) |
| { |
| // Current Picture Y - VME |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.bUseAdvState = true; |
| surfaceCodecParams.psSurface = currScaledSurface; |
| surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G12]; |
| surfaceCodecParams.ucVDirection = currVDirection; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| bool refFieldPicture = CodecHal_PictureIsField(refPic) ? true : false; |
| bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? true : false; |
| uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx; |
| uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx; |
| if (hmeLevel == HME_LEVEL_4x) |
| { |
| refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource; |
| } |
| else if (hmeLevel == HME_LEVEL_16x) |
| { |
| refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource; |
| } |
| else |
| { |
| refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource; |
| } |
| uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0; |
| |
| // L0 Reference Picture Y - VME |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.bUseAdvState = true; |
| surfaceCodecParams.psSurface = &refScaledSurface; |
| surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_FWD_REF_IDX0_CM_G12 + (refIdx * 2)]; |
| surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME : |
| ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_RESERVED1_CM_G12 + (refIdx * 2)]; |
| surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME : |
| ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| } |
| |
| //List1 |
| for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][refIdx]; |
| |
| if (!CodecHal_PictureIsInvalid(refPic)) |
| { |
| if (refIdx == 0) |
| { |
| // Current Picture Y - VME |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.bUseAdvState = true; |
| surfaceCodecParams.psSurface = currScaledSurface; |
| surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G12]; |
| surfaceCodecParams.ucVDirection = currVDirection; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| bool refFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0; |
| bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? 1 : 0; |
| auto refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx; |
| uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx; |
| |
| if (hmeLevel == HME_LEVEL_4x) |
| { |
| refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource; |
| } |
| else if (hmeLevel == HME_LEVEL_16x) |
| { |
| refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource; |
| } |
| else |
| { |
| refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource; |
| } |
| uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0; |
| |
| // L1 Reference Picture Y - VME |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.bUseAdvState = true; |
| surfaceCodecParams.psSurface = &refScaledSurface; |
| surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_BWD_REF_IDX0_CM_G12 + (refIdx * 2)]; |
| surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME : |
| ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_RESERVED9_CM_G12 + (refIdx * 2)]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| } |
| |
| if (hmeLevel == HME_LEVEL_4x) |
| { |
| CODECHAL_ENCODE_CHK_NULL_RETURN(&m_resVdencStreamInBuffer[m_currRecycledBufIdx]); |
| |
| auto streamingSize = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE; |
| |
| // Send driver-prepared stream-in surface as input during ROI/ Dirty-Rect |
| if (m_hevcPicParams->NumROI || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType))) |
| { |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize); |
| surfaceCodecParams.bIs2DSurface = false; |
| surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx]; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G12]; |
| surfaceCodecParams.bIsWritable = true; |
| surfaceCodecParams.bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| else // Clear stream-in surface otherwise |
| { |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = true; |
| |
| auto data = m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resVdencStreamInBuffer[m_currRecycledBufIdx], |
| &lockFlags); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory( |
| data, |
| streamingSize); |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_resVdencStreamInBuffer[m_currRecycledBufIdx]); |
| } |
| |
| MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams)); |
| surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize); |
| surfaceCodecParams.bIs2DSurface = false; |
| surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx]; |
| surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value; |
| surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G12]; |
| surfaceCodecParams.bIsWritable = true; |
| surfaceCodecParams.bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS |
| CodechalVdencHevcStateG12::GetKernelHeaderAndSize( |
| void *binary, |
| EncOperation operation, |
| uint32_t krnStateIdx, |
| void *krnHeader, |
| uint32_t *krnSize) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(binary); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(binary, operation, krnStateIdx, krnHeader, krnSize)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AddVdencWalkerStateCmd( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| PMHW_VDBOX_HEVC_SLICE_STATE params) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params); |
| |
| MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G12 vdencWalkerStateParams; |
| vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_HEVC; |
| vdencWalkerStateParams.pHevcEncSeqParams = params->pEncodeHevcSeqParams; |
| vdencWalkerStateParams.pHevcEncPicParams = params->pEncodeHevcPicParams; |
| vdencWalkerStateParams.pEncodeHevcSliceParams = params->pEncodeHevcSliceParams; |
| vdencWalkerStateParams.pTileCodingParams = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->pTileCodingParams; |
| vdencWalkerStateParams.dwTileId = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->dwTileID; |
| switch (static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->dwNumPipe) |
| { |
| case 0: |
| case 1: |
| vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE; |
| break; |
| case 2: |
| vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE; |
| break; |
| case 4: |
| vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE; |
| break; |
| default: |
| vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID; |
| CODECHAL_ENCODE_ASSERT(false); |
| break; |
| } |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| m_rsvdState->SetVdencWalkerStateParams(vdencWalkerStateParams); |
| } |
| #endif |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::GetSystemPipeNumberCommon() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_USER_FEATURE_VALUE_DATA userFeatureData; |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| |
| MOS_STATUS statusKey = MOS_STATUS_SUCCESS; |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY, |
| &userFeatureData); |
| |
| bool disableScalability = false; |
| if (statusKey == MOS_STATUS_SUCCESS) |
| { |
| disableScalability = userFeatureData.i32Data ? true : false; |
| } |
| |
| MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo); |
| |
| if (gtSystemInfo && disableScalability == false) |
| { |
| // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface |
| m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled); |
| } |
| else |
| { |
| m_numVdbox = 1; |
| } |
| |
| CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d.", m_numVdbox); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::Initialize(CodechalSetting * settings) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_USER_FEATURE_VALUE_DATA userFeatureData; |
| // Tile Replay Enable should be passed from DDI, will change later when DDI is ready |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_TILEREPLAY_ENABLE_ID_G12, |
| &userFeatureData); |
| m_enableTileReplay = userFeatureData.i32Data ? true : false; |
| |
| m_skipFrameBasedHWCounterRead = m_enableTileReplay; |
| |
| // RGB Encoding Enable should be passed from DDI, will change later when DDI is ready |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_RGB_ENCODING_ENABLE_ID_G12, |
| &userFeatureData); |
| m_RGBEncodingEnable = userFeatureData.i32Data ? true : false; |
| |
| // Capture mode with display Enable should be passed from DDI, will change later |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_CAPTURE_MODE_ENABLE_ID_G12, |
| &userFeatureData); |
| m_CaptureModeEnable = userFeatureData.i32Data ? true : false; |
| |
| // common initilization |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::Initialize(settings)); |
| |
| MEDIA_FEATURE_TABLE *skuTable = m_osInterface->pfnGetSkuTable(m_osInterface); |
| if (MEDIA_IS_SKU(skuTable, FtrSimulationMode) && (m_enableTileReplay == true)) |
| { |
| m_frameTrackingEnabled = false; |
| } |
| |
| // To do: current size assumes 8Kx8K max resolution. Needs to be increased based on Gen12, along with m_maxNumNativeROI. |
| m_deltaQpRoiBufferSize = m_deltaQpBufferSize; |
| m_brcRoiBufferSize = m_roiStreamInBufferSize; |
| m_maxTileNumber = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) * |
| CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE); |
| |
| // we need additional buffer for (1) 1 CL for size info at the beginning of each tile column (max of 4 vdbox in scalability mode) |
| // (2) CL alignment at end of every tile column |
| // as a result, increase the height by 1 for allocation purposes |
| m_numLcu = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * (MOS_ROUNDUP_DIVIDE(m_frameHeight, MAX_LCU_SIZE) + 1); |
| m_mbCodeSize = MOS_ALIGN_CEIL(2 * sizeof(uint32_t) * (m_numLcu * 5 + m_numLcu * 64 * 8), CODECHAL_PAGE_SIZE); |
| m_mbCodeSize += m_mvOffset; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon()); |
| |
| if (MOS_VE_SUPPORTED(m_osInterface)) |
| { |
| m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE)); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState); |
| //scalability initialize |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface)); |
| } |
| |
| // Caculate the size for 3nd level batch buffer |
| // mhw_vdbox_hcp_g12_X::HCP_PIC_STATE_CMD::byteSize |
| // As this buffer is going to passed to HuC to generate the command, must be page aligned |
| // To add the HW interface get the buffer size later |
| |
| m_thirdLBSize = MOS_ALIGN_CEIL(1024, CODECHAL_PAGE_SIZE); |
| |
| // Caculate the batch buffer size for each tile |
| // To add the MHW interface later, can be fine tuned |
| m_tileLevelBatchSize = m_hwInterface->m_vdenc2ndLevelBatchBufferSize; |
| |
| // Caculate the size for MV temporal buffer |
| uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE; |
| uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE; |
| m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size); |
| |
| m_sizeOfHcpPakFrameStats = 9 * CODECHAL_CACHELINE_SIZE; |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| InitReserveState(settings); |
| #endif |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH, |
| &userFeatureData); |
| m_enableTileStitchByHW = userFeatureData.i32Data ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE, |
| &userFeatureData); |
| m_enableHWSemaphore = userFeatureData.i32Data ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VDBOX_HW_SEMAPHORE, |
| &userFeatureData); |
| m_enableVdBoxHWSemaphore = userFeatureData.i32Data ? true : false; |
| |
| // ACQP is now supported on Gen12 for TU1 / TU4 |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID, |
| &userFeatureData); |
| m_hevcVdencAcqpEnabled = userFeatureData.i32Data ? true : false; |
| |
| m_numDelay = 15; |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, |
| &userFeatureData); |
| m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_FORCE_SCALABILITY_ID_G12, |
| &userFeatureData); |
| m_forceScalability = userFeatureData.i32Data ? true : false; |
| #endif |
| |
| return eStatus; |
| } |
| |
| CodechalVdencHevcStateG12::CodechalVdencHevcStateG12( |
| CodechalHwInterface* hwInterface, |
| CodechalDebugInterface* debugInterface, |
| PCODECHAL_STANDARD_INFO standardInfo) |
| :CodechalVdencHevcState(hwInterface, debugInterface, standardInfo) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_useCommonKernel = true; |
| pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize; |
| m_useHwScoreboard = false; |
| #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE) |
| m_kernelBase = (uint8_t*)IGCODECKRN_G12; |
| #endif |
| m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL; |
| m_scalabilityState = nullptr; |
| |
| MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData)); |
| MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData)); |
| MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer)); |
| MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer)); |
| MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer)); |
| MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride)); |
| MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer)); |
| |
| MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer)); |
| MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer)); |
| MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem)); |
| MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem)); |
| MOS_ZeroMemory(m_resVdBoxSemaphoreMem, sizeof(m_resVdBoxSemaphoreMem)); |
| MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem)); |
| |
| MOS_ZeroMemory(&m_vdencTileRowStoreBuffer, sizeof(m_vdencTileRowStoreBuffer)); |
| MOS_ZeroMemory(&m_thirdLevelBatchBuffer, sizeof(MHW_BATCH_BUFFER)); |
| MOS_ZeroMemory(&m_vdencSAORowStoreBuffer, sizeof(m_vdencSAORowStoreBuffer)); |
| |
| for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++) |
| { |
| MOS_ZeroMemory(&m_tileLevelBatchBuffer[i], sizeof(PMHW_BATCH_BUFFER)); |
| MOS_ZeroMemory(&m_TileRowBRCBatchBuffer[i], sizeof(PMHW_BATCH_BUFFER)); |
| } |
| |
| for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++) |
| { |
| for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++) |
| { |
| MOS_ZeroMemory(&m_resHucPakStitchDmemBuffer[k][i], sizeof(m_resHucPakStitchDmemBuffer[k][i])); |
| } |
| } |
| |
| MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer)); |
| MOS_ZeroMemory(&m_resTileRowBRCsyncSemaphore, sizeof(m_resTileRowBRCsyncSemaphore)); |
| |
| m_vdencBrcInitDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12); |
| m_vdencBrcUpdateDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12); |
| m_vdencBrcConstDataBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G12); |
| m_maxNumSlicesSupported = CODECHAL_VDENC_HEVC_MAX_SLICE_NUM; |
| |
| m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS; |
| m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC; |
| #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE) |
| m_kernelBase = (uint8_t*)IGCODECKRN_G12; |
| #endif |
| |
| MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize( |
| m_kernelBase, |
| m_kuidCommon, |
| &m_kernelBinary, |
| &m_combinedKernelSize); |
| CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS); |
| |
| m_hwInterface->GetStateHeapSettings()->dwIshSize += |
| MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT)); |
| |
| m_hwInterface->m_hucCommandBufferSize += 64; |
| |
| Mos_CheckVirtualEngineSupported(m_osInterface, false, true); |
| Mos_SetVirtualEngineSupported(m_osInterface, true); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_encodeParState = MOS_New(CodechalDebugEncodeParG12, this)); |
| CreateHevcPar(); |
| ) |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetGpuCtxCreatOption() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface)) |
| { |
| CodechalEncoderState::SetGpuCtxCreatOption(); |
| } |
| else |
| { |
| m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation( |
| m_scalabilityState, |
| (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCPakIntegrate( |
| PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| int32_t currentPass = GetCurrentPass(); |
| |
| if(m_enableTileStitchByHW) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer()); |
| } |
| |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams); |
| |
| MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS)); |
| |
| // Add Virtual addr |
| virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc |
| virtualAddrParams->regionParams[0].dwOffset = 0; |
| virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output |
| virtualAddrParams->regionParams[1].isWritable = true; |
| virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream |
| virtualAddrParams->regionParams[4].dwOffset = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command |
| virtualAddrParams->regionParams[5].dwOffset = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| virtualAddrParams->regionParams[5].isWritable = true; |
| virtualAddrParams->regionParams[6].presRegion = &m_vdencBrcHistoryBuffer; // Region 6 History Buffer (Input/Output) |
| virtualAddrParams->regionParams[6].isWritable = true; |
| virtualAddrParams->regionParams[7].presRegion = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource; // Region 7 - HCP PIC state command |
| virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data |
| virtualAddrParams->regionParams[9].isWritable = true; |
| if (m_enableTileStitchByHW) |
| { |
| virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation |
| virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc |
| virtualAddrParams->regionParams[10].isWritable = true; |
| } |
| virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer |
| virtualAddrParams->regionParams[15].dwOffset = 0; |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::ConfigStitchDataBuffer() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0 || |
| (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled)) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| HucCommandDataVdencG12 *hucStitchDataBuf = (HucCommandDataVdencG12 *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly); |
| |
| MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandDataVdencG12)); |
| hucStitchDataBuf->TotalCommands = 1; |
| hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF; |
| |
| HucInputCmdVdencG12 hucInputCmd; |
| MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdVdencG12)); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface); |
| hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0; |
| hucInputCmd.CmdMode = HUC_CMD_LIST_MODE; |
| hucInputCmd.LengthOfTable = (uint8_t)(m_numTiles); |
| hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize; |
| |
| PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource( |
| m_osInterface, |
| presSrc, |
| false, |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource( |
| m_osInterface, |
| &m_resBitstreamBuffer, |
| true, |
| true)); |
| |
| uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc); |
| uint64_t destAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer); |
| hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF); |
| hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32); |
| |
| hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF); |
| hucInputCmd.DestAddrTop = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32); |
| |
| MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdVdencG12), &hucInputCmd, sizeof(HucInputCmdVdencG12)); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCPakIntegrateStitch( |
| PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| int32_t currentPass = GetCurrentPass(); |
| |
| MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS)); |
| |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer()); |
| |
| // Add Virtual addr |
| virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc |
| virtualAddrParams->regionParams[0].dwOffset = 0; |
| virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output |
| virtualAddrParams->regionParams[1].isWritable = true; |
| virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream |
| virtualAddrParams->regionParams[4].dwOffset = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command |
| virtualAddrParams->regionParams[5].dwOffset = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| virtualAddrParams->regionParams[5].isWritable = true; |
| virtualAddrParams->regionParams[6].presRegion = &m_vdencBrcHistoryBuffer; // Region 6 History Buffer (Input/Output) |
| virtualAddrParams->regionParams[6].isWritable = true; |
| virtualAddrParams->regionParams[7].presRegion = &m_thirdLevelBatchBuffer.OsResource; //&m_resHucPakStitchReadBatchBuffer; // Region 7 - HCP PIC state command |
| virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation |
| virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data |
| virtualAddrParams->regionParams[9].isWritable = true; |
| virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc |
| virtualAddrParams->regionParams[10].isWritable = true; |
| virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer |
| virtualAddrParams->regionParams[15].dwOffset = 0; |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCPakIntegrateStitch( |
| PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| int32_t currentPass = GetCurrentPass(); |
| |
| HucPakStitchDmemVdencG12 *hucPakStitchDmem = (HucPakStitchDmemVdencG12 *)m_osInterface->pfnLockResource( |
| m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem); |
| |
| MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemVdencG12)); |
| |
| // reset all the offsets to -1 |
| uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) + |
| sizeof(hucPakStitchDmem->VDENCSTAT_offset) + |
| sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) + |
| sizeof(hucPakStitchDmem->HEVC_Streamout_offset) + |
| sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) + |
| sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset); |
| MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF); |
| |
| uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4 |
| CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe); //ucNumPipe is nonzero and even number; 2 or 4 |
| uint16_t numTiles = numTileRows * numTileColumns; |
| uint16_t numTilesPerPipe = m_numTiles / m_numPipe; |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams); |
| |
| hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth; |
| hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight; |
| hucPakStitchDmem->TotalNumberOfPAKs = 0; |
| hucPakStitchDmem->Codec = 2; //HEVC DP CQP |
| hucPakStitchDmem->MAXPass = 1; |
| hucPakStitchDmem->CurrentPass = 1; |
| hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| hucPakStitchDmem->CabacZeroWordFlag = false; |
| hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8 |
| hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8 |
| hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc; |
| hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE; |
| // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record |
| hucPakStitchDmem->OffsetInCommandBuffer = tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8; |
| hucPakStitchDmem->LastTileBS_StartInBytes = (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1); |
| |
| hucPakStitchDmem->StitchEnable = true; |
| hucPakStitchDmem->StitchCommandOffset = 0; |
| hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END; |
| |
| //Set the kernel output offsets |
| hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord; |
| hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = 0xFFFFFFFF; |
| hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF; |
| hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF; |
| |
| for (auto i = 0; i < m_numPipe; i++) |
| { |
| hucPakStitchDmem->NumTiles[i] = numTilesPerPipe; |
| |
| // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic. |
| // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region. |
| hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) + |
| m_hevcTileStatsOffset.uiTileSizeRecord; |
| } |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass])); |
| |
| MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS)); |
| dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]); |
| dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE); |
| dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCPakIntegrate( |
| PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| HucPakStitchDmemVdencG12* hucPakStitchDmem = (HucPakStitchDmemVdencG12*)m_osInterface->pfnLockResource( |
| m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem); |
| MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemVdencG12)); |
| |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams); |
| |
| // Reset all the offsets to be shared in the huc dmem (6*5 DW's) |
| MOS_FillMemory(hucPakStitchDmem, 120, 0xFF); |
| |
| uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint16_t numTiles = numTileRows * numTileColumns; |
| uint16_t numTilesPerPipe = m_numTiles / m_numPipe; |
| |
| hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE; |
| // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record |
| hucPakStitchDmem->OffsetInCommandBuffer = (m_numTiles - 1) * CODECHAL_CACHELINE_SIZE + 8; |
| hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth; |
| hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight; |
| hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe; |
| hucPakStitchDmem->Codec = 2; // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc |
| hucPakStitchDmem->MAXPass = m_brcEnabled ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1; |
| hucPakStitchDmem->CurrentPass = (uint8_t) currentPass + 1; // Current BRC pass [1..MAXPass] |
| hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| hucPakStitchDmem->CabacZeroWordFlag = false; |
| hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8 |
| hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8 |
| hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc; |
| hucPakStitchDmem->LastTileBS_StartInBytes = (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1); |
| hucPakStitchDmem->PIC_STATE_StartInBytes = (uint16_t)m_picStateCmdStartInBytes; |
| CODECHAL_ENCODE_VERBOSEMESSAGE("last tile offset = 0x%x, LastTileBS_StartInBytes =0x%x, (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE), hucPakStitchDmem->LastTileBS_StartInBytes"); |
| if(m_enableTileStitchByHW) |
| { |
| hucPakStitchDmem->StitchEnable = true; |
| hucPakStitchDmem->StitchCommandOffset = 0; |
| hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END; |
| } |
| |
| if (m_numPipe > 1) |
| { |
| //Set the kernel output offsets |
| hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = m_hevcFrameStatsOffset.uiHevcPakStatistics; |
| hucPakStitchDmem->HEVC_Streamout_offset[0] = m_hevcFrameStatsOffset.uiHevcSliceStreamout; |
| hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord; |
| hucPakStitchDmem->VDENCSTAT_offset[0] = m_hevcFrameStatsOffset.uiVdencStatistics; |
| |
| // Calculate number of slices that execute on a single pipe |
| for (auto tileRow = 0; tileRow < numTileRows; tileRow++) |
| { |
| for (auto tileCol = 0; tileCol < numTileColumns; tileCol++) |
| { |
| PCODEC_ENCODER_SLCDATA slcData = m_slcData; |
| uint16_t slcCount, idx, sliceNumInTile = 0; |
| |
| idx = tileRow * numTileColumns + tileCol; |
| for (slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| bool lastSliceInTile = false, sliceInTile = false; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount, |
| &tileParams[idx], |
| &sliceInTile, |
| &lastSliceInTile)); |
| |
| if (!sliceInTile) |
| { |
| continue; |
| } |
| |
| sliceNumInTile++; |
| } // end of slice |
| if (0 == sliceNumInTile) |
| { |
| // One tile must have at least one slice |
| CODECHAL_ENCODE_ASSERT(false); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| break; |
| } |
| |
| if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| // Set the number of slices per pipe in the Dmem structure |
| hucPakStitchDmem->NumSlices[tileCol] += sliceNumInTile; |
| } |
| } |
| |
| for (auto i = 0; i < m_numPipe; i++) |
| { |
| hucPakStitchDmem->NumTiles[i] = numTilesPerPipe; |
| hucPakStitchDmem->NumSlices[i] = numTilesPerPipe; // Assuming 1 slice/ tile. To do: change this later. |
| |
| // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic. |
| // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region. |
| hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) + m_hevcTileStatsOffset.uiTileSizeRecord; |
| hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) + m_hevcTileStatsOffset.uiHevcPakStatistics; |
| hucPakStitchDmem->VDENCSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiVdencStatistics) + m_hevcTileStatsOffset.uiVdencStatistics; |
| hucPakStitchDmem->HEVC_Streamout_offset[i + 1] = (i * hucPakStitchDmem->NumSlices[i] * CODECHAL_CACHELINE_SIZE) + m_hevcTileStatsOffset.uiHevcSliceStreamout; |
| } |
| } |
| else |
| { |
| hucPakStitchDmem->NumTiles[0] = numTiles; |
| hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe; |
| |
| // non-scalable mode, only VDEnc statistics need to be aggregated |
| hucPakStitchDmem->VDENCSTAT_offset[0] = m_hevcFrameStatsOffset.uiVdencStatistics; |
| hucPakStitchDmem->VDENCSTAT_offset[1] = m_hevcTileStatsOffset.uiVdencStatistics; |
| } |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass])); |
| |
| MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS)); |
| dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]); |
| dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE); |
| dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::HucPakIntegrate( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| CODECHAL_ENCODE_CHK_COND_RETURN( |
| (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()), |
| "ERROR - vdbox index exceed the maximum"); |
| |
| auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex); |
| |
| // load kernel from WOPCM into L2 storage RAM |
| MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams; |
| MOS_ZeroMemory(&imemParams, sizeof(imemParams)); |
| imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams)); |
| |
| // pipe mode select |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams)); |
| |
| // DMEM set |
| MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams)); |
| |
| MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams)); |
| |
| // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resHucStatus2Buffer; |
| storeDataParams.dwResourceOffset = 0; |
| storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams)); |
| |
| // Store HUC_STATUS2 register |
| MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams; |
| MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); |
| storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer; |
| storeRegParams.dwOffset = sizeof(uint32_t); |
| storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true)); |
| |
| // wait Huc completion (use HEVC bit for now) |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams; |
| MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams)); |
| vdPipeFlushParams.Flags.bFlushHEVC = 1; |
| vdPipeFlushParams.Flags.bWaitDoneHEVC = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams)); |
| |
| // Flush the engine to ensure memory written out |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams)); |
| |
| EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf; |
| |
| uint32_t baseOffset = |
| (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource |
| |
| // Write HUC_STATUS mask |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer; |
| storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset; |
| storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| cmdBuffer, |
| &storeDataParams)); |
| |
| // store HUC_STATUS register |
| MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); |
| storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer; |
| storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset; |
| storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd( |
| cmdBuffer, |
| &storeRegParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::HucPakIntegrateStitch( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| CODECHAL_ENCODE_CHK_COND_RETURN( |
| (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()), |
| "ERROR - vdbox index exceed the maximum"); |
| |
| auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex); |
| |
| // load kernel from WOPCM into L2 storage RAM |
| MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams; |
| MOS_ZeroMemory(&imemParams, sizeof(imemParams)); |
| imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams)); |
| |
| // pipe mode select |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams)); |
| |
| // DMEM set |
| MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrateStitch(&dmemParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams)); |
| |
| MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrateStitch(&virtualAddrParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams)); |
| |
| // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resHucStatus2Buffer; |
| storeDataParams.dwResourceOffset = 0; |
| storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams)); |
| |
| // Store HUC_STATUS2 register |
| MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams; |
| MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); |
| storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer; |
| storeRegParams.dwOffset = sizeof(uint32_t); |
| storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true)); |
| |
| // wait Huc completion (use HEVC bit for now) |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams; |
| MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams)); |
| vdPipeFlushParams.Flags.bFlushHEVC = 1; |
| vdPipeFlushParams.Flags.bWaitDoneHEVC = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams)); |
| |
| // Flush the engine to ensure memory written out |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams)); |
| |
| EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf; |
| |
| uint32_t baseOffset = |
| (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource |
| |
| // Write HUC_STATUS mask |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer; |
| storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset; |
| storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| cmdBuffer, |
| &storeDataParams)); |
| |
| // store HUC_STATUS register |
| MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); |
| storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer; |
| storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset; |
| storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd( |
| cmdBuffer, |
| &storeRegParams)); |
| |
| return eStatus; |
| } |
| |
| void CodechalVdencHevcStateG12::CreateMhwParams() |
| { |
| m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G12); |
| m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12); |
| m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12); |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::CalculatePictureStateCommandSize() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_hwInterface->GetHxxStateCommandSize( |
| CODECHAL_ENCODE_MODE_HEVC, |
| &m_defaultPictureStatesSize, |
| &m_defaultPicturePatchListSize, |
| &stateCmdSizeParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AddHcpPipeBufAddrCmd( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| #ifdef _MMC_SUPPORTED |
| m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams); |
| #endif |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(cmdBuffer, m_pipeBufAddrParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetTileData( |
| MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12* tileCodingParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| return eStatus; |
| } |
| |
| uint32_t colBd[100] = { 0 }; |
| uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| for (uint32_t i = 0; i < numTileColumns; i++) |
| { |
| colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i]; |
| } |
| |
| uint32_t rowBd[100] = { 0 }; |
| uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| for (uint32_t i = 0; i < numTileRows; i++) |
| { |
| rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i]; |
| } |
| |
| m_numTiles = numTileRows * numTileColumns; |
| if (m_numTiles > CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_VDENC_MIN_TILE_WIDTH_SIZE) * |
| CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_VDENC_MIN_TILE_HEIGHT_SIZE)) |
| { |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| m_numTileRows = numTileRows; |
| |
| uint32_t const numCuRecordTab[] = { 1, 4, 16, 64 }; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64 |
| uint32_t numCuRecord = numCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)]; |
| uint32_t maxBytePerLCU = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| maxBytePerLCU = maxBytePerLCU * maxBytePerLCU; // number of pixels per LCU |
| maxBytePerLCU = maxBytePerLCU * 3 / (m_is10BitHevc ? 1 : 2); //assume 4:2:0 format |
| uint32_t bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0; |
| int32_t frameWidthInMinCb = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1; |
| int32_t frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1; |
| int32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3; |
| uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| uint32_t streamInWidthinLCU = MOS_ROUNDUP_DIVIDE((frameWidthInMinCb << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize); |
| uint32_t numLcuInPic = 0; |
| uint32_t tileStartLCUAddr = 0; |
| |
| for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++) |
| { |
| for (uint32_t j = 0; j < numTileColumns; j++) |
| { |
| numLcuInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j]; |
| } |
| } |
| |
| uint32_t numSliceInTile = 0; |
| uint64_t activeBitstreamSize = (uint64_t)m_encodeParams.dwBitstreamSize; |
| // There would be padding at the end of last tile in CBR, reserve dedicated part in the BS buf |
| if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR) |
| { |
| // Assume max padding num < target frame size derived from target bit rate and frame rate |
| uint32_t actualFrameRate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator; |
| uint64_t reservedPart = (uint64_t)m_hevcSeqParams->TargetBitRate / 8 / (uint64_t)actualFrameRate * 1024; |
| |
| if (reservedPart > activeBitstreamSize) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Frame size cal from target Bit rate is larger than BS buf! Issues in CBR paras!"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| // Capping the reserved part to 1/10 of bs buf size |
| if (reservedPart > activeBitstreamSize / 10) |
| { |
| reservedPart = activeBitstreamSize / 10; |
| } |
| |
| activeBitstreamSize -= reservedPart; |
| } |
| |
| for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++) |
| { |
| for (uint32_t j = 0; j < numTileColumns; j++) |
| { |
| uint32_t idx = i * numTileColumns + j; |
| uint32_t numLcuInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j]; |
| |
| tileCodingParams[idx].TileStartLCUX = colBd[j]; |
| tileCodingParams[idx].TileStartLCUY = rowBd[i]; |
| |
| tileCodingParams[idx].TileColumnStoreSelect = j % 2; |
| tileCodingParams[idx].TileRowStoreSelect = i % 2; |
| |
| if (j != numTileColumns - 1) |
| { |
| tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1; |
| tileCodingParams[idx].IsLastTileofRow = false; |
| } |
| else |
| { |
| tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1; |
| tileCodingParams[idx].IsLastTileofRow = true; |
| |
| } |
| |
| if (i != numTileRows - 1) |
| { |
| tileCodingParams[idx].IsLastTileofColumn = false; |
| tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1; |
| } |
| else |
| { |
| tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1; |
| tileCodingParams[idx].IsLastTileofColumn = true; |
| } |
| |
| tileCodingParams[idx].NumOfTilesInFrame = m_numTiles; |
| tileCodingParams[idx].NumOfTileColumnsInFrame = numTileColumns; |
| tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()), |
| CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE; |
| tileCodingParams[idx].NumberOfActiveBePipes = (m_numPipe > 1) ? m_numPipe : 1; |
| |
| tileCodingParams[idx].PakTileStatisticsOffset = 9 * idx; |
| tileCodingParams[idx].TileSizeStreamoutOffset = idx; |
| tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0; |
| tileCodingParams[idx].presHcpSyncBuffer = &m_resHcpScalabilitySyncBuffer.sResource; |
| tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset; |
| tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile; |
| tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset; |
| tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset; |
| tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset; |
| |
| uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize); |
| uint32_t tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize); |
| |
| //StreamIn data is 4 CLs per LCU |
| tileCodingParams[idx].TileStreaminOffset = 4 * (tileCodingParams[idx].TileStartLCUY * streamInWidthinLCU + tileCodingParams[idx].TileStartLCUX * tileHeightInLCU); |
| tileCodingParams[idx].SliceSizeStreamoutOffset = tileStartLCUAddr; |
| tileStartLCUAddr += (tileWidthInLCU * tileHeightInLCU); |
| |
| cuLevelStreamoutOffset += (tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16 / CODECHAL_CACHELINE_SIZE; |
| sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE; |
| saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE; |
| |
| uint64_t totalSizeTemp = (uint64_t)activeBitstreamSize * (uint64_t)numLcuInTile; |
| uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)numLcuInPic) + ((totalSizeTemp % (uint64_t)numLcuInPic) ? 1 : 0); |
| bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE; |
| |
| numLcusInTiles += numLcuInTile; |
| |
| for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| bool lastSliceInTile = false, sliceInTile = false; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount, |
| &tileCodingParams[idx], |
| &sliceInTile, |
| &lastSliceInTile)); |
| numSliceInTile += (sliceInTile ? 1 : 0); |
| } |
| } |
| |
| // same row store buffer for different tile rows. |
| saoRowstoreOffset = 0; |
| sseRowstoreOffset = 0; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::IsSliceInTile( |
| uint32_t sliceNumber, |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile, |
| bool *sliceInTile, |
| bool *lastSliceInTile) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile); |
| |
| uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3; |
| uint32_t residual = (1 << shift) - 1; |
| uint32_t frameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift; |
| uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift; |
| |
| PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber]; |
| uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address; |
| uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU; |
| uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU; |
| |
| uint32_t tileColumnWidth = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift; |
| uint32_t tileRowHeight = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift; |
| if (sliceLCUx < currentTile->TileStartLCUX || |
| sliceLCUy < currentTile->TileStartLCUY || |
| sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth || |
| sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight |
| ) |
| { |
| // slice start is not in the tile boundary |
| *lastSliceInTile = *sliceInTile = false; |
| return eStatus; |
| } |
| |
| sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tileColumnWidth; |
| sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tileColumnWidth; |
| |
| if (sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth) |
| { |
| sliceLCUx -= tileColumnWidth; |
| sliceLCUy++; |
| } |
| |
| if (sliceLCUx < currentTile->TileStartLCUX || |
| sliceLCUy < currentTile->TileStartLCUY || |
| sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth || |
| sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight |
| ) |
| { |
| // last LCU of the slice is out of the tile boundary |
| *lastSliceInTile = *sliceInTile = false; |
| return eStatus; |
| } |
| |
| *sliceInTile = true; |
| |
| sliceLCUx++; |
| sliceLCUy++; |
| |
| // the end of slice is at the boundary of tile |
| *lastSliceInTile = ( |
| sliceLCUx == currentTile->TileStartLCUX + tileColumnWidth && |
| sliceLCUy == currentTile->TileStartLCUY + tileRowHeight); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::InitMmcState() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| #ifdef _MMC_SUPPORTED |
| m_mmcState = MOS_New(CodechalMmcEncodeHevcG12, m_hwInterface, this); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState); |
| #endif |
| return MOS_STATUS_SUCCESS; |
| } |
| |
#if defined(_ENCODE_VDENC_RESERVED)
//!
//! \brief    Create and initialize the reserved-feature state object
//! \param    [in] settings
//!           Codechal settings forwarded to the reserved state's Initialize()
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS, or the error raised by the null check when
//!           the object could not be created
//!
MOS_STATUS CodechalVdencHevcStateG12::InitReserveState(CodechalSetting * settings)
{
    CODECHAL_ENCODE_FUNCTION_ENTER;

    m_rsvdState = MOS_New(CodechalVdencHevcG12Rsvd, m_hwInterface, this);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_rsvdState);

    // NOTE(review): whatever Initialize() returns is not inspected here -
    // confirm against its declaration whether a status should be propagated.
    m_rsvdState->Initialize(settings);

    return MOS_STATUS_SUCCESS;
}
#endif  // _ENCODE_VDENC_RESERVED
| |
| uint32_t CodechalVdencHevcStateG12::CalculateCommandBufferSize() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| // To be refined later, differentiate BRC and CQP |
| uint32_t commandBufferSize = |
| m_pictureStatesSize + |
| m_extraPictureStatesSize + |
| (m_sliceStatesSize * m_numSlices) + |
| m_hucCommandsSize * 5; |
| |
| if (m_singleTaskPhaseSupported) |
| { |
| commandBufferSize *= (m_numPasses + 1); |
| } |
| |
| if (m_osInterface->bUsesPatchList && m_hevcPicParams->tiles_enabled_flag) |
| { |
| commandBufferSize += (m_tileLevelBatchSize * m_numTiles * CODECHAL_VDENC_BRC_NUM_OF_PASSES); |
| } |
| |
| // 4K align since allocation is in chunks of 4K bytes. |
| commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, 0x1000); |
| |
| return commandBufferSize; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::VerifyCommandBufferSize() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (UseRenderCommandBuffer() || m_numPipe == 1) |
| { |
| // legacy mode & resize CommandBuffer Size for every BRC pass |
| if (!m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| return eStatus; |
| } |
| |
| // virtual engine |
| uint32_t requestedSize = |
| m_pictureStatesSize + |
| m_extraPictureStatesSize + |
| (m_sliceStatesSize * m_numSlices); |
| |
| requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize); |
| |
| // Running in the multiple VDBOX mode |
| int currentPipe = GetCurrentPipe(); |
| if (currentPipe < 0 || currentPipe >= m_numPipe) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| int currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if (IsFirstPipe() && m_osInterface->bUsesPatchList) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| |
| PMOS_COMMAND_BUFFER pCmdBuffer; |
| if (m_osInterface->phasedSubmission) |
| { |
| m_osInterface->pfnVerifyCommandBufferSize(m_osInterface, requestedSize, 0); |
| return eStatus; |
| } |
| else |
| { |
| pCmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass]; |
| } |
| |
| if (Mos_ResourceIsNull(&pCmdBuffer->OsResource) || |
| m_sizeOfVeBatchBuffer < requestedSize) |
| { |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = requestedSize; |
| allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX"; |
| |
| if (!Mos_ResourceIsNull(&pCmdBuffer->OsResource)) |
| { |
| if (pCmdBuffer->pCmdBase) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &pCmdBuffer->OsResource); |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &pCmdBuffer->OsResource); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &pCmdBuffer->OsResource)); |
| |
| m_sizeOfVeBatchBuffer = requestedSize; |
| } |
| |
| if (pCmdBuffer->pCmdBase == nullptr) |
| { |
| MOS_LOCK_PARAMS lockParams; |
| MOS_ZeroMemory(&lockParams, sizeof(lockParams)); |
| lockParams.WriteOnly = true; |
| pCmdBuffer->pCmdPtr = pCmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &pCmdBuffer->OsResource, &lockParams); |
| pCmdBuffer->iRemaining = m_sizeOfVeBatchBuffer; |
| pCmdBuffer->iOffset = 0; |
| |
| if (pCmdBuffer->pCmdBase == nullptr) |
| { |
| eStatus = MOS_STATUS_NULL_POINTER; |
| return eStatus; |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface); |
| |
| if (UseRenderCommandBuffer() || m_numPipe == 1) |
| { |
| // legacy mode |
| m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0)); |
| return eStatus; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0)); |
| |
| int currentPipe = GetCurrentPipe(); |
| if (currentPipe < 0 || currentPipe >= m_numPipe) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| int currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if (m_osInterface->phasedSubmission) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1)); |
| |
| CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(), cmdBuffer); |
| if (IsLastPipe()) |
| { |
| cmdBuffer->iSubmissionType |= SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE; |
| } |
| } |
| else |
| { |
| *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass]; |
| } |
| |
| if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0) |
| { |
| // Insert CP Prolog |
| CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode"); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer)); |
| } |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| if (UseRenderCommandBuffer() || m_numPipe == 1) |
| { |
| // legacy mode |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0); |
| return eStatus; |
| } |
| |
| int currentPipe = GetCurrentPipe(); |
| if (currentPipe < 0 || currentPipe >= m_numPipe) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| int currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if (m_osInterface->phasedSubmission) |
| { |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1); |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0); |
| } |
| else |
| { |
| uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass; |
| m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer; |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SubmitCommandBuffer( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| bool nullRendering) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| if (IsLastPass()) |
| { |
| if (m_osInterface->osCpInterface->IsHMEnabled()) |
| { |
| HalOcaInterface::DumpCpParam(*cmdBuffer, *m_osInterface->pOsContext, m_osInterface->osCpInterface->GetOcaDumper()); |
| } |
| |
| HalOcaInterface::On1stLevelBBEnd(*cmdBuffer, *m_osInterface->pOsContext); |
| } |
| |
| if (UseRenderCommandBuffer() || m_numPipe == 1) |
| { |
| // legacy mode |
| if (!UseRenderCommandBuffer()) // Set VE Hints for video contexts only |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer)); |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, nullRendering)); |
| return eStatus; |
| } |
| |
| bool cmdBufferReadyForSubmit = IsLastPipe(); |
| |
| // In STF, Hold the command buffer submission till last pass |
| if (m_singleTaskPhaseSupported) |
| { |
| cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass(); |
| } |
| |
| if(!cmdBufferReadyForSubmit) |
| { |
| return eStatus; |
| } |
| |
| int currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if (m_osInterface->phasedSubmission) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, nullRendering)); |
| } |
| else |
| { |
| uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass; |
| |
| for (uint32_t i = 0; i < m_numPipe; i++) |
| { |
| PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex]; |
| |
| if(cmdBuffer->pCmdBase) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource); |
| } |
| |
| cmdBuffer->pCmdBase = 0; |
| cmdBuffer->iOffset = cmdBuffer->iRemaining = 0; |
| } |
| m_sizeOfVeBatchBuffer = 0; |
| |
| if(eStatus == MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, nullRendering)); |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SendPrologWithFrameTracking( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| bool frameTrackingRequested, |
| MHW_MI_MMIOREGISTERS *mmioRegister) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface); |
| |
| if (UseRenderCommandBuffer()) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister)); |
| return eStatus; |
| } |
| |
| #ifdef _MMC_SUPPORTED |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext)); |
| #endif |
| |
| if (!IsLastPipe()) |
| { |
| return eStatus; |
| } |
| |
| PMOS_COMMAND_BUFFER commandBufferInUse; |
| if (m_realCmdBuffer.pCmdBase) |
| { |
| commandBufferInUse = &m_realCmdBuffer; |
| } |
| else |
| if (cmdBuffer && cmdBuffer->pCmdBase) |
| { |
| commandBufferInUse = cmdBuffer; |
| } |
| else |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| // initialize command buffer attributes |
| commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode; |
| commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices; |
| commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices; |
| commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus; |
| commandBufferInUse->Attributes.bValidPowerGatingRequest = true; |
| |
| if (frameTrackingRequested && m_frameTrackingEnabled) |
| { |
| commandBufferInUse->Attributes.bEnableMediaFrameTracking = true; |
| commandBufferInUse->Attributes.resMediaFrameTrackingSurface = |
| m_encodeStatusBuf.resStatusBuffer; |
| commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData; |
| // Set media frame tracking address offset(the offset from the encoder status buffer page) |
| commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0; |
| } |
| |
| MHW_GENERIC_PROLOG_PARAMS genericPrologParams; |
| MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams)); |
| genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface(); |
| genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface(); |
| genericPrologParams.bMmcEnabled = CodecHalMmcState::IsMmcEnabled(); |
| genericPrologParams.dwStoreDataValue = m_storeData - 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetSliceStructs() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| eStatus = CodechalEncodeHevcBase::SetSliceStructs(); |
| CODECHAL_ENCODE_CHK_COND_RETURN((m_lookaheadPass && !m_lowDelay), "RA B frame is not expected in lookahead pass."); |
| m_numPassesInOnePipe = m_numPasses; |
| m_numPasses = (m_numPasses + 1) * m_numPipe - 1; |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AllocateTileStatistics() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| return eStatus; |
| } |
| |
| auto num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| auto num_tiles = num_tile_rows * num_tile_columns; |
| |
| MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO)); |
| MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO)); |
| MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO)); |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| // Set the maximum size based on frame level statistics. |
| m_hevcStatsSize.uiTileSizeRecord = CODECHAL_CACHELINE_SIZE; |
| m_hevcStatsSize.uiHevcPakStatistics = m_sizeOfHcpPakFrameStats; |
| m_hevcStatsSize.uiVdencStatistics = CODECHAL_HEVC_VDENC_STATS_SIZE; |
| m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE; |
| |
| // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer |
| // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions |
| m_hevcFrameStatsOffset.uiTileSizeRecord = 0; // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer |
| m_hevcFrameStatsOffset.uiHevcPakStatistics = 0; |
| m_hevcFrameStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE); |
| m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE); |
| |
| // Frame level statistics |
| m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu), CODECHAL_PAGE_SIZE); |
| |
| // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel |
| if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource)) |
| { |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize; |
| allocParamsForBufferLinear.pBufName = "GEN12 HCP Aggregated Frame Statistics Streamout Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHuCPakAggregatedFrameStatsBuffer.sResource)); |
| m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize; |
| |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resHuCPakAggregatedFrameStatsBuffer.sResource, |
| &lockFlagsWriteOnly); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource); |
| } |
| |
| // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer |
| // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions |
| m_hevcTileStatsOffset.uiTileSizeRecord = 0; // TileReord is in a separated resource |
| m_hevcTileStatsOffset.uiHevcPakStatistics = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer; |
| m_hevcTileStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE); |
| m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE); |
| // Combined statistics size for all tiles |
| m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu, CODECHAL_PAGE_SIZE); |
| |
| // Tile size record size for all tiles |
| m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles; |
| |
| if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize) |
| { |
| if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource)) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource); |
| } |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize; |
| allocParamsForBufferLinear.pBufName = "GEN12 HCP Tile Level Statistics Streamout Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource)); |
| m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize; |
| |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource, |
| &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource); |
| } |
| |
| // Allocate the updated tile size buffer for PAK integration kernel |
| if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize) |
| { |
| if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource)) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource); |
| } |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize; |
| allocParamsForBufferLinear.pBufName = "Tile Record buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource), |
| "Failed to create GEN12 Tile Record buffer"); |
| |
| m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = allocParamsForBufferLinear.dwBytes; |
| } |
| |
| // Only needed when tile & BRC is enabled, but the size is not changing at frame level |
| // Move to more properiate place later |
| if (Mos_ResourceIsNull(&m_resBrcDataBuffer)) |
| { |
| uint8_t* data; |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| |
| // Pak stitch DMEM |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE); |
| allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer"; |
| auto numOfPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES; |
| |
| for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++) |
| { |
| for (auto i = 0; i < numOfPasses; i++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( |
| m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHucPakStitchDmemBuffer[k][i]), |
| "Failed to allocate PAK Stitch Dmem Buffer."); |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| data = (uint8_t*)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resHucPakStitchDmemBuffer[k][i], |
| &lockFlagsWriteOnly); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory( |
| data, |
| allocParamsForBufferLinear.dwBytes); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]); |
| } |
| } |
| |
| // BRC Data Buffer |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_numTiles * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| allocParamsForBufferLinear.pBufName = "BRC Data Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( |
| m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resBrcDataBuffer), |
| "Failed to allocate BRC Data Buffer Buffer."); |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| |
| data = (uint8_t*)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resBrcDataBuffer, |
| &lockFlags); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory( |
| data, |
| allocParamsForBufferLinear.dwBytes); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| // encodeStatus is offset by 2 DWs in the resource |
| uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset; |
| for (auto i = 0; i < 6; i++) // 64 bit SSE values for luma/ chroma channels need to be copied |
| { |
| MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams; |
| MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams)); |
| miCpyMemMemParams.presSrc = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer; |
| miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma |
| miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer; |
| miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams)); |
| } |
| return eStatus; |
| } |
| |
| void CodechalVdencHevcStateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams) |
| { |
| PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBbIndex]; |
| bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource); |
| |
| MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams)); |
| indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC; |
| indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface; |
| indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset; |
| indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset; |
| indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer; |
| indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound; |
| indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr; |
| indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0; |
| indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::HuCLookaheadInit() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_firstTaskInPhase = !m_singleTaskPhaseSupported; |
| m_lastTaskInPhase = !m_singleTaskPhaseSupported; |
| |
| // set DMEM |
| uint32_t avgFrameSize = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS * m_hevcSeqParams->FrameRate.Denominator / m_hevcSeqParams->FrameRate.Numerator; |
| uint32_t initVbvFullness = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit); |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource( |
| m_osInterface, &m_vdencLaInitDmemBuffer, &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(dmem); |
| MOS_ZeroMemory(dmem, sizeof(dmem)); |
| |
| dmem->lookAheadFunc = 0; |
| dmem->lengthAhead = m_lookaheadDepth; |
| dmem->vbvBufferSize = m_hevcSeqParams->VBVBufferSizeInBit / avgFrameSize; |
| dmem->vbvInitialFullness = initVbvFullness / avgFrameSize; |
| dmem->statsRecords = m_numLaDataEntry; |
| dmem->averageFrameSize = avgFrameSize >> 3; |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaInitDmemBuffer); |
| |
| // set HuC regions |
| MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams; |
| MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams)); |
| virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer; |
| virtualAddrParams.regionParams[0].isWritable = true; |
| |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| if (!m_singleTaskPhaseSupported || m_firstTaskInPhase) |
| { |
| // Send command buffer header at the beginning (OS dependent) |
| bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking)); |
| } |
| |
| // load kernel from WOPCM into L2 storage RAM |
| MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams; |
| MOS_ZeroMemory(&imemParams, sizeof(imemParams)); |
| imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams)); |
| |
| // pipe mode select |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams)); |
| |
| // set HuC DMEM param |
| MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams; |
| MOS_ZeroMemory(&dmemParams, sizeof(dmemParams)); |
| dmemParams.presHucDataSource = &m_vdencLaInitDmemBuffer; |
| dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaInitDmemBufferSize, CODECHAL_CACHELINE_SIZE); |
| dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true)); |
| |
| // wait Huc completion (use HEVC bit for now) |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams; |
| MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams)); |
| vdPipeFlushParams.Flags.bFlushHEVC = 1; |
| vdPipeFlushParams.Flags.bWaitDoneHEVC = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams)); |
| |
| // Flush the engine to ensure memory written out |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| bool renderingFlags = m_videoContextUsesNullHw; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::HuCLookaheadUpdate() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_firstTaskInPhase = !m_singleTaskPhaseSupported; |
| m_lastTaskInPhase = true; |
| |
| // set DMEM |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource( |
| m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(dmem); |
| MOS_ZeroMemory(dmem, sizeof(dmem)); |
| |
| dmem->lookAheadFunc = 1; |
| dmem->validStatsRecords = m_numValidLaRecords; |
| dmem->offset = (m_numLaDataEntry + m_currLaDataIdx + 1 - m_numValidLaRecords) % m_numLaDataEntry; |
| dmem->cqmQpThreshold = m_cqmQpThreshold; |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx]); |
| |
| // set HuC regions |
| MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams; |
| MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams)); |
| virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer; |
| virtualAddrParams.regionParams[0].isWritable = true; |
| virtualAddrParams.regionParams[1].presRegion = &m_vdencLaStatsBuffer; |
| virtualAddrParams.regionParams[2].presRegion = m_encodeParams.psLaDataBuffer; |
| virtualAddrParams.regionParams[2].isWritable = true; |
| if (m_osInterface->pfnSkipResourceSyncDynamic) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSkipResourceSyncDynamic(m_encodeParams.psLaDataBuffer)); |
| } |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| if (!m_singleTaskPhaseSupported || m_firstTaskInPhase) |
| { |
| // Send command buffer header at the beginning (OS dependent) |
| bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES)); |
| |
| // load kernel from WOPCM into L2 storage RAM |
| MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams; |
| MOS_ZeroMemory(&imemParams, sizeof(imemParams)); |
| imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams)); |
| |
| // pipe mode select |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams)); |
| |
| // set HuC DMEM param |
| MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams; |
| MOS_ZeroMemory(&dmemParams, sizeof(dmemParams)); |
| dmemParams.presHucDataSource = &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx]; |
| dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE); |
| dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true)); |
| |
| // wait Huc completion (use HEVC bit for now) |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams; |
| MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams)); |
| vdPipeFlushParams.Flags.bFlushHEVC = 1; |
| vdPipeFlushParams.Flags.bWaitDoneHEVC = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams)); |
| |
| // Flush the engine to ensure memory written out |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| // Write lookahead status to encode status buffer |
| MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams; |
| EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf; |
| uint32_t baseOffset = |
| (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource |
| MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS)); |
| miCpyMemMemParams.presSrc = m_encodeParams.psLaDataBuffer; |
| miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalEncodeLaData) + CODECHAL_OFFSETOF(CodechalEncodeLaData, report); |
| miCpyMemMemParams.presDst = &encodeStatusBuf.resStatusBuffer; |
| miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams)); |
| |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| bool renderingFlags = m_videoContextUsesNullHw; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AnalyzeLookaheadStats() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_numValidLaRecords++; |
| |
| if (m_lookaheadUpdate) |
| { |
| CODECHAL_ENCODE_CHK_COND_RETURN(!m_encodeParams.bLaDataEnabled, "Lookahead Data Buffer is missing."); |
| |
| if (m_lookaheadInit) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadInit()); |
| m_lookaheadInit = false; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate()); |
| m_numValidLaRecords--; |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx], |
| CodechalDbgAttr::attrVdencOutput, |
| "_LookaheadDmem", |
| sizeof(CodechalVdencHevcLaDmem), |
| 0, |
| CODECHAL_NUM_MEDIA_STATES))); |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| m_encodeParams.psLaDataBuffer, |
| CodechalDbgAttr::attrVdencOutput, |
| "_LookaheadData", |
| m_numLaDataEntry * sizeof(CodechalEncodeLaData), |
| 0, |
| CODECHAL_NUM_MEDIA_STATES))); |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_vdencLaHistoryBuffer, |
| CodechalDbgAttr::attrVdencOutput, |
| "_LookaheadHistory", |
| m_LaHistoryBufSize, |
| 0, |
| CODECHAL_NUM_MEDIA_STATES))); |
| |
| if (m_hevcPicParams->bLastPicInStream) |
| { |
| // Flush the last frames |
| while (m_numValidLaRecords > 0) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate()); |
| m_numValidLaRecords--; |
| } |
| |
| if (!m_encodeParams.bLaDataEnabled) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, m_encodeParams.psLaDataBuffer); |
| } |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::HuCBrcInitReset() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && (m_numPipe == 1)) |
| { |
| // Send command buffer header at the beginning (OS dependent) |
| bool requestFrameTracking = m_singleTaskPhaseSupported ? |
| m_firstTaskInPhase : 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking)); |
| } |
| |
| // load kernel from WOPCM into L2 storage RAM |
| MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams; |
| MOS_ZeroMemory(&imemParams, sizeof(imemParams)); |
| imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcInitKernelDescriptor; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams)); |
| |
| // pipe mode select |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset()); |
| |
| // set HuC DMEM param |
| MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams; |
| MOS_ZeroMemory(&dmemParams, sizeof(dmemParams)); |
| dmemParams.presHucDataSource = &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx]; |
| dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE); |
| dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams)); |
| |
| MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams; |
| MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams)); |
| virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer; |
| virtualAddrParams.regionParams[0].isWritable = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams)); |
| |
| // Store HUC_STATUS2 register bit 6 before HUC_Start command |
| // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload |
| // (HUC_Start command with last start bit set). |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer)); |
| ) |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true)); |
| |
| // wait Huc completion (use HEVC bit for now) |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams; |
| MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams)); |
| vdPipeFlushParams.Flags.bFlushHEVC = 1; |
| vdPipeFlushParams.Flags.bWaitDoneHEVC = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams)); |
| |
| // Flush the engine to ensure memory written out |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| |
| if (!m_singleTaskPhaseSupported) |
| { |
| bool renderingFlags = m_videoContextUsesNullHw; |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| CODECHAL_MEDIA_STATE_BRC_INIT_RESET, |
| nullptr))); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags)); |
| } |
| |
| CODECHAL_DEBUG_TOOL(DumpHucBrcInit()); |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::HuCBrcUpdate() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| *m_pipeBufAddrParams = {}; |
| if (m_pictureCodingType != I_TYPE) |
| { |
| for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++) |
| { |
| if (!m_picIdx[i].bValid || !m_currUsedRefPic[i]) |
| { |
| continue; |
| } |
| |
| uint8_t idx = m_picIdx[i].ucPicIdx; |
| CodecHalGetResourceInfo(m_osInterface, &(m_refList[idx]->sRefReconBuffer)); |
| |
| uint8_t frameStoreId = (uint8_t)m_refIdxMapping[i]; |
| m_pipeBufAddrParams->presReferences[frameStoreId] = &(m_refList[idx]->sRefReconBuffer.OsResource); |
| } |
| } |
| |
| #ifdef _ENCODE_VDENC_RESERVED |
| if (m_rsvdState) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_rsvdState->HuCBrcUpdate(m_pipeBufAddrParams, m_slotForRecNotFiltered)); |
| } |
| #endif |
| |
| if (((!m_singleTaskPhaseSupported) || ((m_firstTaskInPhase) && (!m_brcInit))) && (m_numPipe == 1)) |
| { |
| // Send command buffer header at the beginning (OS dependent) |
| bool requestFrameTracking = m_singleTaskPhaseSupported ? |
| m_firstTaskInPhase : 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking)); |
| } |
| |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass])); |
| |
| // load kernel from WOPCM into L2 storage RAM |
| MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams; |
| MOS_ZeroMemory(&imemParams, sizeof(imemParams)); |
| |
| if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) // Low Delay BRC |
| { |
| imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor; |
| } |
| else |
| { |
| imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams)); |
| |
| // pipe mode select |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams)); |
| |
| // DMEM set |
| m_CurrentPassForOverAll = 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate()); |
| |
| MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams; |
| MOS_ZeroMemory(&dmemParams, sizeof(dmemParams)); |
| dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]); |
| dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE); |
| dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams)); |
| |
| // Set Const Data buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate()); |
| |
| // Add Virtual addr |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &m_virtualAddrParams)); |
| |
| // Store HUC_STATUS2 register bit 6 before HUC_Start command |
| // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload |
| // (HUC_Start command with last start bit set). |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer)); |
| ) |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true)); |
| |
| // wait Huc completion (use HEVC bit for now) |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams; |
| MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams)); |
| vdPipeFlushParams.Flags.bFlushHEVC = 1; |
| vdPipeFlushParams.Flags.bWaitDoneHEVC = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams)); |
| |
| // Flush the engine to ensure memory written out |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| // Write HUC_STATUS mask: DW1 (mask value) |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resPakMmioBuffer; |
| storeDataParams.dwResourceOffset = sizeof(uint32_t); |
| storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams)); |
| |
| // store HUC_STATUS register: DW0 (actual value) |
| CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum"); |
| auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex); |
| MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams; |
| MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); |
| storeRegParams.presStoreBuffer = &m_resPakMmioBuffer; |
| storeRegParams.dwOffset = 0; |
| storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams)); |
| |
| // DW0 & DW1 will considered together for conditional batch buffer end cmd later |
| if ((!m_singleTaskPhaseSupported) && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); |
| } |
| |
| // HuC Input |
| CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| |
| if (!m_singleTaskPhaseSupported) |
| { |
| bool renderingFlags = m_videoContextUsesNullHw; |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE, |
| nullptr))); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags)); |
| } |
| |
| // HuC Output |
| CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::HuCBrcTileRowUpdate(PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = true; |
| |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource), &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_COMMAND_BUFFER tileRowBRCBatchBuf; |
| MOS_ZeroMemory(&tileRowBRCBatchBuf, sizeof(tileRowBRCBatchBuf)); |
| tileRowBRCBatchBuf.pCmdBase = tileRowBRCBatchBuf.pCmdPtr = (uint32_t *)data; |
| tileRowBRCBatchBuf.iRemaining = m_hwInterface->m_hucCommandBufferSize; |
| |
| // Add batch buffer start for tile row BRC batch |
| HalOcaInterface::OnSubLevelBBStart(*cmdBuffer, *m_osInterface->pOsContext, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource, 0, true, 0); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(cmdBuffer, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])); |
| |
| // load kernel from WOPCM into L2 storage RAM |
| MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams; |
| MOS_ZeroMemory(&imemParams, sizeof(imemParams)); |
| |
| if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) // Low Delay BRC |
| { |
| imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor; |
| } |
| else |
| { |
| imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&tileRowBRCBatchBuf, &imemParams)); |
| |
| // pipe mode select |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&tileRowBRCBatchBuf, &pipeModeSelectParams)); |
| |
| // DMEM set |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate()); |
| |
| MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams; |
| MOS_ZeroMemory(&dmemParams, sizeof(dmemParams)); |
| dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_CurrentPassForOverAll]); |
| dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE); |
| dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&tileRowBRCBatchBuf, &dmemParams)); |
| |
| // Set Const Data buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate()); |
| |
| // Add Virtual addr |
| MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCTileRowBrcUpdate(&virtualAddrParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&tileRowBRCBatchBuf, &virtualAddrParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&tileRowBRCBatchBuf, true)); |
| |
| // wait Huc completion (use HEVC bit for now) |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams; |
| MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams)); |
| vdPipeFlushParams.Flags.bFlushHEVC = 1; |
| vdPipeFlushParams.Flags.bWaitDoneHEVC = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileRowBRCBatchBuf, &vdPipeFlushParams)); |
| |
| // Flush the engine to ensure memory written out |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&tileRowBRCBatchBuf, &flushDwParams)); |
| |
| // Write HUC_STATUS mask: DW1 (mask value) |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resPakMmioBuffer; |
| storeDataParams.dwResourceOffset = sizeof(uint32_t); |
| storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&tileRowBRCBatchBuf, &storeDataParams)); |
| |
| // store HUC_STATUS register: DW0 (actual value) |
| CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum"); |
| auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex); |
| MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams; |
| MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); |
| storeRegParams.presStoreBuffer = &m_resPakMmioBuffer; |
| storeRegParams.dwOffset = 0; |
| storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&tileRowBRCBatchBuf, &storeRegParams)); |
| |
| |
| // Set the tile row BRC update sync semaphore |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resTileRowBRCsyncSemaphore; |
| storeDataParams.dwValue = 0xFF; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&tileRowBRCBatchBuf, &storeDataParams)); |
| |
| (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->iCurrent = tileRowBRCBatchBuf.iOffset; |
| (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->iRemaining = tileRowBRCBatchBuf.iRemaining; |
| (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->pData = data; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])); |
| |
| if (data) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &(m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::UpdateCmdBufAttribute( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| bool renderEngineInUse) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| // should not be there. Will remove it in the next change |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe) |
| { |
| PMOS_CMD_BUF_ATTRI_VE attriExt = |
| (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe); |
| |
| memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE)); |
| attriExt->bUseVirtualEngineHint = |
| attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::AddMediaVfeCmd( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| SendKernelCmdsParams *params) |
| { |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params); |
| |
| MHW_VFE_PARAMS_G12 vfeParams = {}; |
| vfeParams.pKernelState = params->pKernelState; |
| vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL; |
| vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads; |
| vfeParams.bFusedEuDispatch = false; // legacy mode |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams)); |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| void CodechalVdencHevcStateG12::SetStreaminDataPerLcu( |
| PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams, |
| void* streaminData) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G12 data = (PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G12)streaminData; |
| if (streaminParams->setQpRoiCtrl) |
| { |
| if (m_vdencNativeROIEnabled) |
| { |
| data->DW0.RoiCtrl = streaminParams->roiCtrl; |
| } |
| else |
| { |
| data->DW7.QpEnable = 0xf; |
| data->DW14.ForceQp_0 = data->DW14.ForceQp_1 = data->DW14.ForceQp_2 = data->DW14.ForceQp_3 = streaminParams->forceQp; |
| } |
| } |
| else |
| { |
| data->DW0.MaxTuSize = streaminParams->maxTuSize; |
| data->DW0.MaxCuSize = streaminParams->maxCuSize; |
| data->DW0.NumImePredictors = streaminParams->numImePredictors; |
| data->DW0.PuTypeCtrl = streaminParams->puTypeCtrl; |
| data->DW6.NumMergeCandidateCu64x64 = streaminParams->numMergeCandidateCu64x64; |
| data->DW6.NumMergeCandidateCu32x32 = streaminParams->numMergeCandidateCu32x32; |
| data->DW6.NumMergeCandidateCu16x16 = streaminParams->numMergeCandidateCu16x16; |
| data->DW6.NumMergeCandidateCu8x8 = streaminParams->numMergeCandidateCu8x8; |
| } |
| } |
| |
| void CodechalVdencHevcStateG12::GetTileInfo( |
| uint32_t xPosition, |
| uint32_t yPosition, |
| uint32_t* tileId, |
| uint32_t* tileEndLCUX, |
| uint32_t* tileEndLCUY) |
| { |
| *tileId = 0; |
| uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| |
| for (uint8_t i = 0; i < m_numTiles; i++) |
| { |
| uint32_t tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[i].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize); |
| uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[i].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize); |
| *tileEndLCUX = tileParams[i].TileStartLCUX + tileWidthInLCU; |
| *tileEndLCUY = tileParams[i].TileStartLCUY + tileHeightInLCU; |
| |
| if (xPosition >= (tileParams[i].TileStartLCUX * 2) && |
| yPosition >= (tileParams[i].TileStartLCUY * 2) && |
| xPosition < (*tileEndLCUX * 2) && |
| yPosition < (*tileEndLCUY * 2)) |
| { |
| *tileId = i; |
| break; |
| } |
| } |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::PrepareVDEncStreamInData() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (m_hevcPicParams->tiles_enabled_flag) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams[m_virtualEngineBbIndex])); |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::PrepareVDEncStreamInData()); |
| |
| return eStatus; |
| } |
| |
| void CodechalVdencHevcStateG12::SetStreaminDataPerRegion( |
| uint32_t streamInWidth, |
| uint32_t top, |
| uint32_t bottom, |
| uint32_t left, |
| uint32_t right, |
| PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams, |
| void* streaminData) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| CodechalVdencHevcState::SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, streaminParams, streaminData); |
| return; |
| } |
| |
| uint8_t* data = (uint8_t*)streaminData; |
| uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0; |
| uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY); |
| |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| |
| for (auto y = top; y < bottom; y++) |
| { |
| for (auto x = left; x < right; x++) |
| { |
| uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0; |
| |
| if (x < (tileParams[tileId].TileStartLCUX * 2) || |
| y < (tileParams[tileId].TileStartLCUY * 2) || |
| x >= (tileEndLCUX * 2) || |
| y >= (tileEndLCUY * 2)) |
| { |
| GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY); |
| } |
| streamInBaseOffset = tileParams[tileId].TileStreaminOffset; |
| |
| auto xPositionInTile = x - (tileParams[tileId].TileStartLCUX * 2); |
| auto yPositionInTile = y - (tileParams[tileId].TileStartLCUY * 2); |
| auto tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize); |
| |
| StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset); |
| |
| SetStreaminDataPerLcu(streaminParams, data + (streamInBaseOffset + offset + xyOffset) * 64); |
| } |
| } |
| } |
| |
| void CodechalVdencHevcStateG12::SetBrcRoiDeltaQpMap( |
| uint32_t streamInWidth, |
| uint32_t top, |
| uint32_t bottom, |
| uint32_t left, |
| uint32_t right, |
| uint8_t regionId, |
| PDeltaQpForROI deltaQpMap) |
| { |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| CodechalVdencHevcState::SetBrcRoiDeltaQpMap(streamInWidth, top, bottom, left, right, regionId, deltaQpMap); |
| return; |
| } |
| |
| uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0; |
| uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY); |
| |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| |
| for (auto y = top; y < bottom; y++) |
| { |
| for (auto x = left; x < right; x++) |
| { |
| uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0; |
| |
| if (x < (tileParams[tileId].TileStartLCUX * 2) || |
| y < (tileParams[tileId].TileStartLCUY * 2) || |
| x >= (tileEndLCUX * 2) || |
| y >= (tileEndLCUY * 2)) |
| { |
| GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY); |
| } |
| streamInBaseOffset = tileParams[tileId].TileStreaminOffset; |
| |
| auto xPositionInTile = x - (tileParams[tileId].TileStartLCUX * 2); |
| auto yPositionInTile = y - (tileParams[tileId].TileStartLCUY * 2); |
| auto tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize); |
| |
| StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset); |
| |
| (deltaQpMap + (streamInBaseOffset + offset + xyOffset))->iDeltaQp = m_hevcPicParams->ROI[regionId].PriorityLevelOrDQp; |
| } |
| } |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetAndPopulateVEHintParams( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (!MOS_VE_SUPPORTED(m_osInterface)) |
| { |
| return eStatus; |
| } |
| |
| CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms; |
| MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS)); |
| |
| if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface)) |
| { |
| scalSetParms.bNeedSyncWithPrevious = true; |
| } |
| |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass; |
| if (m_numPipe >= 2) |
| { |
| for (auto i = 0; i < m_numPipe; i++) |
| { |
| scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource; |
| } |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms)); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| #if USE_CODECHAL_DEBUG_TOOL |
| MOS_STATUS CodechalVdencHevcStateG12::DumpVdencOutputs() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::DumpVdencOutputs()); |
| |
| if (m_hevcPicParams->tiles_enabled_flag) |
| { |
| PMOS_RESOURCE presVdencTileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; |
| auto num_tiles = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1); |
| auto vdencStatsSizeAllTiles = num_tiles * m_vdencBrcStatsBufferSize; |
| auto vdencStatsOffset = m_hevcTileStatsOffset.uiVdencStatistics; |
| auto pakStatsSizeAllTiles = num_tiles * 9 * CODECHAL_CACHELINE_SIZE; |
| auto pakStatsOffset = m_hevcTileStatsOffset.uiHevcPakStatistics; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| presVdencTileStatisticsBuffer, |
| CodechalDbgAttr::attrVdencOutput, |
| "_TileVDEncStats", |
| vdencStatsSizeAllTiles, |
| vdencStatsOffset, |
| CODECHAL_NUM_MEDIA_STATES)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| presVdencTileStatisticsBuffer, |
| CodechalDbgAttr::attrPakOutput, |
| "_TilePAKStats", |
| pakStatsSizeAllTiles, |
| pakStatsOffset, |
| CODECHAL_NUM_MEDIA_STATES)); |
| |
| // Slice Size Conformance |
| if (m_hevcSeqParams->SliceSizeControl) |
| { |
| PMOS_RESOURCE presLcuBaseAddressBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; |
| auto sliceStreamoutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout; |
| uint32_t size = m_numLcu * CODECHAL_CACHELINE_SIZE; |
| // Slice Size StreamOut Surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| presLcuBaseAddressBuffer, |
| CodechalDbgAttr::attrVdencOutput, |
| "_SliceSize", |
| size, |
| sliceStreamoutOffset, |
| CODECHAL_NUM_MEDIA_STATES)); |
| } |
| } |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::DumpHucDebugOutputBuffers() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| // Virtual Engine does only one submit per pass. Dump all HuC debug outputs |
| bool dumpDebugBuffers = IsLastPipe() && (m_numPipe > 1); |
| if (m_singleTaskPhaseSupported) |
| { |
| dumpDebugBuffers = dumpDebugBuffers && IsLastPass(); |
| } |
| |
| if (dumpDebugBuffers) |
| { |
| CODECHAL_DEBUG_TOOL( |
| DumpHucPakIntegrate(); |
| DumpHucCqp(); |
| ) |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::DumpHucPakIntegrate() |
| { |
| int32_t currentPass = GetCurrentPass(); |
| // HuC Input |
| // HuC DMEM |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem( |
| &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass], |
| MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE), |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource, |
| 0, |
| m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize, |
| 0, |
| "", |
| true, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_resHuCPakAggregatedFrameStatsBuffer.sResource, |
| 0, |
| m_resHuCPakAggregatedFrameStatsBuffer.dwSize, |
| 1, |
| "", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex]; |
| CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams); |
| |
| auto bitStreamSize = m_encodeParams.dwBitstreamSize - |
| MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_resBitstreamBuffer, |
| MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE), |
| bitStreamSize, |
| 4, |
| "", |
| true, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_resBitstreamBuffer, |
| MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE), |
| bitStreamSize, |
| 5, |
| "", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| // Region 6 - BRC History buffer |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_vdencBrcHistoryBuffer, |
| 0, |
| CODECHAL_VDENC_HEVC_BRC_HISTORY_BUF_SIZE, |
| 6, |
| "", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_thirdLevelBatchBuffer.OsResource, |
| 0, |
| m_thirdLBSize, |
| 7, |
| "", |
| true, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| // Region 8 |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], |
| 0, |
| MOS_ALIGN_CEIL(sizeof(HucCommandDataVdencG12), CODECHAL_PAGE_SIZE), |
| 8, |
| "", |
| true, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| // Region 9 - HCP BRC Data Output |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_resBrcDataBuffer, |
| 0, |
| CODECHAL_CACHELINE_SIZE, |
| 9, |
| "", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| // Region 10 |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_HucStitchCmdBatchBuffer.OsResource, |
| 0, |
| m_hwInterface->m_HucStitchCmdBatchBufferSize, |
| 10, |
| "", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion( |
| &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource, |
| 0, |
| m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize, |
| 15, |
| "", |
| true, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::DumpHucCqp() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| int32_t currentPass = GetCurrentPass(); |
| |
| // Region 5 - Output SLB Buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion( |
| &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, |
| 0, |
| m_hwInterface->m_vdenc2ndLevelBatchBufferSize, |
| 5, |
| "_Out_Slb", |
| false, |
| currentPass, |
| hucRegionDumpUpdate)); |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| #endif |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetRoundingValues() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingIntra) |
| { |
| m_roundIntraValue = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetIntra; |
| } |
| else |
| { |
| if (m_hevcPicParams->CodingType == I_TYPE) |
| { |
| m_roundIntraValue = 10; |
| } |
| else if (m_hevcSeqParams->HierarchicalFlag && m_hevcPicParams->HierarchLevelPlus1 > 0) |
| { |
| //Hierachical GOP |
| if (m_hevcPicParams->HierarchLevelPlus1 == 1) |
| { |
| m_roundIntraValue = 10; |
| } |
| else if (m_hevcPicParams->HierarchLevelPlus1 == 2) |
| { |
| m_roundIntraValue = 9; |
| } |
| else |
| { |
| m_roundIntraValue = 8; |
| } |
| } |
| else |
| { |
| m_roundIntraValue = 10; |
| } |
| } |
| |
| if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingInter) |
| { |
| m_roundInterValue = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetInter; |
| } |
| else |
| { |
| if (m_hevcPicParams->CodingType == I_TYPE) |
| { |
| m_roundInterValue = 4; |
| } |
| else if (m_hevcSeqParams->HierarchicalFlag && m_hevcPicParams->HierarchLevelPlus1 > 0) |
| { |
| //Hierachical GOP |
| if (m_hevcPicParams->HierarchLevelPlus1 == 1) |
| { |
| m_roundInterValue = 4; |
| } |
| else if (m_hevcPicParams->HierarchLevelPlus1 == 2) |
| { |
| m_roundInterValue = 3; |
| } |
| else |
| { |
| m_roundInterValue = 2; |
| } |
| } |
| else |
| { |
| m_roundInterValue = 4; |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalVdencHevcStateG12::SetAddCommands(uint32_t commandType, PMOS_COMMAND_BUFFER cmdBuffer, bool addToBatchBufferHuCBRC, uint32_t roundInterValue, uint32_t roundIntraValue, bool isLowDelayB, int8_t * pRefIdxMapping, int8_t recNotFilteredID) |
| { |
| #ifdef _HEVC_ENCODE_VDENC_SUPPORTED |
| void *pCmdParams = nullptr; |
| |
| if (commandType == CODECHAL_CMD1) |
| { |
| // Send CMD1 command |
| MHW_VDBOX_VDENC_CMD1_PARAMS cmd1Params; |
| MOS_ZeroMemory(&cmd1Params, sizeof(cmd1Params)); |
| cmd1Params.Mode = CODECHAL_ENCODE_MODE_HEVC; |
| cmd1Params.pHevcEncPicParams = m_hevcPicParams; |
| cmd1Params.pHevcEncSlcParams = m_hevcSliceParams; |
| cmd1Params.pInputParams = pCmdParams; |
| cmd1Params.bHevcVisualQualityImprovement = m_hevcVisualQualityImprovement; |
| //down cast? |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd1Cmd(cmdBuffer, nullptr, &cmd1Params)); |
| } |
| else if (commandType == CODECHAL_CMD2) |
| { |
| PMHW_VDBOX_VDENC_CMD2_STATE cmd2Params(new MHW_VDBOX_VDENC_CMD2_STATE); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmd2Params); |
| |
| // set CMD2 command |
| cmd2Params->Mode = CODECHAL_ENCODE_MODE_HEVC; |
| cmd2Params->pHevcEncSeqParams = m_hevcSeqParams; |
| cmd2Params->pHevcEncPicParams = m_hevcPicParams; |
| cmd2Params->pHevcEncSlcParams = m_hevcSliceParams; |
| cmd2Params->bRoundingEnabled = m_hevcVdencRoundingEnabled; |
| cmd2Params->bPakOnlyMultipassEnable = m_pakOnlyPass; |
| cmd2Params->bUseDefaultQpDeltas = (m_hevcVdencAcqpEnabled && cmd2Params->pHevcEncSeqParams->QpAdjustment ) || |
| (m_brcEnabled && cmd2Params->pHevcEncSeqParams->MBBRC != mbBrcDisabled); |
| cmd2Params->bPanicEnabled = (m_brcEnabled) && (m_panicEnable) && (IsLastPass()) && !m_pakOnlyPass; |
| cmd2Params->bStreamInEnabled = m_vdencStreamInEnabled; |
| cmd2Params->bROIStreamInEnabled = m_vdencNativeROIEnabled; |
| cmd2Params->bTileReplayEnable = m_enableTileReplay; |
| cmd2Params->bIsLowDelayB = isLowDelayB; |
| cmd2Params->bCaptureModeEnable = m_CaptureModeEnable; |
| cmd2Params->m_WirelessSessionID = 0; |
| cmd2Params->pRefIdxMapping = pRefIdxMapping; |
| cmd2Params->recNotFilteredID = recNotFilteredID; |
| cmd2Params->pInputParams = pCmdParams; |
| cmd2Params->ucNumRefIdxL0ActiveMinus1 = cmd2Params->pHevcEncSlcParams->num_ref_idx_l0_active_minus1; |
| cmd2Params->bHevcVisualQualityImprovement = m_hevcVisualQualityImprovement; |
| cmd2Params->roundInterValue = roundInterValue; |
| cmd2Params->roundIntraValue = roundIntraValue; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd2Cmd(cmdBuffer, nullptr, cmd2Params)); |
| } |
| #endif |
| return MOS_STATUS_SUCCESS; |
| } |