libavformat/iamf_parse.c - third_party/ffmpeg - Git at Google

 /*
  * Immersive Audio Model and Formats parsing
  * Copyright (c) 2023 James Almer <jamrial@gmail.com>
  *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */

 #include "libavutil/avassert.h"
 #include "libavutil/iamf.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavcodec/get_bits.h"
 #include "libavcodec/flac.h"
 #include "libavcodec/leb.h"
 #include "libavcodec/mpeg4audio.h"
 #include "libavcodec/put_bits.h"
 #include "avio_internal.h"
 #include "iamf_parse.h"
 #include "isom.h"

 static int opus_decoder_config(IAMFCodecConfig *codec_config,
                                AVIOContext *pb, int len)
 {
     int ret, left = len - avio_tell(pb);

     if (left < 11 || codec_config->audio_roll_distance >= 0)
         return AVERROR_INVALIDDATA;

     codec_config->extradata = av_malloc(left + 8);
     if (!codec_config->extradata)
         return AVERROR(ENOMEM);

     AV_WB32A(codec_config->extradata,     MKBETAG('O','p','u','s'));
     AV_WB32A(codec_config->extradata + 4, MKBETAG('H','e','a','d'));
     ret = ffio_read_size(pb, codec_config->extradata + 8, left);
     if (ret < 0)
         return ret;

     codec_config->extradata_size = left + 8;
     codec_config->sample_rate = 48000;

     return 0;
 }

 static int aac_decoder_config(IAMFCodecConfig *codec_config,
                               AVIOContext *pb, int len, void *logctx)
 {
     MPEG4AudioConfig cfg = { 0 };
     int object_type_id, codec_id, stream_type;
     int ret, tag, left;

     if (codec_config->audio_roll_distance >= 0)
         return AVERROR_INVALIDDATA;

     ff_mp4_read_descr(logctx, pb, &tag);
     if (tag != MP4DecConfigDescrTag)
         return AVERROR_INVALIDDATA;

     object_type_id = avio_r8(pb);
     if (object_type_id != 0x40)
         return AVERROR_INVALIDDATA;

     stream_type = avio_r8(pb);
     if (((stream_type >> 2) != 5) || ((stream_type >> 1) & 1))
         return AVERROR_INVALIDDATA;

     avio_skip(pb, 3); // buffer size db
     avio_skip(pb, 4); // rc_max_rate
     avio_skip(pb, 4); // avg bitrate

     codec_id = ff_codec_get_id(ff_mp4_obj_type, object_type_id);
     if (codec_id && codec_id != codec_config->codec_id)
         return AVERROR_INVALIDDATA;

     left = ff_mp4_read_descr(logctx, pb, &tag);
     if (tag != MP4DecSpecificDescrTag ||
         !left || left > (len - avio_tell(pb)))
         return AVERROR_INVALIDDATA;

     // We pad extradata here because avpriv_mpeg4audio_get_config2() needs it.
     codec_config->extradata = av_malloc((size_t)left + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!codec_config->extradata)
         return AVERROR(ENOMEM);

     ret = ffio_read_size(pb, codec_config->extradata, left);
     if (ret < 0)
         return ret;
     codec_config->extradata_size = left;
     memset(codec_config->extradata + codec_config->extradata_size, 0,
            AV_INPUT_BUFFER_PADDING_SIZE);

     ret = avpriv_mpeg4audio_get_config2(&cfg, codec_config->extradata,
                                         codec_config->extradata_size, 1, logctx);
     if (ret < 0)
         return ret;

     codec_config->sample_rate = cfg.sample_rate;

     return 0;
 }

 static int flac_decoder_config(IAMFCodecConfig *codec_config,
                                AVIOContext *pb, int len)
 {
     int ret, left;

     if (codec_config->audio_roll_distance)
         return AVERROR_INVALIDDATA;

     avio_skip(pb, 4); // METADATA_BLOCK_HEADER

     left = len - avio_tell(pb);
     if (left < FLAC_STREAMINFO_SIZE)
         return AVERROR_INVALIDDATA;

     codec_config->extradata = av_malloc(left);
     if (!codec_config->extradata)
         return AVERROR(ENOMEM);

     ret = ffio_read_size(pb, codec_config->extradata, left);
     if (ret < 0)
         return ret;

     codec_config->extradata_size = left;
     codec_config->sample_rate = AV_RB24(codec_config->extradata + 10) >> 4;

     return 0;
 }

 static int ipcm_decoder_config(IAMFCodecConfig *codec_config,
                                AVIOContext *pb, int len)
 {
     static const enum AVCodecID sample_fmt[2][3] = {
         { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE },
         { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE },
     };
     int sample_format = avio_r8(pb); // 0 = BE, 1 = LE
     int sample_size = (avio_r8(pb) / 8 - 2); // 16, 24, 32
     if (sample_format > 1 || sample_size > 2U || codec_config->audio_roll_distance)
         return AVERROR_INVALIDDATA;

     codec_config->codec_id = sample_fmt[sample_format][sample_size];
     codec_config->sample_rate = avio_rb32(pb);

     if (len - avio_tell(pb))
         return AVERROR_INVALIDDATA;

     return 0;
 }

 static int codec_config_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
 {
     IAMFCodecConfig **tmp, *codec_config = NULL;
     FFIOContext b;
     AVIOContext *pbc;
     uint8_t *buf;
     enum AVCodecID avcodec_id;
     unsigned codec_config_id, nb_samples, codec_id;
     int16_t audio_roll_distance;
     int ret;

     buf = av_malloc(len);
     if (!buf)
         return AVERROR(ENOMEM);

     ret = ffio_read_size(pb, buf, len);
     if (ret < 0)
         goto fail;

     ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
     pbc = &b.pub;

     codec_config_id = ffio_read_leb(pbc);
     codec_id = avio_rb32(pbc);
     nb_samples = ffio_read_leb(pbc);
     audio_roll_distance = avio_rb16(pbc);

     switch(codec_id) {
     case MKBETAG('O','p','u','s'):
         avcodec_id = AV_CODEC_ID_OPUS;
         break;
     case MKBETAG('m','p','4','a'):
         avcodec_id = AV_CODEC_ID_AAC;
         break;
     case MKBETAG('f','L','a','C'):
         avcodec_id = AV_CODEC_ID_FLAC;
         break;
     default:
         avcodec_id = AV_CODEC_ID_NONE;
         break;
     }

     for (int i = 0; i < c->nb_codec_configs; i++)
         if (c->codec_configs[i]->codec_config_id == codec_config_id) {
             ret = AVERROR_INVALIDDATA;
             goto fail;
         }

     tmp = av_realloc_array(c->codec_configs, c->nb_codec_configs + 1, sizeof(*c->codec_configs));
     if (!tmp) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
     c->codec_configs = tmp;

     codec_config = av_mallocz(sizeof(*codec_config));
     if (!codec_config) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }

     codec_config->codec_config_id = codec_config_id;
     codec_config->codec_id = avcodec_id;
     codec_config->nb_samples = nb_samples;
     codec_config->audio_roll_distance = audio_roll_distance;

     switch(codec_id) {
     case MKBETAG('O','p','u','s'):
         ret = opus_decoder_config(codec_config, pbc, len);
         break;
     case MKBETAG('m','p','4','a'):
         ret = aac_decoder_config(codec_config, pbc, len, s);
         break;
     case MKBETAG('f','L','a','C'):
         ret = flac_decoder_config(codec_config, pbc, len);
         break;
     case MKBETAG('i','p','c','m'):
         ret = ipcm_decoder_config(codec_config, pbc, len);
         break;
     default:
         break;
     }
     if (ret < 0)
         goto fail;

     if ((codec_config->nb_samples > INT_MAX) || codec_config->nb_samples <= 0 ||
         (-codec_config->audio_roll_distance > INT_MAX / codec_config->nb_samples)) {
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }

     c->codec_configs[c->nb_codec_configs++] = codec_config;

     len -= avio_tell(pbc);
     if (len)
        av_log(s, AV_LOG_WARNING, "Underread in codec_config_obu. %d bytes left at the end\n", len);

     ret = 0;
 fail:
     av_free(buf);
     if (ret < 0) {
         if (codec_config)
             av_free(codec_config->extradata);
         av_free(codec_config);
     }
     return ret;
 }

 static int update_extradata(AVCodecParameters *codecpar)
 {
     GetBitContext gb;
     PutBitContext pb;
     int ret;

     switch(codecpar->codec_id) {
     case AV_CODEC_ID_OPUS:
         AV_WB8(codecpar->extradata   + 9,  codecpar->ch_layout.nb_channels);
         AV_WL16A(codecpar->extradata + 10, AV_RB16A(codecpar->extradata + 10)); // Byte swap pre-skip
         AV_WL32A(codecpar->extradata + 12, AV_RB32A(codecpar->extradata + 12)); // Byte swap sample rate
         AV_WL16A(codecpar->extradata + 16, AV_RB16A(codecpar->extradata + 16)); // Byte swap Output Gain
         break;
     case AV_CODEC_ID_AAC: {
         uint8_t buf[6];
         int size = FFMIN(codecpar->extradata_size, sizeof(buf));

         init_put_bits(&pb, buf, sizeof(buf));
         ret = init_get_bits8(&gb, codecpar->extradata, size);
         if (ret < 0)
             return ret;

         ret = get_bits(&gb, 5);
         put_bits(&pb, 5, ret);
         if (ret == AOT_ESCAPE) // violates section 3.11.2, but better check for it
             put_bits(&pb, 6, get_bits(&gb, 6));
         ret = get_bits(&gb, 4);
         put_bits(&pb, 4, ret);
         if (ret == 0x0f)
             put_bits(&pb, 24, get_bits(&gb, 24));

         skip_bits(&gb, 4);
         put_bits(&pb, 4, codecpar->ch_layout.nb_channels); // set channel config
         ret = get_bits_left(&gb);
         if (ret < 0)
             return AVERROR_INVALIDDATA;
         ret = FFMIN(ret, put_bits_left(&pb));
         while (ret >= 32) {
            put_bits32(&pb, get_bits_long(&gb, 32));
            ret -= 32;
         }
         put_bits(&pb, ret, get_bits_long(&gb, ret));
         flush_put_bits(&pb);

         memcpy(codecpar->extradata, buf, put_bytes_output(&pb));
         break;
     }
     case AV_CODEC_ID_FLAC: {
         uint8_t buf[13];
         int size = FFMIN(codecpar->extradata_size, sizeof(buf));

         init_put_bits(&pb, buf, sizeof(buf));
         ret = init_get_bits8(&gb, codecpar->extradata, size);
         if (ret < 0)
             return ret;

         put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
         put_bits63(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
         put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
         skip_bits(&gb, 3);
         put_bits(&pb, 3, codecpar->ch_layout.nb_channels - 1);
         ret = get_bits_left(&gb);
         if (ret < 0)
             return AVERROR_INVALIDDATA;
         ret = FFMIN(ret, put_bits_left(&pb));
         put_bits(&pb, ret, get_bits(&gb, ret));
         flush_put_bits(&pb);

         memcpy(codecpar->extradata, buf, put_bytes_output(&pb));
         break;
     }
     }

     return 0;
 }

 static int parse_coupled_substream(AVChannelLayout *out, AVChannelLayout *in, int n)
 {
     if (in->u.mask & AV_CH_LAYOUT_STEREO) {
         out->u.map[n++].id = AV_CHAN_FRONT_LEFT;
         out->u.map[n++].id = AV_CHAN_FRONT_RIGHT;
         in->u.mask &= ~AV_CH_LAYOUT_STEREO;
     } else if (in->u.mask & (AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)) {
         out->u.map[n++].id = AV_CHAN_FRONT_LEFT_OF_CENTER;
         out->u.map[n++].id = AV_CHAN_FRONT_RIGHT_OF_CENTER;
         in->u.mask &= ~(AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER);
     } else if (in->u.mask & (AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)) {
         out->u.map[n++].id = AV_CHAN_SIDE_LEFT;
         out->u.map[n++].id = AV_CHAN_SIDE_RIGHT;
         in->u.mask &= ~(AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT);
     } else if (in->u.mask & (AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)) {
         out->u.map[n++].id = AV_CHAN_BACK_LEFT;
         out->u.map[n++].id = AV_CHAN_BACK_RIGHT;
         in->u.mask &= ~(AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT);
     } else if (in->u.mask & (AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)) {
         out->u.map[n++].id = AV_CHAN_TOP_FRONT_LEFT;
         out->u.map[n++].id = AV_CHAN_TOP_FRONT_RIGHT;
         in->u.mask &= ~(AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT);
     } else if (in->u.mask & (AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT)) {
         out->u.map[n++].id = AV_CHAN_TOP_SIDE_LEFT;
         out->u.map[n++].id = AV_CHAN_TOP_SIDE_RIGHT;
         in->u.mask &= ~(AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT);
     } else if (in->u.mask & (AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT)) {
         out->u.map[n++].id = AV_CHAN_TOP_BACK_LEFT;
         out->u.map[n++].id = AV_CHAN_TOP_BACK_RIGHT;
         in->u.mask &= ~(AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT);
     }

     return n;
 }

 static int scalable_channel_layout_config(void *s, AVIOContext *pb,
                                           IAMFAudioElement *audio_element,
                                           const IAMFCodecConfig *codec_config)
 {
     int nb_layers, k = 0;

     nb_layers = avio_r8(pb) >> 5; // get_bits(&gb, 3);
     // skip_bits(&gb, 5); //reserved

     if (nb_layers > 6 || nb_layers == 0)
         return AVERROR_INVALIDDATA;

     audio_element->layers = av_calloc(nb_layers, sizeof(*audio_element->layers));
     if (!audio_element->layers)
         return AVERROR(ENOMEM);

     audio_element->nb_layers = nb_layers;
     for (int i = 0, n = 0; i < nb_layers; i++) {
         AVChannelLayout ch_layout = { 0 };
         AVIAMFLayer *layer;
         int loudspeaker_layout, output_gain_is_present_flag;
         int substream_count, coupled_substream_count;
         int expanded_loudspeaker_layout = -1;
         int ret, byte = avio_r8(pb);
         int channels;

         layer = av_iamf_audio_element_add_layer(audio_element->element);
         if (!layer)
             return AVERROR(ENOMEM);

         loudspeaker_layout = byte >> 4; // get_bits(&gb, 4);
         output_gain_is_present_flag = (byte >> 3) & 1; //get_bits1(&gb);
         if ((byte >> 2) & 1)
             layer->flags |= AV_IAMF_LAYER_FLAG_RECON_GAIN;
         substream_count = avio_r8(pb);
         coupled_substream_count = avio_r8(pb);

         if (!substream_count || coupled_substream_count > substream_count ||
             substream_count + k > audio_element->nb_substreams)
             return AVERROR_INVALIDDATA;

         audio_element->layers[i].substream_count         = substream_count;
         audio_element->layers[i].coupled_substream_count = coupled_substream_count;
         if (output_gain_is_present_flag) {
             layer->output_gain_flags = avio_r8(pb) >> 2;  // get_bits(&gb, 6);
             layer->output_gain = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
         }

         if (loudspeaker_layout == 15) {
             if (i) {
                 av_log(s, AV_LOG_ERROR, "expanded_loudspeaker_layout set with more than one layer in Audio Element #%d\n",
                        audio_element->audio_element_id);
                 return AVERROR_INVALIDDATA;
             }
             expanded_loudspeaker_layout = avio_r8(pb);
         }
         if (expanded_loudspeaker_layout >= 0 && expanded_loudspeaker_layout < 13) {
             av_channel_layout_copy(&ch_layout, &ff_iamf_expanded_scalable_ch_layouts[expanded_loudspeaker_layout]);
         } else if (loudspeaker_layout < 10) {
             av_channel_layout_copy(&ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
             if (i) {
                 uint64_t mask = av_channel_layout_subset(&audio_element->element->layers[i-1]->ch_layout, UINT64_MAX);
                 // When the first layer is Mono, the second layer may not have the C channel (e.g. Stereo)
                 if (audio_element->element->layers[i-1]->ch_layout.nb_channels == 1)
                     n--;
                 else if ((ch_layout.u.mask & mask) != mask)
                     return AVERROR_INVALIDDATA;
                 ch_layout.u.mask &= ~mask;
             }
         } else {
             if (expanded_loudspeaker_layout >= 0)
                 avpriv_request_sample(s, "expanded_loudspeaker_layout %d", expanded_loudspeaker_layout);
             else
                 avpriv_request_sample(s, "loudspeaker_layout %d", loudspeaker_layout);
             return AVERROR_PATCHWELCOME;
         }

         channels = ch_layout.nb_channels;
         if (i) {
             if (ch_layout.nb_channels <= audio_element->element->layers[i-1]->ch_layout.nb_channels)
                 return AVERROR_INVALIDDATA;
             channels -= audio_element->element->layers[i-1]->ch_layout.nb_channels;
         }
         if (channels != substream_count + coupled_substream_count)
             return AVERROR_INVALIDDATA;

         for (int j = 0; j < substream_count; j++) {
             IAMFSubStream *substream = &audio_element->substreams[k++];

             substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
                                                                              (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;

             ret = update_extradata(substream->codecpar);
             if (ret < 0)
                 return ret;
         }

             ret = av_channel_layout_custom_init(&layer->ch_layout, ch_layout.nb_channels);
             if (ret < 0)
                 return ret;
             for (int j = 0; j < n; j++)
                 layer->ch_layout.u.map[j].id = av_channel_layout_channel_from_index(&audio_element->element->layers[i-1]->ch_layout, j);

             coupled_substream_count = audio_element->layers[i].coupled_substream_count;
             while (coupled_substream_count--) {
                 n = parse_coupled_substream(&layer->ch_layout, &ch_layout, n);
             }

             substream_count -= audio_element->layers[i].coupled_substream_count;
             n = parse_coupled_substream(&layer->ch_layout, &ch_layout, n); // In case the first layer is Mono
             while (substream_count--) {
                 if (ch_layout.u.mask & AV_CH_FRONT_CENTER) {
                     layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_CENTER;
                     ch_layout.u.mask &= ~AV_CH_FRONT_CENTER;
                 }
                 if (ch_layout.u.mask & AV_CH_LOW_FREQUENCY) {
                     layer->ch_layout.u.map[n++].id = AV_CHAN_LOW_FREQUENCY;
                     ch_layout.u.mask &= ~AV_CH_LOW_FREQUENCY;
                 }
             }

             if (n != ch_layout.nb_channels)
                 return AVERROR_INVALIDDATA;

             ret = av_channel_layout_retype(&layer->ch_layout, AV_CHANNEL_ORDER_NATIVE, 0);
             if (ret < 0 && ret != AVERROR(ENOSYS))
                 return ret;
     }

     if (k != audio_element->nb_substreams)
         return AVERROR_INVALIDDATA;

     return 0;
 }

 static int ambisonics_config(void *s, AVIOContext *pb,
                              IAMFAudioElement *audio_element,
                              const IAMFCodecConfig *codec_config)
 {
     AVIAMFLayer *layer;
     unsigned ambisonics_mode;
     int output_channel_count, substream_count, order;
     int ret;

     ambisonics_mode = ffio_read_leb(pb);
     if (ambisonics_mode > 1)
         return AVERROR_INVALIDDATA;

     output_channel_count = avio_r8(pb);  // C
     substream_count = avio_r8(pb);  // N
     if (audio_element->nb_substreams != substream_count || output_channel_count == 0)
         return AVERROR_INVALIDDATA;

     order = floor(sqrt(output_channel_count - 1));
     /* incomplete order - some harmonics are missing */
     if ((order + 1) * (order + 1) != output_channel_count)
         return AVERROR_INVALIDDATA;

     audio_element->layers = av_mallocz(sizeof(*audio_element->layers));
     if (!audio_element->layers)
         return AVERROR(ENOMEM);

     audio_element->nb_layers = 1;
     audio_element->layers->substream_count = substream_count;

     layer = av_iamf_audio_element_add_layer(audio_element->element);
     if (!layer)
         return AVERROR(ENOMEM);

     layer->ambisonics_mode = ambisonics_mode;
     if (ambisonics_mode == 0) {
         for (int i = 0; i < substream_count; i++) {
             IAMFSubStream *substream = &audio_element->substreams[i];

             substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;

             ret = update_extradata(substream->codecpar);
             if (ret < 0)
                 return ret;
         }

         ret = av_channel_layout_custom_init(&layer->ch_layout, output_channel_count);
         if (ret < 0)
             return ret;

         for (int i = 0; i < output_channel_count; i++)
             layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE;

         ret = av_channel_layout_retype(&layer->ch_layout, AV_CHANNEL_ORDER_AMBISONIC, 0);
         if (ret < 0 && ret != AVERROR(ENOSYS))
             return ret;
     } else {
         int coupled_substream_count = avio_r8(pb);  // M
         int count = substream_count + coupled_substream_count;
         int nb_demixing_matrix = count * output_channel_count;

         audio_element->layers->coupled_substream_count = coupled_substream_count;

         layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count };
         layer->demixing_matrix = av_malloc_array(nb_demixing_matrix, sizeof(*layer->demixing_matrix));
         if (!layer->demixing_matrix)
             return AVERROR(ENOMEM);

         layer->nb_demixing_matrix = nb_demixing_matrix;

         for (int i = 0; i < layer->nb_demixing_matrix; i++)
             layer->demixing_matrix[i] = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 15);

         for (int i = 0; i < substream_count; i++) {
             IAMFSubStream *substream = &audio_element->substreams[i];

             substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
                                                                              (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;


             ret = update_extradata(substream->codecpar);
             if (ret < 0)
                 return ret;
         }
     }

     return 0;
 }

 static int param_parse(void *s, IAMFContext *c, AVIOContext *pb,
                        unsigned int type,
                        const IAMFAudioElement *audio_element,
                        AVIAMFParamDefinition **out_param_definition)
 {
     IAMFParamDefinition *param_definition = NULL;
     AVIAMFParamDefinition *param;
     unsigned int parameter_id, parameter_rate, mode;
     unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0;
     unsigned int total_duration = 0;
     size_t param_size;

     parameter_id = ffio_read_leb(pb);

     for (int i = 0; i < c->nb_param_definitions; i++)
         if (c->param_definitions[i]->param->parameter_id == parameter_id) {
             param_definition = c->param_definitions[i];
             break;
         }

     parameter_rate = ffio_read_leb(pb);
     mode = avio_r8(pb) >> 7;

     if (mode == 0) {
         duration = ffio_read_leb(pb);
         if (!duration)
             return AVERROR_INVALIDDATA;
         if (audio_element) {
             const IAMFCodecConfig *codec_config = ff_iamf_get_codec_config(c, audio_element->codec_config_id);
             if (duration > av_rescale(codec_config->nb_samples, codec_config->sample_rate, parameter_rate)) {
                 av_log(s, AV_LOG_ERROR, "Invalid block duration in parameter_id %u\n", parameter_id);
                 return AVERROR_INVALIDDATA;
             }
         }
         constant_subblock_duration = ffio_read_leb(pb);
         if (constant_subblock_duration == 0)
             nb_subblocks = ffio_read_leb(pb);
         else {
             if (constant_subblock_duration > duration) {
                 av_log(s, AV_LOG_ERROR, "Invalid block duration in parameter_id %u\n", parameter_id);
                 return AVERROR_INVALIDDATA;
             }
             nb_subblocks = duration / constant_subblock_duration;
             total_duration = duration;
         }
     }

     if (nb_subblocks > duration) {
         av_log(s, AV_LOG_ERROR, "Invalid duration or subblock count in parameter_id %u\n", parameter_id);
         return AVERROR_INVALIDDATA;
     }

     param = av_iamf_param_definition_alloc(type, nb_subblocks, &param_size);
     if (!param)
         return AVERROR(ENOMEM);

     for (int i = 0; i < nb_subblocks; i++) {
         void *subblock = av_iamf_param_definition_get_subblock(param, i);
         unsigned int subblock_duration = constant_subblock_duration;

         if (constant_subblock_duration == 0) {
             subblock_duration = ffio_read_leb(pb);
             if (duration - total_duration > subblock_duration) {
                 av_log(s, AV_LOG_ERROR, "Invalid subblock durations in parameter_id %u\n", parameter_id);
                 av_free(param);
                 return AVERROR_INVALIDDATA;
             }
             total_duration += subblock_duration;
         } else if (i == nb_subblocks - 1)
             subblock_duration = duration - i * constant_subblock_duration;

         switch (type) {
         case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
             AVIAMFMixGain *mix = subblock;
             mix->subblock_duration = subblock_duration;
             break;
         }
         case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
             AVIAMFDemixingInfo *demix = subblock;
             demix->subblock_duration = subblock_duration;
             // DefaultDemixingInfoParameterData
             av_assert0(audio_element);
             demix->dmixp_mode = avio_r8(pb) >> 5;
             audio_element->element->default_w = avio_r8(pb) >> 4;
             break;
         }
         case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
             AVIAMFReconGain *recon = subblock;
             recon->subblock_duration = subblock_duration;
             break;
         }
         default:
             av_free(param);
             return AVERROR_INVALIDDATA;
         }
     }

     if (!mode && !constant_subblock_duration && total_duration != duration) {
         av_log(s, AV_LOG_ERROR, "Invalid subblock durations in parameter_id %u\n", parameter_id);
         av_free(param);
         return AVERROR_INVALIDDATA;
     }

     param->parameter_id = parameter_id;
     param->parameter_rate = parameter_rate;
     param->duration = duration;
     param->constant_subblock_duration = constant_subblock_duration;
     param->nb_subblocks = nb_subblocks;

     if (param_definition) {
         if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
             av_log(s, AV_LOG_ERROR, "Inconsistent parameters for parameter_id %u\n", parameter_id);
             av_free(param);
             return AVERROR_INVALIDDATA;
         }
     } else {
         IAMFParamDefinition **tmp = av_realloc_array(c->param_definitions, c->nb_param_definitions + 1,
                                                      sizeof(*c->param_definitions));
         if (!tmp) {
             av_free(param);
             return AVERROR(ENOMEM);
         }
         c->param_definitions = tmp;

         param_definition = av_mallocz(sizeof(*param_definition));
         if (!param_definition) {
             av_free(param);
             return AVERROR(ENOMEM);
         }
         param_definition->param = param;
         param_definition->mode = !mode;
         param_definition->param_size = param_size;
         param_definition->audio_element = audio_element;

         c->param_definitions[c->nb_param_definitions++] = param_definition;
     }

     av_assert0(out_param_definition);
     *out_param_definition = param;

     return 0;
 }

 static int audio_element_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
 {
     const IAMFCodecConfig *codec_config;
     AVIAMFAudioElement *element;
     IAMFAudioElement **tmp, *audio_element = NULL;
     FFIOContext b;
     AVIOContext *pbc;
     uint8_t *buf;
     unsigned audio_element_id, nb_substreams, codec_config_id, num_parameters;
     int audio_element_type, ret;

     buf = av_malloc(len);
     if (!buf)
         return AVERROR(ENOMEM);

     ret = ffio_read_size(pb, buf, len);
     if (ret < 0)
         goto fail;

     ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
     pbc = &b.pub;

     audio_element_id = ffio_read_leb(pbc);

     for (int i = 0; i < c->nb_audio_elements; i++)
         if (c->audio_elements[i]->audio_element_id == audio_element_id) {
             av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id);
             ret = AVERROR_INVALIDDATA;
             goto fail;
         }

     audio_element_type = avio_r8(pbc) >> 5;
     if (audio_element_type > AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
         av_log(s, AV_LOG_DEBUG, "Unknown audio_element_type referenced in an audio element. Ignoring\n");
         ret = 0;
         goto fail;
     }

     codec_config_id = ffio_read_leb(pbc);

     codec_config = ff_iamf_get_codec_config(c, codec_config_id);
     if (!codec_config) {
         av_log(s, AV_LOG_ERROR, "Non existent codec config id %d referenced in an audio element\n", codec_config_id);
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }

     if (codec_config->codec_id == AV_CODEC_ID_NONE) {
         av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n");
         ret = 0;
         goto fail;
     }

     tmp = av_realloc_array(c->audio_elements, c->nb_audio_elements + 1, sizeof(*c->audio_elements));
     if (!tmp) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
     c->audio_elements = tmp;

     audio_element = av_mallocz(sizeof(*audio_element));
     if (!audio_element) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }

     nb_substreams = ffio_read_leb(pbc);
     audio_element->codec_config_id = codec_config_id;
     audio_element->audio_element_id = audio_element_id;
     audio_element->substreams = av_calloc(nb_substreams, sizeof(*audio_element->substreams));
     if (!audio_element->substreams) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
     audio_element->nb_substreams = nb_substreams;

     element = audio_element->element = av_iamf_audio_element_alloc();
     if (!element) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
     audio_element->celement = element;

     element->audio_element_type = audio_element_type;

     for (int i = 0; i < audio_element->nb_substreams; i++) {
         IAMFSubStream *substream = &audio_element->substreams[i];

         substream->codecpar = avcodec_parameters_alloc();
         if (!substream->codecpar) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }

         substream->audio_substream_id = ffio_read_leb(pbc);

         substream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
         substream->codecpar->codec_id   = codec_config->codec_id;
         substream->codecpar->frame_size = codec_config->nb_samples;
         substream->codecpar->sample_rate = codec_config->sample_rate;
         substream->codecpar->seek_preroll = -codec_config->audio_roll_distance * codec_config->nb_samples;

         switch(substream->codecpar->codec_id) {
         case AV_CODEC_ID_AAC:
         case AV_CODEC_ID_FLAC:
         case AV_CODEC_ID_OPUS:
             substream->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!substream->codecpar->extradata) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
             }
             memcpy(substream->codecpar->extradata, codec_config->extradata, codec_config->extradata_size);
             memset(substream->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
             substream->codecpar->extradata_size = codec_config->extradata_size;
             break;
         }
     }

     num_parameters = ffio_read_leb(pbc);
     if (num_parameters > 2 && audio_element_type == 0) {
         av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
                                 " for Channel representations\n", num_parameters);
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }
     if (num_parameters && audio_element_type != 0) {
         av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
                                 " for Scene representations\n", num_parameters);
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }

     for (int i = 0; i < num_parameters; i++) {
         unsigned type;

         type = ffio_read_leb(pbc);
         if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
             ret = AVERROR_INVALIDDATA;
         else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
             if (element->demixing_info) {
                 ret = AVERROR_INVALIDDATA;
                 goto fail;
             }
             ret = param_parse(s, c, pbc, type, audio_element, &element->demixing_info);
         } else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
             if (element->recon_gain_info) {
                 ret = AVERROR_INVALIDDATA;
                 goto fail;
             }
             ret = param_parse(s, c, pbc, type, audio_element, &element->recon_gain_info);
         } else {
             unsigned param_definition_size = ffio_read_leb(pbc);
             avio_skip(pbc, param_definition_size);
         }
         if (ret < 0)
             goto fail;
     }

     if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
         ret = scalable_channel_layout_config(s, pbc, audio_element, codec_config);
         if (ret < 0)
             goto fail;
     } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
         ret = ambisonics_config(s, pbc, audio_element, codec_config);
         if (ret < 0)
             goto fail;
     } else {
         av_unreachable("audio_element_type should have been checked above");
     }

     c->audio_elements[c->nb_audio_elements++] = audio_element;

     len -= avio_tell(pbc);
     if (len)
        av_log(s, AV_LOG_WARNING, "Underread in audio_element_obu. %d bytes left at the end\n", len);

     ret = 0;
 fail:
     av_free(buf);
     if (ret < 0)
         ff_iamf_free_audio_element(&audio_element);
     return ret;
 }

 static int label_string(AVIOContext *pb, char **label)
 {
     uint8_t buf[128];

     avio_get_str(pb, sizeof(buf), buf, sizeof(buf));

     if (pb->error)
         return pb->error;
     if (pb->eof_reached)
         return AVERROR_INVALIDDATA;
     *label = av_strdup(buf);
     if (!*label)
         return AVERROR(ENOMEM);

     return 0;
 }

 static int mix_presentation_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
 {
     AVIAMFMixPresentation *mix;
     IAMFMixPresentation **tmp, *mix_presentation = NULL;
     FFIOContext b;
     AVIOContext *pbc;
     uint8_t *buf;
     unsigned nb_submixes, mix_presentation_id;
     int ret;

     buf = av_malloc(len);
     if (!buf)
         return AVERROR(ENOMEM);

     ret = ffio_read_size(pb, buf, len);
     if (ret < 0)
         goto fail;

     ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
     pbc = &b.pub;

     mix_presentation_id = ffio_read_leb(pbc);

     for (int i = 0; i < c->nb_mix_presentations; i++)
         if (c->mix_presentations[i]->mix_presentation_id == mix_presentation_id) {
             av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id);
             ret = AVERROR_INVALIDDATA;
             goto fail;
         }

     tmp = av_realloc_array(c->mix_presentations, c->nb_mix_presentations + 1, sizeof(*c->mix_presentations));
     if (!tmp) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
     c->mix_presentations = tmp;

     mix_presentation = av_mallocz(sizeof(*mix_presentation));
     if (!mix_presentation) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }

     mix_presentation->mix_presentation_id = mix_presentation_id;
     mix = mix_presentation->mix = av_iamf_mix_presentation_alloc();
     if (!mix) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
     mix_presentation->cmix = mix;

     mix_presentation->count_label = ffio_read_leb(pbc);
     mix_presentation->language_label = av_calloc(mix_presentation->count_label,
                                                  sizeof(*mix_presentation->language_label));
     if (!mix_presentation->language_label) {
         mix_presentation->count_label = 0;
         ret = AVERROR(ENOMEM);
         goto fail;
     }

     for (int i = 0; i < mix_presentation->count_label; i++) {
         ret = label_string(pbc, &mix_presentation->language_label[i]);
         if (ret < 0)
             goto fail;
     }

     for (int i = 0; i < mix_presentation->count_label; i++) {
         char *annotation = NULL;
         ret = label_string(pbc, &annotation);
         if (ret < 0)
             goto fail;
         ret = av_dict_set(&mix->annotations, mix_presentation->language_label[i], annotation,
                           AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
         if (ret < 0)
             goto fail;
     }

     nb_submixes = ffio_read_leb(pbc);
     for (int i = 0; i < nb_submixes; i++) {
         AVIAMFSubmix *sub_mix;
         unsigned nb_elements, nb_layouts;

         sub_mix = av_iamf_mix_presentation_add_submix(mix);
         if (!sub_mix) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }

         nb_elements = ffio_read_leb(pbc);
         for (int j = 0; j < nb_elements; j++) {
             AVIAMFSubmixElement *submix_element;
             IAMFAudioElement *audio_element = NULL;
             unsigned int rendering_config_extension_size;

             submix_element = av_iamf_submix_add_element(sub_mix);
             if (!submix_element) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
             }

             submix_element->audio_element_id = ffio_read_leb(pbc);

             for (int k = 0; k < c->nb_audio_elements; k++)
                 if (c->audio_elements[k]->audio_element_id == submix_element->audio_element_id) {
                     audio_element = c->audio_elements[k];
                     break;
                 }

             if (!audio_element) {
                 av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n",
                        submix_element->audio_element_id, mix_presentation_id);
                 ret = AVERROR_INVALIDDATA;
                 goto fail;
             }

             for (int k = 0; k < mix_presentation->count_label; k++) {
                 char *annotation = NULL;
                 ret = label_string(pbc, &annotation);
                 if (ret < 0)
                     goto fail;
                 ret = av_dict_set(&submix_element->annotations, mix_presentation->language_label[k], annotation,
                                   AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
                 if (ret < 0)
                     goto fail;
             }

             submix_element->headphones_rendering_mode = avio_r8(pbc) >> 6;

             rendering_config_extension_size = ffio_read_leb(pbc);
             avio_skip(pbc, rendering_config_extension_size);

             ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
                               audio_element,
                               &submix_element->element_mix_config);
             if (ret < 0)
                 goto fail;
             submix_element->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
         }

         ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config);
         if (ret < 0)
             goto fail;
         sub_mix->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);

         nb_layouts = ffio_read_leb(pbc);
         for (int j = 0; j < nb_layouts; j++) {
             AVIAMFSubmixLayout *submix_layout;
             int info_type;
             int byte = avio_r8(pbc);

             submix_layout = av_iamf_submix_add_layout(sub_mix);
             if (!submix_layout) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
             }

             submix_layout->layout_type = byte >> 6;
             if (submix_layout->layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS ||
                 submix_layout->layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
                 av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
                        submix_layout->layout_type, mix_presentation_id);
                 ret = AVERROR_INVALIDDATA;
                 goto fail;
             }
             if (submix_layout->layout_type == 2) {
                 int sound_system;
                 sound_system = (byte >> 2) & 0xF;
                 if (sound_system >= FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) {
                     ret = AVERROR_INVALIDDATA;
                     goto fail;
                 }
                 av_channel_layout_copy(&submix_layout->sound_system, &ff_iamf_sound_system_map[sound_system].layout);
             } else
                 submix_layout->sound_system = (AVChannelLayout)AV_CHANNEL_LAYOUT_BINAURAL;

             info_type = avio_r8(pbc);
             submix_layout->integrated_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
             submix_layout->digital_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);

             if (info_type & 1)
                 submix_layout->true_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
             if (info_type & 2) {
                 unsigned int num_anchored_loudness = avio_r8(pbc);

                 for (int k = 0; k < num_anchored_loudness; k++) {
                     unsigned int anchor_element = avio_r8(pbc);
                     AVRational anchored_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
                     if (anchor_element == IAMF_ANCHOR_ELEMENT_DIALOGUE)
                         submix_layout->dialogue_anchored_loudness = anchored_loudness;
                     else if (anchor_element <= IAMF_ANCHOR_ELEMENT_ALBUM)
                         submix_layout->album_anchored_loudness = anchored_loudness;
                     else
                         av_log(s, AV_LOG_DEBUG, "Unknown anchor_element. Ignoring\n");
                 }
             }

             if (info_type & 0xFC) {
                 unsigned int info_type_size = ffio_read_leb(pbc);
                 avio_skip(pbc, info_type_size);
             }
         }
     }

     c->mix_presentations[c->nb_mix_presentations++] = mix_presentation;

     len -= avio_tell(pbc);
     if (len)
         av_log(s, AV_LOG_WARNING, "Underread in mix_presentation_obu. %d bytes left at the end\n", len);

     ret = 0;
 fail:
     av_free(buf);
     if (ret < 0)
         ff_iamf_free_mix_presentation(&mix_presentation);
     return ret;
 }

 int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
                              unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
                              unsigned *skip_samples, unsigned *discard_padding)
 {
     GetBitContext gb;
     int ret, extension_flag, trimming, start;
     unsigned skip = 0, discard = 0;
     unsigned size;

     ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
     if (ret < 0)
         return ret;

     *type          = get_bits(&gb, 5);
     /*redundant      =*/ get_bits1(&gb);
     trimming       = get_bits1(&gb);
     extension_flag = get_bits1(&gb);

     *obu_size = get_leb(&gb);
     if (*obu_size > INT_MAX)
         return AVERROR_INVALIDDATA;

     start = get_bits_count(&gb) / 8;

     if (trimming) {
         discard = get_leb(&gb); // num_samples_to_trim_at_end
         skip = get_leb(&gb); // num_samples_to_trim_at_start
     }

     if (skip_samples)
         *skip_samples = skip;
     if (discard_padding)
         *discard_padding = discard;

     if (extension_flag) {
         unsigned int extension_bytes;
         extension_bytes = get_leb(&gb);
         if (extension_bytes > INT_MAX / 8)
             return AVERROR_INVALIDDATA;
         skip_bits_long(&gb, extension_bytes * 8);
     }

     if (get_bits_left(&gb) < 0)
         return AVERROR_INVALIDDATA;

     size = *obu_size + start;
     if (size > INT_MAX)
         return AVERROR_INVALIDDATA;

     *obu_size -= get_bits_count(&gb) / 8 - start;
     *start_pos = size - *obu_size;

     return size;
 }

 int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
                                 int max_size, void *log_ctx)
 {
     uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
     int ret;

     while (1) {
         unsigned obu_size;
         enum IAMF_OBU_Type type;
         int start_pos, len, size;

         if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0)
             return ret;
         size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
         if (size < 0)
             return size;
         memset(header + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);

         len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, NULL, NULL);
         if (len < 0 || obu_size > max_size) {
             av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu header\n");
             avio_seek(pb, -size, SEEK_CUR);
             return len;
         }

         if (type >= IAMF_OBU_IA_PARAMETER_BLOCK && type < IAMF_OBU_IA_SEQUENCE_HEADER) {
             avio_seek(pb, -size, SEEK_CUR);
             break;
         }

         avio_seek(pb, -(size - start_pos), SEEK_CUR);
         switch (type) {
         case IAMF_OBU_IA_CODEC_CONFIG:
             ret = codec_config_obu(log_ctx, c, pb, obu_size);
             break;
         case IAMF_OBU_IA_AUDIO_ELEMENT:
             ret = audio_element_obu(log_ctx, c, pb, obu_size);
             break;
         case IAMF_OBU_IA_MIX_PRESENTATION:
             ret = mix_presentation_obu(log_ctx, c, pb, obu_size);
             break;
         default: {
             int64_t offset = avio_skip(pb, obu_size);
             if (offset < 0)
                 ret = offset;
             break;
         }
         }
         if (ret < 0) {
             av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu type %d\n", type);
             return ret;
         }
         max_size -= obu_size + start_pos;
         if (max_size < 0)
             return AVERROR_INVALIDDATA;
         if (!max_size)
             break;
     }

     return 0;
 }