libavcodec/mpegvideo.c - third_party/ffmpeg - Git at Google

 /*
  * The simplest mpeg encoder (well, it was the simplest!)
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */

 /**
  * @file
  * The simplest mpeg encoder (well, it was the simplest!).
  */

 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mem.h"

 #include "avcodec.h"
 #include "blockdsp.h"
 #include "idctdsp.h"
 #include "mathops.h"
 #include "mpeg_er.h"
 #include "mpegutils.h"
 #include "mpegvideo.h"
 #include "mpegvideodata.h"
 #include "libavutil/refstruct.h"


 static void gray16(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 {
     while(h--)
         memset(dst + h*linesize, 128, 16);
 }

 static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 {
     while(h--)
         memset(dst + h*linesize, 128, 8);
 }

 /* init common dct for both encoder and decoder */
 static av_cold void dsp_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp);
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);

     if (s->avctx->debug & FF_DEBUG_NOMC) {
         int i;
         for (i=0; i<4; i++) {
             s->hdsp.avg_pixels_tab[0][i] = gray16;
             s->hdsp.put_pixels_tab[0][i] = gray16;
             s->hdsp.put_no_rnd_pixels_tab[0][i] = gray16;

             s->hdsp.avg_pixels_tab[1][i] = gray8;
             s->hdsp.put_pixels_tab[1][i] = gray8;
             s->hdsp.put_no_rnd_pixels_tab[1][i] = gray8;
         }
     }
 }

 av_cold void ff_mpv_idct_init(MpegEncContext *s)
 {
     if (s->codec_id == AV_CODEC_ID_MPEG4)
         s->idsp.mpeg4_studio_profile = s->studio_profile;
     ff_idctdsp_init(&s->idsp, s->avctx);

     /* load & permutate scantables
      * note: only wmv uses different ones
      */
     if (s->alternate_scan) {
         ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
         ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
     } else {
         ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
         ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
     }
 }

 av_cold int ff_mpv_init_duplicate_contexts(MpegEncContext *s)
 {
     const int nb_slices = s->slice_context_count;
     const size_t slice_size = s->slice_ctx_size;

     for (int i = 1; i < nb_slices; i++) {
         s->thread_context[i] = av_memdup(s, slice_size);
         if (!s->thread_context[i])
             return AVERROR(ENOMEM);
         s->thread_context[i]->start_mb_y =
             (s->mb_height * (i    ) + nb_slices / 2) / nb_slices;
         s->thread_context[i]->end_mb_y   =
             (s->mb_height * (i + 1) + nb_slices / 2) / nb_slices;
     }
     s->start_mb_y = 0;
     s->end_mb_y   = nb_slices > 1 ? (s->mb_height + nb_slices / 2) / nb_slices
                                   : s->mb_height;
     return 0;
 }

 static av_cold void free_duplicate_context(MpegEncContext *s)
 {
     if (!s)
         return;

     av_freep(&s->sc.edge_emu_buffer);
     av_freep(&s->sc.scratchpad_buf);
     s->sc.obmc_scratchpad = NULL;
     s->sc.linesize = 0;
 }

 static av_cold void free_duplicate_contexts(MpegEncContext *s)
 {
     for (int i = 1; i < s->slice_context_count; i++) {
         free_duplicate_context(s->thread_context[i]);
         av_freep(&s->thread_context[i]);
     }
     free_duplicate_context(s);
 }

 int ff_update_duplicate_context(MpegEncContext *dst, const MpegEncContext *src)
 {
 #define COPY(M)              \
     M(ScratchpadContext, sc) \
     M(int, start_mb_y)       \
     M(int, end_mb_y)         \
     M(int16_t*, dc_val)      \
     M(void*, ac_val)

     int ret;
     // FIXME copy only needed parts
 #define BACKUP(T, member) T member = dst->member;
     COPY(BACKUP)
     memcpy(dst, src, sizeof(MpegEncContext));
 #define RESTORE(T, member) dst->member = member;
     COPY(RESTORE)

     ret = ff_mpv_framesize_alloc(dst->avctx, &dst->sc, dst->linesize);
     if (ret < 0) {
         av_log(dst->avctx, AV_LOG_ERROR, "failed to allocate context "
                "scratch buffers.\n");
         return ret;
     }
     return 0;
 }

 /**
  * Set the given MpegEncContext to common defaults
  * (same for encoding and decoding).
  * The changed fields will not depend upon the
  * prior state of the MpegEncContext.
  */
 av_cold void ff_mpv_common_defaults(MpegEncContext *s)
 {
     s->chroma_qscale_table   = ff_default_chroma_qscale_table;
     s->progressive_frame     = 1;
     s->progressive_sequence  = 1;
     s->picture_structure     = PICT_FRAME;

     s->slice_context_count   = 1;
 }

 static av_cold void free_buffer_pools(BufferPoolContext *pools)
 {
     av_refstruct_pool_uninit(&pools->mbskip_table_pool);
     av_refstruct_pool_uninit(&pools->qscale_table_pool);
     av_refstruct_pool_uninit(&pools->mb_type_pool);
     av_refstruct_pool_uninit(&pools->motion_val_pool);
     av_refstruct_pool_uninit(&pools->ref_index_pool);
     pools->alloc_mb_height = pools->alloc_mb_width = pools->alloc_mb_stride = 0;
 }

 av_cold int ff_mpv_init_context_frame(MpegEncContext *s)
 {
     int nb_slices = (HAVE_THREADS &&
                      s->avctx->active_thread_type & FF_THREAD_SLICE) ?
                     s->avctx->thread_count : 1;
     BufferPoolContext *const pools = &s->buffer_pools;
     int y_size, c_size, yc_size, mb_array_size, mv_table_size, x, y;
     int mb_height;

     if (s->encoding && s->avctx->slices)
         nb_slices = s->avctx->slices;

     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
         s->mb_height = (s->height + 31) / 32 * 2;
     else
         s->mb_height = (s->height + 15) / 16;

     if (nb_slices > MAX_THREADS || (nb_slices > s->mb_height && s->mb_height)) {
         int max_slices;
         if (s->mb_height)
             max_slices = FFMIN(MAX_THREADS, s->mb_height);
         else
             max_slices = MAX_THREADS;
         av_log(s->avctx, AV_LOG_WARNING, "too many threads/slices (%d),"
                " reducing to %d\n", nb_slices, max_slices);
         nb_slices = max_slices;
     }

     s->slice_context_count = nb_slices;

     /* VC-1 can change from being progressive to interlaced on a per-frame
      * basis. We therefore allocate certain buffers so big that they work
      * in both instances. */
     mb_height = s->msmpeg4_version == MSMP4_VC1 ?
                     FFALIGN(s->mb_height, 2) : s->mb_height;

     s->mb_width   = (s->width + 15) / 16;
     s->mb_stride  = s->mb_width + 1;
     s->b8_stride  = s->mb_width * 2 + 1;
     mb_array_size = mb_height * s->mb_stride;
     mv_table_size = (mb_height + 2) * s->mb_stride + 1;

     /* set default edge pos, will be overridden
      * in decode_header if needed */
     s->h_edge_pos = s->mb_width * 16;
     s->v_edge_pos = s->mb_height * 16;

     s->mb_num     = s->mb_width * s->mb_height;

     s->block_wrap[0] =
     s->block_wrap[1] =
     s->block_wrap[2] =
     s->block_wrap[3] = s->b8_stride;
     s->block_wrap[4] =
     s->block_wrap[5] = s->mb_stride;

     y_size  = s->b8_stride * (2 * mb_height + 1);
     c_size  = s->mb_stride * (mb_height + 1);
     yc_size = y_size + 2   * c_size;

     if (!FF_ALLOCZ_TYPED_ARRAY(s->mb_index2xy, s->mb_num + 1))
         return AVERROR(ENOMEM);
     for (y = 0; y < s->mb_height; y++)
         for (x = 0; x < s->mb_width; x++)
             s->mb_index2xy[x + y * s->mb_width] = x + y * s->mb_stride;

     s->mb_index2xy[s->mb_height * s->mb_width] = (s->mb_height - 1) * s->mb_stride + s->mb_width; // FIXME really needed?

 #define ALLOC_POOL(name, size, flags) do { \
     pools->name ##_pool = av_refstruct_pool_alloc((size), (flags)); \
     if (!pools->name ##_pool) \
         return AVERROR(ENOMEM); \
 } while (0)

     if (s->codec_id == AV_CODEC_ID_MPEG4 ||
         (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)) {
         /* interlaced direct mode decoding tables */
         int16_t (*tmp)[2] = av_calloc(mv_table_size, 4 * sizeof(*tmp));
         if (!tmp)
             return AVERROR(ENOMEM);
         s->p_field_mv_table_base = tmp;
         tmp += s->mb_stride + 1;
         for (int i = 0; i < 2; i++) {
             for (int j = 0; j < 2; j++) {
                 s->p_field_mv_table[i][j] = tmp;
                 tmp += mv_table_size;
             }
         }
         if (s->codec_id == AV_CODEC_ID_MPEG4) {
             ALLOC_POOL(mbskip_table, mb_array_size + 2,
                        !s->encoding ? AV_REFSTRUCT_POOL_FLAG_ZERO_EVERY_TIME : 0);
             if (!s->encoding) {
                 /* cbp, pred_dir */
                 if (!(s->cbp_table      = av_mallocz(mb_array_size)) ||
                     !(s->pred_dir_table = av_mallocz(mb_array_size)))
                     return AVERROR(ENOMEM);
             }
         }
     }

     if (s->msmpeg4_version >= MSMP4_V3) {
         s->coded_block_base = av_mallocz(y_size);
         if (!s->coded_block_base)
             return AVERROR(ENOMEM);
         s->coded_block = s->coded_block_base + s->b8_stride + 1;
     }

     if (s->h263_pred || s->h263_aic || !s->encoding) {
         // When encoding, each slice (and therefore each thread)
         // gets its own ac_val and dc_val buffers in order to avoid
         // races.
         size_t allslice_yc_size = yc_size * (s->encoding ? nb_slices : 1);
         if (s->out_format == FMT_H263) {
             /* ac values */
             if (!FF_ALLOCZ_TYPED_ARRAY(s->ac_val_base, allslice_yc_size))
                 return AVERROR(ENOMEM);
             s->ac_val = s->ac_val_base + s->b8_stride + 1;
         }

         /* dc values */
         // MN: we need these for error resilience of intra-frames
         // Allocating them unconditionally for decoders also means
         // that we don't need to reinitialize when e.g. h263_aic changes.

         // y_size and therefore yc_size is always odd; allocate one element
         // more for each encoder slice in order to be able to align each slice's
         // dc_val to four in order to use aligned stores when cleaning dc_val.
         allslice_yc_size += s->encoding * nb_slices;
         if (!FF_ALLOC_TYPED_ARRAY(s->dc_val_base, allslice_yc_size))
             return AVERROR(ENOMEM);
         s->dc_val = s->dc_val_base + s->b8_stride + 1;
         for (size_t i = 0; i < allslice_yc_size; ++i)
             s->dc_val_base[i] = 1024;
     }

     // Note the + 1 is for a quicker MPEG-4 slice_end detection
     if (!(s->mbskip_table  = av_mallocz(mb_array_size + 2)) ||
         /* which mb is an intra block,  init macroblock skip table */
         !(s->mbintra_table = av_mallocz(mb_array_size)))
         return AVERROR(ENOMEM);

     ALLOC_POOL(qscale_table, mv_table_size, 0);
     ALLOC_POOL(mb_type, mv_table_size * sizeof(uint32_t), 0);

     if (s->out_format == FMT_H263 || s->encoding ||
         (s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_MVS)) {
         const int b8_array_size = s->b8_stride * mb_height * 2;
         int mv_size        = 2 * (b8_array_size + 4) * sizeof(int16_t);
         int ref_index_size = 4 * mb_array_size;

         /* FIXME: The output of H.263 with OBMC depends upon
          * the earlier content of the buffer; therefore we set
          * the flags to always reset returned buffers here. */
         ALLOC_POOL(motion_val, mv_size, AV_REFSTRUCT_POOL_FLAG_ZERO_EVERY_TIME);
         ALLOC_POOL(ref_index, ref_index_size, 0);
     }
 #undef ALLOC_POOL
     pools->alloc_mb_width  = s->mb_width;
     pools->alloc_mb_height = mb_height;
     pools->alloc_mb_stride = s->mb_stride;

     return !CONFIG_MPEGVIDEODEC || s->encoding ? 0 : ff_mpeg_er_init(s);
 }

 /**
  * init common structure for both encoder and decoder.
  * this assumes that some variables like width/height are already set
  */
 av_cold int ff_mpv_common_init(MpegEncContext *s)
 {
     int ret;

     if (s->avctx->pix_fmt == AV_PIX_FMT_NONE) {
         av_log(s->avctx, AV_LOG_ERROR,
                "decoding to AV_PIX_FMT_NONE is not supported.\n");
         return AVERROR(EINVAL);
     }

     if ((s->width || s->height) &&
         av_image_check_size(s->width, s->height, 0, s->avctx))
         return AVERROR(EINVAL);

     dsp_init(s);

     /* set chroma shifts */
     ret = av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
                                            &s->chroma_x_shift,
                                            &s->chroma_y_shift);
     if (ret)
         return ret;

     if ((ret = ff_mpv_init_context_frame(s)))
         goto fail;

     s->context_initialized = 1;
     s->thread_context[0]   = s;

 //     if (s->width && s->height) {
     if (!s->encoding) {
         ret = ff_mpv_init_duplicate_contexts(s);
         if (ret < 0)
             goto fail;
     }
 //     }

     return 0;
  fail:
     ff_mpv_common_end(s);
     return ret;
 }

 av_cold void ff_mpv_free_context_frame(MpegEncContext *s)
 {
     free_duplicate_contexts(s);

     free_buffer_pools(&s->buffer_pools);
     av_freep(&s->p_field_mv_table_base);
     for (int i = 0; i < 2; i++)
         for (int j = 0; j < 2; j++)
             s->p_field_mv_table[i][j] = NULL;

     av_freep(&s->ac_val_base);
     av_freep(&s->dc_val_base);
     av_freep(&s->coded_block_base);
     av_freep(&s->mbintra_table);
     av_freep(&s->cbp_table);
     av_freep(&s->pred_dir_table);

     av_freep(&s->mbskip_table);

     av_freep(&s->er.error_status_table);
     av_freep(&s->er.er_temp_buffer);
     av_freep(&s->mb_index2xy);

     s->linesize = s->uvlinesize = 0;
 }

 av_cold void ff_mpv_common_end(MpegEncContext *s)
 {
     ff_mpv_free_context_frame(s);
     if (s->slice_context_count > 1)
         s->slice_context_count = 1;

     ff_mpv_unref_picture(&s->last_pic);
     ff_mpv_unref_picture(&s->cur_pic);
     ff_mpv_unref_picture(&s->next_pic);

     s->context_initialized      = 0;
     s->context_reinit           = 0;
     s->linesize = s->uvlinesize = 0;
 }


 /**
  * Clean dc, ac for the current non-intra MB.
  */
 void ff_clean_intra_table_entries(MpegEncContext *s)
 {
     int wrap = s->b8_stride;
     int xy = s->block_index[0];
     /* chroma */
     unsigned uxy = s->block_index[4];
     unsigned vxy = s->block_index[5];
     int16_t *dc_val = s->dc_val;

     AV_WN32A(dc_val + xy,        1024 << 16 | 1024);
     AV_WN32 (dc_val + xy + wrap, 1024 << 16 | 1024);
     dc_val[uxy] =
     dc_val[vxy] = 1024;
     /* ac pred */
     int16_t (*ac_val)[16] = s->ac_val;
     av_assume(!((uintptr_t)ac_val & 0xF));
     // Don't reset the upper-left luma block, as it will only ever be
     // referenced by blocks from the same macroblock.
     memset(ac_val[xy +    1], 0,     sizeof(*ac_val));
     memset(ac_val[xy + wrap], 0, 2 * sizeof(*ac_val));
     /* ac pred */
     memset(ac_val[uxy], 0, sizeof(*ac_val));
     memset(ac_val[vxy], 0, sizeof(*ac_val));
 }

 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
     const int linesize   = s->cur_pic.linesize[0]; //not s->linesize as this would be wrong for field pics
     const int uvlinesize = s->cur_pic.linesize[1];
     const int width_of_mb = (4 + (s->avctx->bits_per_raw_sample > 8)) - s->avctx->lowres;
     const int height_of_mb = 4 - s->avctx->lowres;

     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
     //block_index is not used by mpeg2, so it is not affected by chroma_format

     s->dest[0] = s->cur_pic.data[0] + (int)((s->mb_x - 1U) <<  width_of_mb);
     s->dest[1] = s->cur_pic.data[1] + (int)((s->mb_x - 1U) << (width_of_mb - s->chroma_x_shift));
     s->dest[2] = s->cur_pic.data[2] + (int)((s->mb_x - 1U) << (width_of_mb - s->chroma_x_shift));

     if (s->picture_structure == PICT_FRAME) {
         s->dest[0] += s->mb_y *   linesize << height_of_mb;
         s->dest[1] += s->mb_y * uvlinesize << (height_of_mb - s->chroma_y_shift);
         s->dest[2] += s->mb_y * uvlinesize << (height_of_mb - s->chroma_y_shift);
     } else {
         s->dest[0] += (s->mb_y>>1) *   linesize << height_of_mb;
         s->dest[1] += (s->mb_y>>1) * uvlinesize << (height_of_mb - s->chroma_y_shift);
         s->dest[2] += (s->mb_y>>1) * uvlinesize << (height_of_mb - s->chroma_y_shift);
         av_assert1((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
     }
 }

 /**
  * set qscale and update qscale dependent variables.
  */
 void ff_set_qscale(MpegEncContext * s, int qscale)
 {
     if (qscale < 1)
         qscale = 1;
     else if (qscale > 31)
         qscale = 31;

     s->qscale = qscale;
     s->chroma_qscale= s->chroma_qscale_table[qscale];

     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
 }
	/*
	* The simplest mpeg encoder (well, it was the simplest!)
	* Copyright (c) 2000,2001 Fabrice Bellard
	* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
	*
	* 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
	*
	* This file is part of FFmpeg.
	*
	* FFmpeg is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* FFmpeg is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with FFmpeg; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	*/

	/**
	* @file
	* The simplest mpeg encoder (well, it was the simplest!).
	*/

	#include "libavutil/attributes.h"
	#include "libavutil/avassert.h"
	#include "libavutil/imgutils.h"
	#include "libavutil/internal.h"
	#include "libavutil/intreadwrite.h"
	#include "libavutil/mem.h"

	#include "avcodec.h"
	#include "blockdsp.h"
	#include "idctdsp.h"
	#include "mathops.h"
	#include "mpeg_er.h"
	#include "mpegutils.h"
	#include "mpegvideo.h"
	#include "mpegvideodata.h"
	#include "libavutil/refstruct.h"


	static void gray16(uint8_t dst, const uint8_t src, ptrdiff_t linesize, int h)
	{
	while(h--)
	memset(dst + h*linesize, 128, 16);
	}

	static void gray8(uint8_t dst, const uint8_t src, ptrdiff_t linesize, int h)
	{
	while(h--)
	memset(dst + h*linesize, 128, 8);
	}

	/* init common dct for both encoder and decoder */
	static av_cold void dsp_init(MpegEncContext *s)
	{
	ff_blockdsp_init(&s->bdsp);
	ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
	ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);

	if (s->avctx->debug & FF_DEBUG_NOMC) {
	int i;
	for (i=0; i<4; i++) {
	s->hdsp.avg_pixels_tab[0][i] = gray16;
	s->hdsp.put_pixels_tab[0][i] = gray16;
	s->hdsp.put_no_rnd_pixels_tab[0][i] = gray16;

	s->hdsp.avg_pixels_tab[1][i] = gray8;
	s->hdsp.put_pixels_tab[1][i] = gray8;
	s->hdsp.put_no_rnd_pixels_tab[1][i] = gray8;
	}
	}
	}

	av_cold void ff_mpv_idct_init(MpegEncContext *s)
	{
	if (s->codec_id == AV_CODEC_ID_MPEG4)
	s->idsp.mpeg4_studio_profile = s->studio_profile;
	ff_idctdsp_init(&s->idsp, s->avctx);

	/* load & permutate scantables
	* note: only wmv uses different ones
	*/
	if (s->alternate_scan) {
	ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
	ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
	} else {
	ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
	ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
	}
	}

	av_cold int ff_mpv_init_duplicate_contexts(MpegEncContext *s)
	{
	const int nb_slices = s->slice_context_count;
	const size_t slice_size = s->slice_ctx_size;

	for (int i = 1; i < nb_slices; i++) {
	s->thread_context[i] = av_memdup(s, slice_size);
	if (!s->thread_context[i])
	return AVERROR(ENOMEM);
	s->thread_context[i]->start_mb_y =
	(s->mb_height * (i ) + nb_slices / 2) / nb_slices;
	s->thread_context[i]->end_mb_y =
	(s->mb_height * (i + 1) + nb_slices / 2) / nb_slices;
	}
	s->start_mb_y = 0;
	s->end_mb_y = nb_slices > 1 ? (s->mb_height + nb_slices / 2) / nb_slices
	: s->mb_height;
	return 0;
	}

	static av_cold void free_duplicate_context(MpegEncContext *s)
	{
	if (!s)
	return;

	av_freep(&s->sc.edge_emu_buffer);
	av_freep(&s->sc.scratchpad_buf);
	s->sc.obmc_scratchpad = NULL;
	s->sc.linesize = 0;
	}

	static av_cold void free_duplicate_contexts(MpegEncContext *s)
	{
	for (int i = 1; i < s->slice_context_count; i++) {
	free_duplicate_context(s->thread_context[i]);
	av_freep(&s->thread_context[i]);
	}
	free_duplicate_context(s);
	}

	int ff_update_duplicate_context(MpegEncContext dst, const MpegEncContext src)
	{
	#define COPY(M) \
	M(ScratchpadContext, sc) \
	M(int, start_mb_y) \
	M(int, end_mb_y) \
	M(int16_t*, dc_val) \
	M(void*, ac_val)

	int ret;
	// FIXME copy only needed parts
	#define BACKUP(T, member) T member = dst->member;
	COPY(BACKUP)
	memcpy(dst, src, sizeof(MpegEncContext));
	#define RESTORE(T, member) dst->member = member;
	COPY(RESTORE)

	ret = ff_mpv_framesize_alloc(dst->avctx, &dst->sc, dst->linesize);
	if (ret < 0) {
	av_log(dst->avctx, AV_LOG_ERROR, "failed to allocate context "
	"scratch buffers.\n");
	return ret;
	}
	return 0;
	}

	/**
	* Set the given MpegEncContext to common defaults
	* (same for encoding and decoding).
	* The changed fields will not depend upon the
	* prior state of the MpegEncContext.
	*/
	av_cold void ff_mpv_common_defaults(MpegEncContext *s)
	{
	s->chroma_qscale_table = ff_default_chroma_qscale_table;
	s->progressive_frame = 1;
	s->progressive_sequence = 1;
	s->picture_structure = PICT_FRAME;

	s->slice_context_count = 1;
	}

	static av_cold void free_buffer_pools(BufferPoolContext *pools)
	{
	av_refstruct_pool_uninit(&pools->mbskip_table_pool);
	av_refstruct_pool_uninit(&pools->qscale_table_pool);
	av_refstruct_pool_uninit(&pools->mb_type_pool);
	av_refstruct_pool_uninit(&pools->motion_val_pool);
	av_refstruct_pool_uninit(&pools->ref_index_pool);
	pools->alloc_mb_height = pools->alloc_mb_width = pools->alloc_mb_stride = 0;
	}

	av_cold int ff_mpv_init_context_frame(MpegEncContext *s)
	{
	int nb_slices = (HAVE_THREADS &&
	s->avctx->active_thread_type & FF_THREAD_SLICE) ?
	s->avctx->thread_count : 1;
	BufferPoolContext *const pools = &s->buffer_pools;
	int y_size, c_size, yc_size, mb_array_size, mv_table_size, x, y;
	int mb_height;

	if (s->encoding && s->avctx->slices)
	nb_slices = s->avctx->slices;

	if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
	s->mb_height = (s->height + 31) / 32 * 2;
	else
	s->mb_height = (s->height + 15) / 16;

	if (nb_slices > MAX_THREADS \|\| (nb_slices > s->mb_height && s->mb_height)) {
	int max_slices;
	if (s->mb_height)
	max_slices = FFMIN(MAX_THREADS, s->mb_height);
	else
	max_slices = MAX_THREADS;
	av_log(s->avctx, AV_LOG_WARNING, "too many threads/slices (%d),"
	" reducing to %d\n", nb_slices, max_slices);
	nb_slices = max_slices;
	}

	s->slice_context_count = nb_slices;

	/* VC-1 can change from being progressive to interlaced on a per-frame
	* basis. We therefore allocate certain buffers so big that they work
	* in both instances. */
	mb_height = s->msmpeg4_version == MSMP4_VC1 ?
	FFALIGN(s->mb_height, 2) : s->mb_height;

	s->mb_width = (s->width + 15) / 16;
	s->mb_stride = s->mb_width + 1;
	s->b8_stride = s->mb_width * 2 + 1;
	mb_array_size = mb_height * s->mb_stride;
	mv_table_size = (mb_height + 2) * s->mb_stride + 1;

	/* set default edge pos, will be overridden
	* in decode_header if needed */
	s->h_edge_pos = s->mb_width * 16;
	s->v_edge_pos = s->mb_height * 16;

	s->mb_num = s->mb_width * s->mb_height;

	s->block_wrap[0] =
	s->block_wrap[1] =
	s->block_wrap[2] =
	s->block_wrap[3] = s->b8_stride;
	s->block_wrap[4] =
	s->block_wrap[5] = s->mb_stride;

	y_size = s->b8_stride * (2 * mb_height + 1);
	c_size = s->mb_stride * (mb_height + 1);
	yc_size = y_size + 2 * c_size;

	if (!FF_ALLOCZ_TYPED_ARRAY(s->mb_index2xy, s->mb_num + 1))
	return AVERROR(ENOMEM);
	for (y = 0; y < s->mb_height; y++)
	for (x = 0; x < s->mb_width; x++)
	s->mb_index2xy[x + y * s->mb_width] = x + y * s->mb_stride;

	s->mb_index2xy[s->mb_height * s->mb_width] = (s->mb_height - 1) * s->mb_stride + s->mb_width; // FIXME really needed?

	#define ALLOC_POOL(name, size, flags) do { \
	pools->name ##_pool = av_refstruct_pool_alloc((size), (flags)); \
	if (!pools->name ##_pool) \
	return AVERROR(ENOMEM); \
	} while (0)

	if (s->codec_id == AV_CODEC_ID_MPEG4 \|\|
	(s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)) {
	/* interlaced direct mode decoding tables */
	int16_t (tmp)[2] = av_calloc(mv_table_size, 4 sizeof(*tmp));
	if (!tmp)
	return AVERROR(ENOMEM);
	s->p_field_mv_table_base = tmp;
	tmp += s->mb_stride + 1;
	for (int i = 0; i < 2; i++) {
	for (int j = 0; j < 2; j++) {
	s->p_field_mv_table[i][j] = tmp;
	tmp += mv_table_size;
	}
	}
	if (s->codec_id == AV_CODEC_ID_MPEG4) {
	ALLOC_POOL(mbskip_table, mb_array_size + 2,
	!s->encoding ? AV_REFSTRUCT_POOL_FLAG_ZERO_EVERY_TIME : 0);
	if (!s->encoding) {
	/* cbp, pred_dir */
	if (!(s->cbp_table = av_mallocz(mb_array_size)) \|\|
	!(s->pred_dir_table = av_mallocz(mb_array_size)))
	return AVERROR(ENOMEM);
	}
	}
	}

	if (s->msmpeg4_version >= MSMP4_V3) {
	s->coded_block_base = av_mallocz(y_size);
	if (!s->coded_block_base)
	return AVERROR(ENOMEM);
	s->coded_block = s->coded_block_base + s->b8_stride + 1;
	}

	if (s->h263_pred \|\| s->h263_aic \|\| !s->encoding) {
	// When encoding, each slice (and therefore each thread)
	// gets its own ac_val and dc_val buffers in order to avoid
	// races.
	size_t allslice_yc_size = yc_size * (s->encoding ? nb_slices : 1);
	if (s->out_format == FMT_H263) {
	/* ac values */
	if (!FF_ALLOCZ_TYPED_ARRAY(s->ac_val_base, allslice_yc_size))
	return AVERROR(ENOMEM);
	s->ac_val = s->ac_val_base + s->b8_stride + 1;
	}

	/* dc values */
	// MN: we need these for error resilience of intra-frames
	// Allocating them unconditionally for decoders also means
	// that we don't need to reinitialize when e.g. h263_aic changes.

	// y_size and therefore yc_size is always odd; allocate one element
	// more for each encoder slice in order to be able to align each slice's
	// dc_val to four in order to use aligned stores when cleaning dc_val.
	allslice_yc_size += s->encoding * nb_slices;
	if (!FF_ALLOC_TYPED_ARRAY(s->dc_val_base, allslice_yc_size))
	return AVERROR(ENOMEM);
	s->dc_val = s->dc_val_base + s->b8_stride + 1;
	for (size_t i = 0; i < allslice_yc_size; ++i)
	s->dc_val_base[i] = 1024;
	}

	// Note the + 1 is for a quicker MPEG-4 slice_end detection
	if (!(s->mbskip_table = av_mallocz(mb_array_size + 2)) \|\|
	/* which mb is an intra block, init macroblock skip table */
	!(s->mbintra_table = av_mallocz(mb_array_size)))
	return AVERROR(ENOMEM);

	ALLOC_POOL(qscale_table, mv_table_size, 0);
	ALLOC_POOL(mb_type, mv_table_size * sizeof(uint32_t), 0);

	if (s->out_format == FMT_H263 \|\| s->encoding \|\|
	(s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_MVS)) {
	const int b8_array_size = s->b8_stride * mb_height * 2;
	int mv_size = 2 * (b8_array_size + 4) * sizeof(int16_t);
	int ref_index_size = 4 * mb_array_size;

	/* FIXME: The output of H.263 with OBMC depends upon
	* the earlier content of the buffer; therefore we set
	* the flags to always reset returned buffers here. */
	ALLOC_POOL(motion_val, mv_size, AV_REFSTRUCT_POOL_FLAG_ZERO_EVERY_TIME);
	ALLOC_POOL(ref_index, ref_index_size, 0);
	}
	#undef ALLOC_POOL
	pools->alloc_mb_width = s->mb_width;
	pools->alloc_mb_height = mb_height;
	pools->alloc_mb_stride = s->mb_stride;

	return !CONFIG_MPEGVIDEODEC \|\| s->encoding ? 0 : ff_mpeg_er_init(s);
	}

	/**
	* init common structure for both encoder and decoder.
	* this assumes that some variables like width/height are already set
	*/
	av_cold int ff_mpv_common_init(MpegEncContext *s)
	{
	int ret;

	if (s->avctx->pix_fmt == AV_PIX_FMT_NONE) {
	av_log(s->avctx, AV_LOG_ERROR,
	"decoding to AV_PIX_FMT_NONE is not supported.\n");
	return AVERROR(EINVAL);
	}

	if ((s->width \|\| s->height) &&
	av_image_check_size(s->width, s->height, 0, s->avctx))
	return AVERROR(EINVAL);

	dsp_init(s);

	/* set chroma shifts */
	ret = av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
	&s->chroma_x_shift,
	&s->chroma_y_shift);
	if (ret)
	return ret;

	if ((ret = ff_mpv_init_context_frame(s)))
	goto fail;

	s->context_initialized = 1;
	s->thread_context[0] = s;

	// if (s->width && s->height) {
	if (!s->encoding) {
	ret = ff_mpv_init_duplicate_contexts(s);
	if (ret < 0)
	goto fail;
	}
	// }

	return 0;
	fail:
	ff_mpv_common_end(s);
	return ret;
	}

	av_cold void ff_mpv_free_context_frame(MpegEncContext *s)
	{
	free_duplicate_contexts(s);

	free_buffer_pools(&s->buffer_pools);
	av_freep(&s->p_field_mv_table_base);
	for (int i = 0; i < 2; i++)
	for (int j = 0; j < 2; j++)
	s->p_field_mv_table[i][j] = NULL;

	av_freep(&s->ac_val_base);
	av_freep(&s->dc_val_base);
	av_freep(&s->coded_block_base);
	av_freep(&s->mbintra_table);
	av_freep(&s->cbp_table);
	av_freep(&s->pred_dir_table);

	av_freep(&s->mbskip_table);

	av_freep(&s->er.error_status_table);
	av_freep(&s->er.er_temp_buffer);
	av_freep(&s->mb_index2xy);

	s->linesize = s->uvlinesize = 0;
	}

	av_cold void ff_mpv_common_end(MpegEncContext *s)
	{
	ff_mpv_free_context_frame(s);
	if (s->slice_context_count > 1)
	s->slice_context_count = 1;

	ff_mpv_unref_picture(&s->last_pic);
	ff_mpv_unref_picture(&s->cur_pic);
	ff_mpv_unref_picture(&s->next_pic);

	s->context_initialized = 0;
	s->context_reinit = 0;
	s->linesize = s->uvlinesize = 0;
	}


	/**
	* Clean dc, ac for the current non-intra MB.
	*/
	void ff_clean_intra_table_entries(MpegEncContext *s)
	{
	int wrap = s->b8_stride;
	int xy = s->block_index[0];
	/* chroma */
	unsigned uxy = s->block_index[4];
	unsigned vxy = s->block_index[5];
	int16_t *dc_val = s->dc_val;

	AV_WN32A(dc_val + xy, 1024 << 16 \| 1024);
	AV_WN32 (dc_val + xy + wrap, 1024 << 16 \| 1024);
	dc_val[uxy] =
	dc_val[vxy] = 1024;
	/* ac pred */
	int16_t (*ac_val)[16] = s->ac_val;
	av_assume(!((uintptr_t)ac_val & 0xF));
	// Don't reset the upper-left luma block, as it will only ever be
	// referenced by blocks from the same macroblock.
	memset(ac_val[xy + 1], 0, sizeof(*ac_val));
	memset(ac_val[xy + wrap], 0, 2 * sizeof(*ac_val));
	/* ac pred */
	memset(ac_val[uxy], 0, sizeof(*ac_val));
	memset(ac_val[vxy], 0, sizeof(*ac_val));
	}

	void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
	const int linesize = s->cur_pic.linesize[0]; //not s->linesize as this would be wrong for field pics
	const int uvlinesize = s->cur_pic.linesize[1];
	const int width_of_mb = (4 + (s->avctx->bits_per_raw_sample > 8)) - s->avctx->lowres;
	const int height_of_mb = 4 - s->avctx->lowres;

	s->block_index[0]= s->b8_stride(s->mb_y2 ) - 2 + s->mb_x*2;
	s->block_index[1]= s->b8_stride(s->mb_y2 ) - 1 + s->mb_x*2;
	s->block_index[2]= s->b8_stride(s->mb_y2 + 1) - 2 + s->mb_x*2;
	s->block_index[3]= s->b8_stride(s->mb_y2 + 1) - 1 + s->mb_x*2;
	s->block_index[4]= s->mb_stride(s->mb_y + 1) + s->b8_strides->mb_height*2 + s->mb_x - 1;
	s->block_index[5]= s->mb_stride(s->mb_y + s->mb_height + 2) + s->b8_strides->mb_height*2 + s->mb_x - 1;
	//block_index is not used by mpeg2, so it is not affected by chroma_format

	s->dest[0] = s->cur_pic.data[0] + (int)((s->mb_x - 1U) << width_of_mb);
	s->dest[1] = s->cur_pic.data[1] + (int)((s->mb_x - 1U) << (width_of_mb - s->chroma_x_shift));
	s->dest[2] = s->cur_pic.data[2] + (int)((s->mb_x - 1U) << (width_of_mb - s->chroma_x_shift));

	if (s->picture_structure == PICT_FRAME) {
	s->dest[0] += s->mb_y * linesize << height_of_mb;
	s->dest[1] += s->mb_y * uvlinesize << (height_of_mb - s->chroma_y_shift);
	s->dest[2] += s->mb_y * uvlinesize << (height_of_mb - s->chroma_y_shift);
	} else {
	s->dest[0] += (s->mb_y>>1) * linesize << height_of_mb;
	s->dest[1] += (s->mb_y>>1) * uvlinesize << (height_of_mb - s->chroma_y_shift);
	s->dest[2] += (s->mb_y>>1) * uvlinesize << (height_of_mb - s->chroma_y_shift);
	av_assert1((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
	}
	}

	/**
	* set qscale and update qscale dependent variables.
	*/
	void ff_set_qscale(MpegEncContext * s, int qscale)
	{
	if (qscale < 1)
	qscale = 1;
	else if (qscale > 31)
	qscale = 31;

	s->qscale = qscale;
	s->chroma_qscale= s->chroma_qscale_table[qscale];

	s->y_dc_scale= s->y_dc_scale_table[ qscale ];
	s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
	}